February 28, 2013
On Thursday, 28 February 2013 at 17:00:51 UTC, Walter Bright wrote:
> On 2/28/2013 6:31 AM, Steven Schveighoffer wrote:
>> If this doesn't translate to the same code, I don't know why not.
>
> Try it and see with your favorite C compiler.
>
> Then try the lookahead cases I also posted.

You are being stubborn here.

You claim that this is significantly faster, but give no numbers.
You claim that the compiler can't do some optimization, when LLVM does it already.

See sample code bellow, which is a typical structure find in lexer.c :

int bar(int n) {
	switch(n) {
		case 0:
			return 0; // Bad case !
		
		case 25:
			return 75;
		
		case 42:
			return 69;
		
		case 666:
			return 999;
		
		default:
			return -1;
	}
}

int foo(int n) {
	switch(n) {
		case 1:
			return 23;
		
		case 2:
		case 3:
			return n;
			
		default:
			return bar(n);
	}
}

As we see, foo don't check for 0, and bar does, but only when necessary, as bar isn't called. Or this is what you think. Let's see how SDC compile this :

define i32 @_D3barFiZi(i32 %arg.n) {
  %n = alloca i32
  store i32 %arg.n, i32* %n
  br label %body

body:                                             ; preds = %0
  %1 = load i32* %n
  switch i32 %1, label %default [
    i32 0, label %.case
    i32 25, label %.case1
    i32 42, label %.case2
    i32 666, label %.case3
  ]

switchstart:                                      ; No predecessors!
  br label %.case

.case:                                            ; preds = %body, %switchstart
  ret i32 0

.case1:                                           ; preds = %body
  ret i32 75

.case2:                                           ; preds = %body
  ret i32 69

.case3:                                           ; preds = %body
  ret i32 999

default:                                          ; preds = %body
  ret i32 -1

switchend:                                        ; No predecessors!
  unreachable
}

define i32 @_D3fooFiZi(i32 %arg.n) {
  %n = alloca i32
  store i32 %arg.n, i32* %n
  br label %body

body:                                             ; preds = %0
  %1 = load i32* %n
  switch i32 %1, label %default [
    i32 1, label %.case
    i32 2, label %.case1
    i32 3, label %.case2
  ]

switchstart:                                      ; No predecessors!
  br label %.case

.case:                                            ; preds = %body, %switchstart
  ret i32 23

.case1:                                           ; preds = %body
  br label %.case2

.case2:                                           ; preds = %body, %.case1
  %2 = load i32* %n
  ret i32 %2

default:                                          ; preds = %body
  %3 = load i32* %n
  %4 = call i32 @_D3barFiZi(i32 %3)
  ret i32 %4

switchend:                                        ; No predecessors!
  unreachable
}

Here is the raw result of the frontend in LLVM IR. Now here is what the optimizer give us :

define i32 @_D3barFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default [
    i32 0, label %.case
    i32 25, label %.case1
    i32 42, label %.case2
    i32 666, label %.case3
  ]

.case:                                            ; preds = %default, %.case3, %.case2, %.case1, %body
  %merge = phi i32 [ 0, %body ], [ 75, %.case1 ], [ 69, %.case2 ], [ 999, %.case3 ], [ -1, %default ]
  ret i32 %merge

.case1:                                           ; preds = %body
  br label %.case

.case2:                                           ; preds = %body
  br label %.case

.case3:                                           ; preds = %body
  br label %.case

default:                                          ; preds = %body
  br label %.case
}

define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default.i [
    i32 1, label %.case
    i32 2, label %.case2
    i32 3, label %.case2
    i32 0, label %_D3barFiZi.exit
    i32 25, label %.case1.i
    i32 42, label %.case2.i
    i32 666, label %.case3.i
  ]

.case:                                            ; preds = %body, %_D3barFiZi.exit, %.case2
  %merge = phi i32 [ 23, %body ], [ %arg.n, %.case2 ], [ %merge.i, %_D3barFiZi.exit ]
  ret i32 %merge

.case2:                                           ; preds = %body, %body
  br label %.case

.case1.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

.case2.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

.case3.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

default.i:                                        ; preds = %body
  br label %_D3barFiZi.exit

_D3barFiZi.exit:                                  ; preds = %body, %.case1.i, %.case2.i, %.case3.i, %default.i
  %merge.i = phi i32 [ 75, %.case1.i ], [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, %default.i ], [ 0, %body ]
  br label %.case
}

See, foo don't even call bar anymore. No let's include the null check, as it would have been done if checking for empty on a regular range (that happen to internally use a sentinel) :

int foo(int n) {
	if(n != 0) {
		switch(n) {
			case 1:
				return 23;
		
			case 2:
			case 3:
				return n;
			
			default:
				return bar(n);
		}
	}
	
	return bar(n);
}

I didn't repeated bar as it doesn't change.

Which is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default.i [
    i32 0, label %_D3barFiZi.exit8
    i32 1, label %.case
    i32 2, label %.case2
    i32 3, label %.case2
    i32 666, label %.case3.i
    i32 25, label %_D3barFiZi.exit
    i32 42, label %.case2.i
  ]

.case:                                            ; preds = %body, %_D3barFiZi.exit8, %_D3barFiZi.exit, %.case2
  %merge = phi i32 [ %arg.n, %.case2 ], [ %merge.i, %_D3barFiZi.exit ], [ 0, %_D3barFiZi.exit8 ], [ 23, %body ]
  ret i32 %merge

.case2:                                           ; preds = %body, %body
  br label %.case

.case2.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

.case3.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

default.i:                                        ; preds = %body
  br label %_D3barFiZi.exit

_D3barFiZi.exit:                                  ; preds = %body, %.case2.i, %.case3.i, %default.i
  %merge.i = phi i32 [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, %default.i ], [ 75, %body ]
  br label %.case

_D3barFiZi.exit8:                                 ; preds = %body
  br label %.case
}

The exact damn same thing ! Let's try with a different schemes, we check first an early return :

int foo(int n) {
	if(n == 0) {
		return bar(n);	
	}
	
	switch(n) {
		case 1:
			return 23;
	
		case 2:
		case 3:
			return n;
		
		default:
			return bar(n);
	}
}

Guess what ? It is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default.i7 [
    i32 0, label %_D3barFiZi.exit
    i32 1, label %.case
    i32 2, label %.case2
    i32 3, label %.case2
    i32 666, label %.case3.i6
    i32 25, label %_D3barFiZi.exit8
    i32 42, label %.case2.i5
  ]

_D3barFiZi.exit:                                  ; preds = %body, %_D3barFiZi.exit8, %.case2, %.case
  %merge9 = phi i32 [ 0, %body ], [ 23, %.case ], [ %arg.n, %.case2 ], [ %merge.i3, %_D3barFiZi.exit8 ]
  ret i32 %merge9

.case:                                            ; preds = %body
  br label %_D3barFiZi.exit

.case2:                                           ; preds = %body, %body
  br label %_D3barFiZi.exit

.case2.i5:                                        ; preds = %body
  br label %_D3barFiZi.exit8

.case3.i6:                                        ; preds = %body
  br label %_D3barFiZi.exit8

default.i7:                                       ; preds = %body
  br label %_D3barFiZi.exit8

_D3barFiZi.exit8:                                 ; preds = %body, %.case2.i5, %.case3.i6, %default.i7
  %merge.i3 = phi i32 [ 69, %.case2.i5 ], [ 999, %.case3.i6 ], [ -1, %default.i7 ], [ 75, %body ]
  br label %_D3barFiZi.exit
}

Yet again the same thing.

A range using a sentinel internally is clearly beneficial for a lexer (like C strings). Defining explicitly a range that does so is plain useless.
February 28, 2013
On 2/28/13 12:20 PM, Dmitry Olshansky wrote:
> 28-Feb-2013 19:31, Andrei Alexandrescu пишет:
>> On 2/28/13 5:54 AM, Walter Bright wrote:
>>> On 2/27/2013 11:55 PM, Jonathan M Davis wrote:
>>>>> Again, please see how lexer.c works. I assure you, there is no double
>>>>> copying going on, nor is there a double test for the terminating 0.
>>>>
>>>> I know what the lexer does, and remember that it _doesn't_ operate on
>>>> ranges,
>>>> and there are subtle differences between being able to just use char*
>>>> and
>>>> trying to handle generic ranges.
>>>
>>> Hence the need to invent SentinelInputRange.
>>
>> I don't think the sentinel input range is a blocker for redoing the
>> parser (with ranges) in D. This discussion has probably run its course.
>
>
>> The right thing to do at this point is port the lexer and figure what
>> primitives are necessary from its input.
>>
>
> No need to port - look at std.d.lexer again. It was revamped and is
> ready for the new round of review I should say. Let's not use the old
> source for the new module and go the long path of :
> split off --> port --> patch up --> D-ify & re-write to ranges
>
> Instead we can just tweak the current std.d.lexer a little bit more and
> we have good clean-room lexer written in the idiomatic D. Well, it's
> getting there w.r.t. idiomaticness but it supports ranges including both
> random-access and forward ones (by transparently specializing for each
> one).

I think that's a good idea but I took a look at https://github.com/bhelyer/std.d.lexer/blob/master/std/d/lexer.d and I will destroy it. In a good sense :o).

Andrei
February 28, 2013
28-Feb-2013 22:08, Timon Gehr пишет:
> On 02/28/2013 05:48 PM, Dmitry Olshansky wrote:
>> ...
>>
>> line 300:
>>      case 0x1A:          // ^Z means end of file
>>      case 0:
>>                          break;
>>
>> On the lines you noted it claimed that that 0x1a is outdate. Along with
>> the fact that you allocate filesize+2 and fill the last 2 bytes with
>> zeros.
>>
>> In any case I see 0 and 0x1a as 2 values that act like sentinels i.e. a
>> tuple. And this is what spec says - any one of them is a sentinel.
>> Correct me if I'm wrong.
>>
>
> A sentinel is some data the original data is augmented with in order to
> simplify its processing.

I thought 0 was proposed as such. Might have misunderstood the proposition.

> The lexer acts the same on 0x1A and 0, but only the additional 0 at the
> end which does not occur in the input is the sentinel.


That would mean that when you see 0 or 0x1A you do a check to see if that's the end of input then decide it's really the end of input.

If that's the intended behavior I fail to decipher it from  here:
http://dlang.org/lex.html#EndOfFile

> The lexer may
> even encounter a 0 that is not a sentinel.


-- 
Dmitry Olshansky
February 28, 2013
28-Feb-2013 22:38, Andrei Alexandrescu пишет:
> On 2/28/13 12:20 PM, Dmitry Olshansky wrote:
>> 28-Feb-2013 19:31, Andrei Alexandrescu пишет:
>>> On 2/28/13 5:54 AM, Walter Bright wrote:
>>>> On 2/27/2013 11:55 PM, Jonathan M Davis wrote:
>>>>>> Again, please see how lexer.c works. I assure you, there is no double
>>>>>> copying going on, nor is there a double test for the terminating 0.
>>>>>
>>>>> I know what the lexer does, and remember that it _doesn't_ operate on
>>>>> ranges,
>>>>> and there are subtle differences between being able to just use char*
>>>>> and
>>>>> trying to handle generic ranges.
>>>>
>>>> Hence the need to invent SentinelInputRange.
>>>
>>> I don't think the sentinel input range is a blocker for redoing the
>>> parser (with ranges) in D. This discussion has probably run its course.
>>
>>
>>> The right thing to do at this point is port the lexer and figure what
>>> primitives are necessary from its input.
>>>
>>
>> No need to port - look at std.d.lexer again. It was revamped and is
>> ready for the new round of review I should say. Let's not use the old
>> source for the new module and go the long path of :
>> split off --> port --> patch up --> D-ify & re-write to ranges
>>
>> Instead we can just tweak the current std.d.lexer a little bit more and
>> we have good clean-room lexer written in the idiomatic D. Well, it's
>> getting there w.r.t. idiomaticness but it supports ranges including both
>> random-access and forward ones (by transparently specializing for each
>> one).
>
> I think that's a good idea but I took a look at
> https://github.com/bhelyer/std.d.lexer/blob/master/std/d/lexer.d and I
> will destroy it. In a good sense :o).

That's the wrong one. This is the one:
https://github.com/Hackerpilot/Dscanner/tree/range-based-lexer

Though feel free to destroy the other one too ;)
But I need your full powers with Dscanner first :o)

> Andrei


-- 
Dmitry Olshansky
February 28, 2013
On Thursday, 28 February 2013 at 18:38:59 UTC, Andrei Alexandrescu wrote:
> I think that's a good idea but I took a look at https://github.com/bhelyer/std.d.lexer/blob/master/std/d/lexer.d and I will destroy it. In a good sense :o).
>
> Andrei

https://github.com/Hackerpilot/Dscanner/blob/range-based-lexer/std/d/lexer.d
He's talking about this one.
February 28, 2013
On Thursday, February 28, 2013 08:52:45 Walter Bright wrote:
> I've given you two examples (lexer and regexp) where you are certainly not stuck with that, and those two cases matter.
> 
> > Pure input ranges fail utterly as you can't save them, so you get _zero_ lookahead [...]

My point is that in almost all cases, what'll end up happening with a sentinel range which is something like this:

struct SRange(R)
    if(is(Unqual!(ElementType!R) == char))
{
    enum char sentinel = 0;

    @property char front() { return _front; }
    @property bool empty() { return _front == sentinel; }
    void popFront()
    {
        _range.popFront();
        if(_range.empty)
            _front = sentinel;
    }

    this(R range)
    {
        if(_range.empty)
            _front = sentinel;
        else
            _front = _rang.front;
    }

private:
    char _front;
    R _range;
}

Notice that it has to check for empty every time that the front is popped, and it can't avoid that, because it's wrapping another range which is not a sentinel range. The only time that it can avoid that is if it's managing its own memory internally via an array or pointer or whatnot. And if it's just going to be a string or an array, I'd argue for simply special casing strings or arrays to skip unnecessary empty checks.

Also, with sentinel ranges, you'll be forced to wrap strings because of that sentinel marker that's required for isSentinelRange, meaning that you'll get something like

struct SRange
{
    enum char sentinel = 0;

    @property char front() { return _front; }
    @property bool empty() { return _front == sentinel; }
    void popFront() { _str = _str[1 .. $]; }

    this(string str)
    {
        _str = str;

        if(_str.empty)
            _str = [sentinel];
        else if(_str[$ - 1] != sentinel)
            _str ~= sentinel;
    }

private:
    string _str;
}

And while the compiler will hopefully optimize away all of the extra overhead of the wrapper type, any functions which would be able to optimize what they're doing for a string or array will not special case for this wrapper type, because they probably won't even know about it. A prime case where that might be an issue would be with std.utf.decode or std.utf.decodeFront which do some special casing for strings and which any unicode-aware lexer would have to use at least some of the time (even if the vast majority of the time, it can just check ASCII stuff).

So, for the most part, the only time that you'll get any performance boost out of this is with strings or arrays, and you'll take a performance hit if you're using functions which special case strings or arrays but not the wrapper sentinel type. And yet special-casing strings and arrays will give you exactly the same performance boost without this sentinel range concept.

- Jonathan M Davis
February 28, 2013
On 2/28/13 1:42 PM, Dmitry Olshansky wrote:
> That's the wrong one. This is the one:
> https://github.com/Hackerpilot/Dscanner/tree/range-based-lexer
>
> Though feel free to destroy the other one too ;)
> But I need your full powers with Dscanner first :o)

Whoa, that looks really good!

Andrei

February 28, 2013
On 2013-02-28 17:43, Walter Bright wrote:
> Consider the following code from lexer.c:
>
>     p++;
>     switch (*p)
>
> Written using an InputRange:
>
>     popFront();
>     switch (front)
>
> That code is INVALID. This is why a SentinelInputRange is necessary. You can't
> just use an InputRange in an invalid manner by convention.
>


I do not understand... Why make a special type of InputRange when you can achieve exactly that with a normal string with an added extra '\0' at the end and then use it without any calls to empty():

while(1) {
    ...
    popFront();
    switch(front) {
        case '\0': return;
        ...
    }
}

Cstrings are a very special case because we know the terminator beforehand and the check is trivial CPU-wise.
February 28, 2013
On 2/28/2013 9:05 AM, deadalnix wrote:
> I see nothing that can't be done by an optimizing compiler. Maybe something is,
> but I can't find it. And the example you pointed me aren't.

Tell me how front() will be optimized out to this.

            case '>':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKge;                   // >=
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshrass;           // >>=
                    }
                    else if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKushrass;      // >>>=
                        }
                        else
                            t->value = TOKushr;         // >>>
                    }
                    else
                        t->value = TOKshr;              // >>
                }
                else
                    t->value = TOKgt;                   // >
                return;
February 28, 2013
On 2/28/2013 10:52 AM, Jonathan M Davis wrote:
> Notice that it has to check for empty every time that the front is popped, and
> it can't avoid that,


Yes it can avoid it - that is the whole point. Notice there are no checks for the sentinel here - but the code is correct:

            case '>':
                p++;
                if (*p == '=')
                {   p++;
                    t->value = TOKge;                   // >=
                }
                else if (*p == '>')
                {   p++;
                    if (*p == '=')
                    {   p++;
                        t->value = TOKshrass;           // >>=
                    }
                    else if (*p == '>')
                    {   p++;
                        if (*p == '=')
                        {   p++;
                            t->value = TOKushrass;      // >>>=
                        }
                        else
                            t->value = TOKushr;         // >>>
                    }
                    else
                        t->value = TOKshr;              // >>
                }
                else
                    t->value = TOKgt;                   // >
                return;