February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Walter Bright | On Thursday, 28 February 2013 at 17:00:51 UTC, Walter Bright wrote:
> On 2/28/2013 6:31 AM, Steven Schveighoffer wrote:
>> If this doesn't translate to the same code, I don't know why not.
>
> Try it and see with your favorite C compiler.
>
> Then try the lookahead cases I also posted.
You are being stubborn here.
You claim that this is significantly faster, but give no numbers.
You claim that the compiler can't do some optimization, when LLVM does it already.
See sample code bellow, which is a typical structure find in lexer.c :
int bar(int n) {
switch(n) {
case 0:
return 0; // Bad case !
case 25:
return 75;
case 42:
return 69;
case 666:
return 999;
default:
return -1;
}
}
int foo(int n) {
switch(n) {
case 1:
return 23;
case 2:
case 3:
return n;
default:
return bar(n);
}
}
As we see, foo don't check for 0, and bar does, but only when necessary, as bar isn't called. Or this is what you think. Let's see how SDC compile this :
define i32 @_D3barFiZi(i32 %arg.n) {
%n = alloca i32
store i32 %arg.n, i32* %n
br label %body
body: ; preds = %0
%1 = load i32* %n
switch i32 %1, label %default [
i32 0, label %.case
i32 25, label %.case1
i32 42, label %.case2
i32 666, label %.case3
]
switchstart: ; No predecessors!
br label %.case
.case: ; preds = %body, %switchstart
ret i32 0
.case1: ; preds = %body
ret i32 75
.case2: ; preds = %body
ret i32 69
.case3: ; preds = %body
ret i32 999
default: ; preds = %body
ret i32 -1
switchend: ; No predecessors!
unreachable
}
define i32 @_D3fooFiZi(i32 %arg.n) {
%n = alloca i32
store i32 %arg.n, i32* %n
br label %body
body: ; preds = %0
%1 = load i32* %n
switch i32 %1, label %default [
i32 1, label %.case
i32 2, label %.case1
i32 3, label %.case2
]
switchstart: ; No predecessors!
br label %.case
.case: ; preds = %body, %switchstart
ret i32 23
.case1: ; preds = %body
br label %.case2
.case2: ; preds = %body, %.case1
%2 = load i32* %n
ret i32 %2
default: ; preds = %body
%3 = load i32* %n
%4 = call i32 @_D3barFiZi(i32 %3)
ret i32 %4
switchend: ; No predecessors!
unreachable
}
Here is the raw result of the frontend in LLVM IR. Now here is what the optimizer give us :
define i32 @_D3barFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default [
i32 0, label %.case
i32 25, label %.case1
i32 42, label %.case2
i32 666, label %.case3
]
.case: ; preds = %default, %.case3, %.case2, %.case1, %body
%merge = phi i32 [ 0, %body ], [ 75, %.case1 ], [ 69, %.case2 ], [ 999, %.case3 ], [ -1, %default ]
ret i32 %merge
.case1: ; preds = %body
br label %.case
.case2: ; preds = %body
br label %.case
.case3: ; preds = %body
br label %.case
default: ; preds = %body
br label %.case
}
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default.i [
i32 1, label %.case
i32 2, label %.case2
i32 3, label %.case2
i32 0, label %_D3barFiZi.exit
i32 25, label %.case1.i
i32 42, label %.case2.i
i32 666, label %.case3.i
]
.case: ; preds = %body, %_D3barFiZi.exit, %.case2
%merge = phi i32 [ 23, %body ], [ %arg.n, %.case2 ], [ %merge.i, %_D3barFiZi.exit ]
ret i32 %merge
.case2: ; preds = %body, %body
br label %.case
.case1.i: ; preds = %body
br label %_D3barFiZi.exit
.case2.i: ; preds = %body
br label %_D3barFiZi.exit
.case3.i: ; preds = %body
br label %_D3barFiZi.exit
default.i: ; preds = %body
br label %_D3barFiZi.exit
_D3barFiZi.exit: ; preds = %body, %.case1.i, %.case2.i, %.case3.i, %default.i
%merge.i = phi i32 [ 75, %.case1.i ], [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, %default.i ], [ 0, %body ]
br label %.case
}
See, foo don't even call bar anymore. No let's include the null check, as it would have been done if checking for empty on a regular range (that happen to internally use a sentinel) :
int foo(int n) {
if(n != 0) {
switch(n) {
case 1:
return 23;
case 2:
case 3:
return n;
default:
return bar(n);
}
}
return bar(n);
}
I didn't repeated bar as it doesn't change.
Which is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default.i [
i32 0, label %_D3barFiZi.exit8
i32 1, label %.case
i32 2, label %.case2
i32 3, label %.case2
i32 666, label %.case3.i
i32 25, label %_D3barFiZi.exit
i32 42, label %.case2.i
]
.case: ; preds = %body, %_D3barFiZi.exit8, %_D3barFiZi.exit, %.case2
%merge = phi i32 [ %arg.n, %.case2 ], [ %merge.i, %_D3barFiZi.exit ], [ 0, %_D3barFiZi.exit8 ], [ 23, %body ]
ret i32 %merge
.case2: ; preds = %body, %body
br label %.case
.case2.i: ; preds = %body
br label %_D3barFiZi.exit
.case3.i: ; preds = %body
br label %_D3barFiZi.exit
default.i: ; preds = %body
br label %_D3barFiZi.exit
_D3barFiZi.exit: ; preds = %body, %.case2.i, %.case3.i, %default.i
%merge.i = phi i32 [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, %default.i ], [ 75, %body ]
br label %.case
_D3barFiZi.exit8: ; preds = %body
br label %.case
}
The exact damn same thing ! Let's try with a different schemes, we check first an early return :
int foo(int n) {
if(n == 0) {
return bar(n);
}
switch(n) {
case 1:
return 23;
case 2:
case 3:
return n;
default:
return bar(n);
}
}
Guess what ? It is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
switch i32 %arg.n, label %default.i7 [
i32 0, label %_D3barFiZi.exit
i32 1, label %.case
i32 2, label %.case2
i32 3, label %.case2
i32 666, label %.case3.i6
i32 25, label %_D3barFiZi.exit8
i32 42, label %.case2.i5
]
_D3barFiZi.exit: ; preds = %body, %_D3barFiZi.exit8, %.case2, %.case
%merge9 = phi i32 [ 0, %body ], [ 23, %.case ], [ %arg.n, %.case2 ], [ %merge.i3, %_D3barFiZi.exit8 ]
ret i32 %merge9
.case: ; preds = %body
br label %_D3barFiZi.exit
.case2: ; preds = %body, %body
br label %_D3barFiZi.exit
.case2.i5: ; preds = %body
br label %_D3barFiZi.exit8
.case3.i6: ; preds = %body
br label %_D3barFiZi.exit8
default.i7: ; preds = %body
br label %_D3barFiZi.exit8
_D3barFiZi.exit8: ; preds = %body, %.case2.i5, %.case3.i6, %default.i7
%merge.i3 = phi i32 [ 69, %.case2.i5 ], [ 999, %.case3.i6 ], [ -1, %default.i7 ], [ 75, %body ]
br label %_D3barFiZi.exit
}
Yet again the same thing.
A range using a sentinel internally is clearly beneficial for a lexer (like C strings). Defining explicitly a range that does so is plain useless.
|
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Dmitry Olshansky | On 2/28/13 12:20 PM, Dmitry Olshansky wrote: > 28-Feb-2013 19:31, Andrei Alexandrescu пишет: >> On 2/28/13 5:54 AM, Walter Bright wrote: >>> On 2/27/2013 11:55 PM, Jonathan M Davis wrote: >>>>> Again, please see how lexer.c works. I assure you, there is no double >>>>> copying going on, nor is there a double test for the terminating 0. >>>> >>>> I know what the lexer does, and remember that it _doesn't_ operate on >>>> ranges, >>>> and there are subtle differences between being able to just use char* >>>> and >>>> trying to handle generic ranges. >>> >>> Hence the need to invent SentinelInputRange. >> >> I don't think the sentinel input range is a blocker for redoing the >> parser (with ranges) in D. This discussion has probably run its course. > > >> The right thing to do at this point is port the lexer and figure what >> primitives are necessary from its input. >> > > No need to port - look at std.d.lexer again. It was revamped and is > ready for the new round of review I should say. Let's not use the old > source for the new module and go the long path of : > split off --> port --> patch up --> D-ify & re-write to ranges > > Instead we can just tweak the current std.d.lexer a little bit more and > we have good clean-room lexer written in the idiomatic D. Well, it's > getting there w.r.t. idiomaticness but it supports ranges including both > random-access and forward ones (by transparently specializing for each > one). I think that's a good idea but I took a look at https://github.com/bhelyer/std.d.lexer/blob/master/std/d/lexer.d and I will destroy it. In a good sense :o). Andrei |
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Timon Gehr | 28-Feb-2013 22:08, Timon Gehr пишет: > On 02/28/2013 05:48 PM, Dmitry Olshansky wrote: >> ... >> >> line 300: >> case 0x1A: // ^Z means end of file >> case 0: >> break; >> >> On the lines you noted it claimed that that 0x1a is outdate. Along with >> the fact that you allocate filesize+2 and fill the last 2 bytes with >> zeros. >> >> In any case I see 0 and 0x1a as 2 values that act like sentinels i.e. a >> tuple. And this is what spec says - any one of them is a sentinel. >> Correct me if I'm wrong. >> > > A sentinel is some data the original data is augmented with in order to > simplify its processing. I thought 0 was proposed as such. Might have misunderstood the proposition. > The lexer acts the same on 0x1A and 0, but only the additional 0 at the > end which does not occur in the input is the sentinel. That would mean that when you see 0 or 0x1A you do a check to see if that's the end of input then decide it's really the end of input. If that's the intended behavior I fail to decipher it from here: http://dlang.org/lex.html#EndOfFile > The lexer may > even encounter a 0 that is not a sentinel. -- Dmitry Olshansky |
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Andrei Alexandrescu | 28-Feb-2013 22:38, Andrei Alexandrescu пишет: > On 2/28/13 12:20 PM, Dmitry Olshansky wrote: >> 28-Feb-2013 19:31, Andrei Alexandrescu пишет: >>> On 2/28/13 5:54 AM, Walter Bright wrote: >>>> On 2/27/2013 11:55 PM, Jonathan M Davis wrote: >>>>>> Again, please see how lexer.c works. I assure you, there is no double >>>>>> copying going on, nor is there a double test for the terminating 0. >>>>> >>>>> I know what the lexer does, and remember that it _doesn't_ operate on >>>>> ranges, >>>>> and there are subtle differences between being able to just use char* >>>>> and >>>>> trying to handle generic ranges. >>>> >>>> Hence the need to invent SentinelInputRange. >>> >>> I don't think the sentinel input range is a blocker for redoing the >>> parser (with ranges) in D. This discussion has probably run its course. >> >> >>> The right thing to do at this point is port the lexer and figure what >>> primitives are necessary from its input. >>> >> >> No need to port - look at std.d.lexer again. It was revamped and is >> ready for the new round of review I should say. Let's not use the old >> source for the new module and go the long path of : >> split off --> port --> patch up --> D-ify & re-write to ranges >> >> Instead we can just tweak the current std.d.lexer a little bit more and >> we have good clean-room lexer written in the idiomatic D. Well, it's >> getting there w.r.t. idiomaticness but it supports ranges including both >> random-access and forward ones (by transparently specializing for each >> one). > > I think that's a good idea but I took a look at > https://github.com/bhelyer/std.d.lexer/blob/master/std/d/lexer.d and I > will destroy it. In a good sense :o). That's the wrong one. This is the one: https://github.com/Hackerpilot/Dscanner/tree/range-based-lexer Though feel free to destroy the other one too ;) But I need your full powers with Dscanner first :o) > Andrei -- Dmitry Olshansky |
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Andrei Alexandrescu | On Thursday, 28 February 2013 at 18:38:59 UTC, Andrei Alexandrescu wrote: > I think that's a good idea but I took a look at https://github.com/bhelyer/std.d.lexer/blob/master/std/d/lexer.d and I will destroy it. In a good sense :o). > > Andrei https://github.com/Hackerpilot/Dscanner/blob/range-based-lexer/std/d/lexer.d He's talking about this one. |
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Walter Bright | On Thursday, February 28, 2013 08:52:45 Walter Bright wrote:
> I've given you two examples (lexer and regexp) where you are certainly not stuck with that, and those two cases matter.
>
> > Pure input ranges fail utterly as you can't save them, so you get _zero_ lookahead [...]
My point is that in almost all cases, what'll end up happening with a sentinel range which is something like this:
struct SRange(R)
if(is(Unqual!(ElementType!R) == char))
{
enum char sentinel = 0;
@property char front() { return _front; }
@property bool empty() { return _front == sentinel; }
void popFront()
{
_range.popFront();
if(_range.empty)
_front = sentinel;
}
this(R range)
{
if(_range.empty)
_front = sentinel;
else
_front = _rang.front;
}
private:
char _front;
R _range;
}
Notice that it has to check for empty every time that the front is popped, and it can't avoid that, because it's wrapping another range which is not a sentinel range. The only time that it can avoid that is if it's managing its own memory internally via an array or pointer or whatnot. And if it's just going to be a string or an array, I'd argue for simply special casing strings or arrays to skip unnecessary empty checks.
Also, with sentinel ranges, you'll be forced to wrap strings because of that sentinel marker that's required for isSentinelRange, meaning that you'll get something like
struct SRange
{
enum char sentinel = 0;
@property char front() { return _front; }
@property bool empty() { return _front == sentinel; }
void popFront() { _str = _str[1 .. $]; }
this(string str)
{
_str = str;
if(_str.empty)
_str = [sentinel];
else if(_str[$ - 1] != sentinel)
_str ~= sentinel;
}
private:
string _str;
}
And while the compiler will hopefully optimize away all of the extra overhead of the wrapper type, any functions which would be able to optimize what they're doing for a string or array will not special case for this wrapper type, because they probably won't even know about it. A prime case where that might be an issue would be with std.utf.decode or std.utf.decodeFront which do some special casing for strings and which any unicode-aware lexer would have to use at least some of the time (even if the vast majority of the time, it can just check ASCII stuff).
So, for the most part, the only time that you'll get any performance boost out of this is with strings or arrays, and you'll take a performance hit if you're using functions which special case strings or arrays but not the wrapper sentinel type. And yet special-casing strings and arrays will give you exactly the same performance boost without this sentinel range concept.
- Jonathan M Davis
|
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Dmitry Olshansky | On 2/28/13 1:42 PM, Dmitry Olshansky wrote:
> That's the wrong one. This is the one:
> https://github.com/Hackerpilot/Dscanner/tree/range-based-lexer
>
> Though feel free to destroy the other one too ;)
> But I need your full powers with Dscanner first :o)
Whoa, that looks really good!
Andrei
|
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Walter Bright | On 2013-02-28 17:43, Walter Bright wrote:
> Consider the following code from lexer.c:
>
> p++;
> switch (*p)
>
> Written using an InputRange:
>
> popFront();
> switch (front)
>
> That code is INVALID. This is why a SentinelInputRange is necessary. You can't
> just use an InputRange in an invalid manner by convention.
>
I do not understand... Why make a special type of InputRange when you can achieve exactly that with a normal string with an added extra '\0' at the end and then use it without any calls to empty():
while(1) {
...
popFront();
switch(front) {
case '\0': return;
...
}
}
Cstrings are a very special case because we know the terminator beforehand and the check is trivial CPU-wise.
|
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to deadalnix | On 2/28/2013 9:05 AM, deadalnix wrote:
> I see nothing that can't be done by an optimizing compiler. Maybe something is,
> but I can't find it. And the example you pointed me aren't.
Tell me how front() will be optimized out to this.
case '>':
p++;
if (*p == '=')
{ p++;
t->value = TOKge; // >=
}
else if (*p == '>')
{ p++;
if (*p == '=')
{ p++;
t->value = TOKshrass; // >>=
}
else if (*p == '>')
{ p++;
if (*p == '=')
{ p++;
t->value = TOKushrass; // >>>=
}
else
t->value = TOKushr; // >>>
}
else
t->value = TOKshr; // >>
}
else
t->value = TOKgt; // >
return;
|
February 28, 2013 Re: Proposal for SentinelInputRange | ||||
---|---|---|---|---|
| ||||
Posted in reply to Jonathan M Davis | On 2/28/2013 10:52 AM, Jonathan M Davis wrote:
> Notice that it has to check for empty every time that the front is popped, and
> it can't avoid that,
Yes it can avoid it - that is the whole point. Notice there are no checks for the sentinel here - but the code is correct:
case '>':
p++;
if (*p == '=')
{ p++;
t->value = TOKge; // >=
}
else if (*p == '>')
{ p++;
if (*p == '=')
{ p++;
t->value = TOKshrass; // >>=
}
else if (*p == '>')
{ p++;
if (*p == '=')
{ p++;
t->value = TOKushrass; // >>>=
}
else
t->value = TOKushr; // >>>
}
else
t->value = TOKshr; // >>
}
else
t->value = TOKgt; // >
return;
|
Copyright © 1999-2021 by the D Language Foundation