July 02, 2010
Couldn't some of the overhead come from the overhead of calling back()?

Andrei

David Simcha wrote:
> Sorry, the list wouldn't post my accidental double paste anyhow because it was too big.  Here's the disassembly of partition, showing only back() is getting called and pred seems to be getting inlined, contrary to what I had believed before.  So changing enforce() in back() to assert() gets rid of about half the overhead, but I have no idea where the other half is coming from.
> 
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
> PROC NEAR
> ;  COMDEF
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
>         sub     esp, 36                                 ; 0000 _ 83. EC, 24
>         mov     ecx, dword ptr [esp+2CH]                ; 0003 _ 8B. 4C
> 24, 2C
>         push    ebx                                     ; 0007 _ 53
>         mov     ebx, dword ptr [esp+2CH]                ; 0008 _ 8B. 5C
> 24, 2C
>         test    ebx, ebx                                ; 000C _ 85. DB
>         push    ebp                                     ; 000E _ 55
>         push    esi                                     ; 000F _ 56
>         mov     esi, eax                                ; 0010 _ 89. C6
>         push    edi                                     ; 0012 _ 57
>         jnz     ?_020                                   ; 0013 _ 75, 0E
>         pop     edi                                     ; 0015 _ 5F
>         mov     eax, ebx                                ; 0016 _ 8B. C3
>         mov     edx, ecx                                ; 0018 _ 8B. D1
>         pop     esi                                     ; 001A _ 5E
>         pop     ebp                                     ; 001B _ 5D
>         pop     ebx                                     ; 001C _ 5B
>         add     esp, 36                                 ; 001D _ 83. C4, 24
>         ret     8                                       ; 0020 _ C2, 0008
> 
> ?_020:  mov     dword ptr [esp+3CH], ecx                ; 0023 _ 89. 4C
> 24, 3C
>         mov     edx, dword ptr [esp+3CH]                ; 0027 _ 8B. 54
> 24, 3C
>         mov     dword ptr [esp+38H], ebx                ; 002B _ 89. 5C
> 24, 38
>         mov     ebx, dword ptr [esp+38H]                ; 002F _ 8B. 5C
> 24, 38
>         mov     dword ptr [esp+14H], ebx                ; 0033 _ 89. 5C
> 24, 14
>         mov     dword ptr [esp+18H], edx                ; 0037 _ 89. 54
> 24, 18
>         cmp     dword ptr [esp+38H], 0                  ; 003B _ 83. 7C
> 24, 38, 00
>         je      ?_023                                   ; 0040 _ 0F 84,
> 00000088
> ?_021:  mov     edx, dword ptr [esp+3CH]                ; 0046 _ 8B. 54
> 24, 3C
>         mov     eax, dword ptr [esp+38H]                ; 004A _ 8B. 44
> 24, 38
>         mov     ebx, dword ptr [edx]                    ; 004E _ 8B. 1A
>         push    dword ptr [esi+0CH]                     ; 0050 _ FF. 76, 0C
>         push    dword ptr [esi+8H]                      ; 0053 _ FF. 76, 08
>         mov     dword ptr [esp+18H], edx                ; 0056 _ 89. 54
> 24, 18
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 005A _ E8,
> 00000000(rel)
>         cmp     dword ptr [eax], ebx                    ; 005F _ 39. 18
>         jle     ?_022                                   ; 0061 _ 7E, 31
>         mov     edi, dword ptr [esp+38H]                ; 0063 _ 8B. 7C
> 24, 38
>         mov     ecx, dword ptr [esp+10H]                ; 0067 _ 8B. 4C
> 24, 10
>         dec     edi                                     ; 006B _ 4F
>         lea     edx, [ecx+4H]                           ; 006C _ 8D. 51, 04
>         mov     ebx, dword ptr [esp+14H]                ; 006F _ 8B. 5C
> 24, 14
>         dec     ebx                                     ; 0073 _ 4B
>         mov     dword ptr [esp+3CH], edx                ; 0074 _ 89. 54
> 24, 3C
>         mov     edx, dword ptr [esp+18H]                ; 0078 _ 8B. 54
> 24, 18
>         mov     eax, dword ptr [esp+14H]                ; 007C _ 8B. 44
> 24, 14
>         mov     dword ptr [esp+38H], edi                ; 0080 _ 89. 7C
> 24, 38
>         add     edx, 4                                  ; 0084 _ 83. C2, 04
>         mov     dword ptr [esp+14H], ebx                ; 0087 _ 89. 5C
> 24, 14
>         mov     dword ptr [esp+18H], edx                ; 008B _ 89. 54
> 24, 18
>         jmp     ?_025                                   ; 008F _ E9,
> 000000AA
> 
> ?_022:  push    dword ptr [esp+3CH]                     ; 0094 _ FF. 74
> 24, 3C
>         push    dword ptr [esp+3CH]                     ; 0098 _ FF. 74
> 24, 3C
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 009C _ E8,
> 00000000(rel)
>         mov     ebx, dword ptr [eax]                    ; 00A1 _ 8B. 18
>         push    dword ptr [esi+0CH]                     ; 00A3 _ FF. 76, 0C
>         push    dword ptr [esi+8H]                      ; 00A6 _ FF. 76, 08
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 00A9 _ E8,
> 00000000(rel)
>         cmp     dword ptr [eax], ebx                    ; 00AE _ 39. 18
>         jg      ?_024                                   ; 00B0 _ 7F, 2E
>         mov     edi, dword ptr [esp+38H]                ; 00B2 _ 8B. 7C
> 24, 38
>         dec     edi                                     ; 00B6 _ 4F
>         mov     eax, dword ptr [esp+38H]                ; 00B7 _ 8B. 44
> 24, 38
>         mov     dword ptr [esp+38H], edi                ; 00BB _ 89. 7C
> 24, 38
>         mov     edx, dword ptr [esp+3CH]                ; 00BF _ 8B. 54
> 24, 3C
>         mov     dword ptr [esp+3CH], edx                ; 00C3 _ 89. 54
> 24, 3C
>         cmp     dword ptr [esp+38H], 0                  ; 00C7 _ 83. 7C
> 24, 38, 00
>         jnz     ?_022                                   ; 00CC _ 75, C6
> ?_023:  mov     edx, dword ptr [esp+18H]                ; 00CE _ 8B. 54
> 24, 18
>         mov     eax, dword ptr [esp+14H]                ; 00D2 _ 8B. 44
> 24, 14
>         pop     edi                                     ; 00D6 _ 5F
>         pop     esi                                     ; 00D7 _ 5E
>         pop     ebp                                     ; 00D8 _ 5D
>         pop     ebx                                     ; 00D9 _ 5B
>         add     esp, 36                                 ; 00DA _ 83. C4, 24
>         ret     8                                       ; 00DD _ C2, 0008
> 
> ?_024:  push    dword ptr [esp+3CH]                     ; 00E0 _ FF. 74
> 24, 3C
>         push    dword ptr [esp+3CH]                     ; 00E4 _ FF. 74
> 24, 3C
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 00E8 _ E8,
> 00000000(rel)
>         mov     edi, eax                                ; 00ED _ 89. C7
>         mov     edx, dword ptr [esp+3CH]                ; 00EF _ 8B. 54
> 24, 3C
>         mov     ebp, edx                                ; 00F3 _ 89. D5
>         mov     eax, dword ptr [esp+38H]                ; 00F5 _ 8B. 44
> 24, 38
> ; Note: Zero displacement could be omitted
>         mov     ecx, dword ptr [edx]                    ; 00F9 _ 8B. 4A, 00
>         mov     ebx, dword ptr [edi]                    ; 00FC _ 8B. 1F
>         lea     edx, [edx+4H]                           ; 00FE _ 8D. 52, 04
>         mov     dword ptr [ebp], ebx                    ; 0101 _ 89. 5D, 00
>         mov     eax, dword ptr [esp+38H]                ; 0104 _ 8B. 44
> 24, 38
>         dec     eax                                     ; 0108 _ 48
>         mov     dword ptr [edi], ecx                    ; 0109 _ 89. 0F
>         mov     ebx, dword ptr [esp+14H]                ; 010B _ 8B. 5C
> 24, 14
>         mov     ecx, dword ptr [esp+18H]                ; 010F _ 8B. 4C
> 24, 18
>         mov     dword ptr [esp+38H], eax                ; 0113 _ 89. 44
> 24, 38
>         dec     ebx                                     ; 0117 _ 4B
>         mov     eax, dword ptr [esp+14H]                ; 0118 _ 8B. 44
> 24, 14
>         mov     dword ptr [esp+14H], ebx                ; 011C _ 89. 5C
> 24, 14
>         add     ecx, 4                                  ; 0120 _ 83. C1, 04
>         mov     ebx, dword ptr [esp+38H]                ; 0123 _ 8B. 5C
> 24, 38
>         mov     dword ptr [esp+18H], ecx                ; 0127 _ 89. 4C
> 24, 18
>         dec     ebx                                     ; 012B _ 4B
>         mov     eax, dword ptr [esp+38H]                ; 012C _ 8B. 44
> 24, 38
>         mov     dword ptr [esp+3CH], edx                ; 0130 _ 89. 54
> 24, 3C
>         mov     ecx, edx                                ; 0134 _ 8B. CA
>         mov     dword ptr [esp+38H], ebx                ; 0136 _ 89. 5C
> 24, 38
>         mov     dword ptr [esp+3CH], ecx                ; 013A _ 89. 4C
> 24, 3C
> ?_025:  cmp     dword ptr [esp+38H], 0                  ; 013E _ 83. 7C
> 24, 38, 00
>         jne     ?_021                                   ; 0143 _ 0F 85,
> FFFFFEFD
> ; Note: Immediate operand could be made smaller by sign extension
>         jmp     ?_023                                   ; 0149 _ E9,
> FFFFFF80
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
> ENDP
> 
> On Fri, Jul 2, 2010 at 5:25 PM, David Simcha <dsimcha at gmail.com <mailto:dsimcha at gmail.com>> wrote:
> 
>     Sorry for the double paste.  Please ignore everything before the
>     second time you see
>     _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2...
> 
> 
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> phobos mailing list
> phobos at puremagic.com
> http://lists.puremagic.com/mailman/listinfo/phobos
July 02, 2010

Brad Roberts, el  2 de julio a las 13:43 me escribiste:
> On Fri, 2 Jul 2010, Leandro Lucarella wrote:
> 
> > See bug 859 http://d.puremagic.com/issues/show_bug.cgi?id=859
> > 
> > Is not a great bug report but there is a little test case. This is mostly D1 because of ranges, but using opApply with the current state of inlining is almost impossible for performance-aware applications.
> 
> That one is on my radar already.  I'm going to finish working on the migration of the dmd test suite before digging back into ticket, but this one is high on my list for what to tackle next.

Thanks (for both =).

> Is it directly impacting this specific case (I haven't looked at the code involved)?

I don't know, I've hit this problem working in the GC, and as I said in this post, the (lack of) inlining is really showing: http://www.llucax.com.ar/blog/blog/post/-611dc288

I think my main problem was with opApply(), since the delegate passed to
opApply() is used in a loop by definition, inlining that delegate is
really important. Another reason to inline it (even when is a large and
heavy function is it will be always an anonymous delegate (unless you
call opApply() manually), so you are not even saving code size here.
opApply() delegates should be inlined *always* IMHO, I don't know if
it's easy to make a special case though.


-- 
Leandro Lucarella (AKA luca)                     http://llucax.com.ar/
----------------------------------------------------------------------
GPG Key: 5F5A8D05 (F8CD F9A7 BF00 5431 4145  104C 949E BFB6 5F5A 8D05)
----------------------------------------------------------------------
Wenn ist das nunst?ck git und slotermeyer? Ja!
Beiherhund das oder die Flipperwaldt gersput!
	-- Monty Python (no leer si sab?s alem?n)

July 02, 2010
On Friday, July 02, 2010 13:57:33 David Simcha wrote:

> bool evalPred(alias pred)(uint num) {
>     // This line is to make sure that this function doesn't get inlined
> into // main.  DMD doesn't inline functions that could throw.
>     if(num == uint.max) {
>         throw new Exception("");
>     }
> 
>     return pred(num);
> }

DMD doesn't inline functions that could throw? Isn't that rather restrictive? I'm assuming that that's functions with an actual throw statement rather than all functions that aren't nothrow, since that would _really_ be restrictive. But still, you could have a really small function that threw if a condition failed but otherwise just returned a value, and I would have thought that that would be worth inlining. I'm far from an expert once you're dealing with stuff that low- level, so there could be a very good reason that functions that could throw aren't inlined, but it does strike me as rather restrictive if you can't inline such functions.

- Jonathan M Davis
July 02, 2010
See http://d.puremagic.com/issues/show_bug.cgi?id=3490

On 7/2/2010 6:10 PM, Jonathan M Davis wrote:
> On Friday, July 02, 2010 13:57:33 David Simcha wrote:
>
> 
>> bool evalPred(alias pred)(uint num) {
>>      // This line is to make sure that this function doesn't get inlined
>> into // main.  DMD doesn't inline functions that could throw.
>>      if(num == uint.max) {
>>          throw new Exception("");
>>      }
>>
>>      return pred(num);
>> }
>> 
> DMD doesn't inline functions that could throw? Isn't that rather restrictive? I'm assuming that that's functions with an actual throw statement rather than all functions that aren't nothrow, since that would _really_ be restrictive. But still, you could have a really small function that threw if a condition failed but otherwise just returned a value, and I would have thought that that would be worth inlining. I'm far from an expert once you're dealing with stuff that low- level, so there could be a very good reason that functions that could throw aren't inlined, but it does strike me as rather restrictive if you can't inline such functions.
>
> - Jonathan M Davis
> _______________________________________________
> phobos mailing list
> phobos at puremagic.com
> http://lists.puremagic.com/mailman/listinfo/phobos
>
> 

July 03, 2010
On 7/2/2010 6:10 PM, Jonathan M Davis wrote:
>
> DMD doesn't inline functions that could throw? Isn't that rather restrictive? I'm assuming that that's functions with an actual throw statement rather than all functions that aren't nothrow, since that would _really_ be restrictive. But still, you could have a really small function that threw if a condition failed but otherwise just returned a value, and I would have thought that that would be worth inlining. I'm far from an expert once you're dealing with stuff that low- level, so there could be a very good reason that functions that could throw aren't inlined, but it does strike me as rather restrictive if you can't inline such functions.
>
> - Jonathan M Davis
> 

Yeah, I've started looking into this a little more.  DMD will not inline anything with a throw statement, though it will inline functions that are not nothrow.  However, in practice calling enforce() seems to prevent inlining of any function that uses enforce() because enforce() also has a lazy parameter, which isn't always visible because it has a default.  For example:

import std.contracts : enforce;

void main(string[] args) {
     getNum();
}

__gshared bool dummy = true;

uint getNum() {
     enforce(dummy);
     return 1;
}


The fact that getNum() isn't inlined seems ridiculous at first, until you see the disassembly of it.  enforce() is inlined because it doesn't contain a throw statement.  std.contracts.bailOut() actually throws the exception.  Here's the horribly complicated disassembly of getNum():

_D5test96getNumFZk PROC NEAR
;  COMDEF _D5test96getNumFZk
         sub     esp, 24                                 ; 0000 _ 83. EC, 18
         xor     eax, eax                                ; 0003 _ 31. C0
         mov     ecx, offset
FLAT:_D5test96getNumFZk14__dgliteral610MFZAxa; 0005 _ B9, 00000000(segrel)
         push    ebx                                     ; 000A _ 53
         mov     dl, byte ptr [_D5test95dummyb]          ; 000B _ 8A.
15, 00000000(segrel)
         xor     dl, 01H                                 ; 0011 _ 80. F2, 01
         mov     dword ptr [esp+4H], eax                 ; 0014 _ 89. 44
24, 04
         mov     dword ptr [esp+8H], ecx                 ; 0018 _ 89. 4C
24, 08
         jz      ?_007                                   ; 001C _ 74, 21
         mov     edx, ecx                                ; 001E _ 8B. D1
         push    dword ptr [?_003]                       ; 0020 _ FF.
35, 00000014(segrel)
         push    dword ptr [?_002]                       ; 0026 _ FF.
35, 00000010(segrel)
         push    10                                      ; 002C _ 6A, 0A
         mov     eax, dword ptr [esp+10H]                ; 002E _ 8B. 44
24, 10
         mov     ebx, dword ptr [esp+10H]                ; 0032 _ 8B. 5C
24, 10
         call    edx                                     ; 0036 _ FF. D2
         push    edx                                     ; 0038 _ 52
         push    eax                                     ; 0039 _ 50
         call    _D3std9contracts7bailOutFAyaixAaZv      ; 003A _ E8,
00000000(rel)
?_007:  mov     eax, 1                                  ; 003F _ B8,
00000001
         pop     ebx                                     ; 0044 _ 5B
         add     esp, 24                                 ; 0045 _ 83. C4, 18
         ret                                             ; 0048 _ C3
_D5test96getNumFZk ENDP

1 2 3
Next ›   Last »