Thread overview
[Issue 2750] New: Optimize slice copy with size known at compile time
Mar 19, 2009
d-bugmail
Mar 19, 2009
d-bugmail
Mar 20, 2009
d-bugmail
Apr 01, 2009
d-bugmail
March 19, 2009
http://d.puremagic.com/issues/show_bug.cgi?id=2750

           Summary: Optimize slice copy with size known at compile time
           Product: D
           Version: 1.041
          Platform: PC
        OS/Version: Windows
            Status: NEW
          Keywords: patch, wrong-code
          Severity: normal
          Priority: P2
         Component: DMD
        AssignedTo: bugzilla@digitalmars.com
        ReportedBy: snake.scaly@gmail.com


It was discussed recently that the compiler intrinsic for slice copying was
slower than CRT memcpy():

http://tinyurl.com/cfxmva

In that particular case it was generating rep movsb despite the fact that the slice size was known at compile time.

I'm proposing a patch which fixes this problem.  Here is an example.  This code:

void main() {
    auto h = "hello\n";
    char buf[16];
    buf[0 .. h.length] = h;
}

compiled with -O -release -inline by the current 1.041:

__Dmain comdat
        assume  CS:__Dmain
                sub     ESP,020h
                mov     EDX,FLAT:_DATA[0Ch]
                mov     EAX,FLAT:_DATA[08h]
                push    EBX
                push    ESI
                mov     ESI,EDX
                push    EDI
                lea     EDI,0Ch[ESP]
                movsd
                movsb
                movsb
                lea     ECX,01Ch[ESP]
                mov     EBX,0FFFFFFFFh
                mov     [ECX],EBX
                mov     EAX,6
                lea     ESI,0Ch[ESP]
                mov     4[ECX],EBX
                lea     EDI,01Ch[ESP]
                mov     8[ECX],EBX
                mov     0Ch[ECX],EBX
                mov     ECX,EAX
                rep
                movsb
                xor     EAX,EAX
                pop     EDI
                pop     ESI
                pop     EBX
                add     ESP,020h
                ret
__Dmain ends

and by a patched compiler:

__Dmain comdat
        assume  CS:__Dmain
                sub     ESP,020h
                mov     EDX,FLAT:_DATA[0Ch]
                mov     EAX,FLAT:_DATA[08h]
                push    EBX
                push    ESI
                mov     ESI,EDX
                push    EDI
                lea     EDI,0Ch[ESP]
                movsd
                movsb
                movsb
                lea     ECX,01Ch[ESP]
                mov     EBX,0FFFFFFFFh
                mov     [ECX],EBX
                xor     EAX,EAX
                mov     4[ECX],EBX
                mov     8[ECX],EBX
                mov     0Ch[ECX],EBX
                pop     EDI
                pop     ESI
                pop     EBX
                add     ESP,020h
                ret
__Dmain ends

Here is the patch:

-------8<------------------------------
diff --git a/dmd/backend/cgelem.c b/dmd/backend/cgelem.c
index a2a4a1f..a80eefb 100644
--- a/dmd/backend/cgelem.c
+++ b/dmd/backend/cgelem.c
@@ -3773,6 +3773,16 @@ STATIC elem * el64_32(elem *e)
            e->E1 = el_selecte1(e->E1);
        }
        break;
+
+    case OPpair:
+       e = el_selecte1(el_selecte1(e));
+       goto L1;
+    case OPrpair:
+       e = el_selecte2(el_selecte1(e));
+       goto L1;
+    L1:
+       e->Ety = ty;
+       break;
   }
   return e;
 }
-------8<------------------------------


-- 

March 19, 2009
http://d.puremagic.com/issues/show_bug.cgi?id=2750





------- Comment #1 from snake.scaly@gmail.com  2009-03-19 15:40 -------
Sorry, bad example.  The patched compiler simply optimized the copy away, which is not bad in itself but is not what I wanted to demonstrate.  Here's a better example:

void main() {
    auto h = "hello\n";
    auto buf = new char[16];
    buf[0 .. h.length] = h;
}

Original 1.041:

__Dmain comdat
        assume  CS:__Dmain
L0:             sub     ESP,018h
                mov     EDX,FLAT:_DATA[0Ch]
                mov     EAX,FLAT:_DATA[08h]
                push    EBX
                push    ESI
                mov     ESI,EDX
                push    EDI
                lea     EDI,0Ch[ESP]
                movsd
                movsb
                movsb
                lea     ESI,0Ch[ESP]
                mov     ECX,offset FLAT:_D11TypeInfo_Aa6__initZ
                push    010h
                push    ECX
                call    near ptr __d_newarrayiT
                mov     EBX,6
                mov     ECX,EBX
                mov     EDI,EDX
                rep
                movsb
                add     ESP,8
                xor     EAX,EAX
                pop     EDI
                pop     ESI
                pop     EBX
                add     ESP,018h
                ret
__Dmain ends

Assembly produced by a patched compiler:

__Dmain comdat
        assume  CS:__Dmain
L0:             sub     ESP,01Ch
                mov     EDX,FLAT:_DATA[0Ch]
                mov     EAX,FLAT:_DATA[08h]
                push    ESI
                mov     ESI,EDX
                push    EDI
                lea     EDI,0Ch[ESP]
                movsd
                movsb
                movsb
                mov     ECX,offset FLAT:_D11TypeInfo_Aa6__initZ
                push    010h
                push    ECX
                call    near ptr __d_newarrayiT
                lea     ESI,014h[ESP]
                mov     EDI,EDX
                movsd
                movsb
                movsb
                add     ESP,8
                xor     EAX,EAX
                pop     EDI
                pop     ESI
                add     ESP,01Ch
                ret
__Dmain ends


-- 

March 20, 2009
http://d.puremagic.com/issues/show_bug.cgi?id=2750





------- Comment #2 from bugzilla@digitalmars.com  2009-03-20 00:12 -------
Interesting, since I had already introduced an almost identical patch 3 days ago! Anyhow, this will obviously go out in the next update.


-- 

April 01, 2009
http://d.puremagic.com/issues/show_bug.cgi?id=2750


bugzilla@digitalmars.com changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED




------- Comment #3 from bugzilla@digitalmars.com  2009-04-01 13:49 -------
Fixed in DMD 1.042 and 2.027.


--