July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #10 from bearophile_hugs@eml.cc 2013-07-10 04:15:16 PDT --- (In reply to comment #9) > Excellent, so could put these as templates then. :) I have improved your code a little. Follows the 32 bit asm for ldc2 and dmd: import std.traits: isIntegral, isUnsigned; T rol(T)(in T x, in uint y) @safe pure nothrow if (isIntegral!T && isUnsigned!T) { return cast(T)((x << y) | (x >> ((T.sizeof * 8) - y))); } T ror(T)(in T x, in uint y) @safe pure nothrow if (isIntegral!T && isUnsigned!T) { return cast(T)((x >> y) | (x << ((T.sizeof * 8) - y))); } void main() { // Tests to check for assembly output. { __gshared static ubyte xb; __gshared static ushort xs; __gshared static uint xi; __gshared static ulong xl; __gshared static uint yi; rol(xb, yi); // rolb ror(xb, yi); // rorb rol(xs, yi); // rolw ror(xs, yi); // rorw rol(xi, yi); // roll ror(xi, yi); // rorl rol(xl, yi); // version(X86_64) rolq ror(xl, yi); // version(X86_64) rorq } } /* ldmd2 -O -output-s __D5temp210__T3rolThZ3rolFNaNbNfxhxkZh: pushl %esi movzbl 8(%esp), %edx movb %al, %cl movl %edx, %esi shll %cl, %esi movl $8, %ecx subl %eax, %ecx shrl %cl, %edx orl %esi, %edx movzbl %dl, %eax popl %esi ret $4 __D5temp210__T3rorThZ3rorFNaNbNfxhxkZh: pushl %esi movzbl 8(%esp), %edx movb %al, %cl movl %edx, %esi shrl %cl, %esi movl $8, %ecx subl %eax, %ecx shll %cl, %edx orl %esi, %edx movzbl %dl, %eax popl %esi ret $4 __D5temp210__T3rolTtZ3rolFNaNbNfxtxkZt: pushl %esi movzwl 8(%esp), %edx movb %al, %cl movl %edx, %esi shll %cl, %esi movl $16, %ecx subl %eax, %ecx shrl %cl, %edx orl %esi, %edx movzwl %dx, %eax popl %esi ret $4 __D5temp210__T3rorTtZ3rorFNaNbNfxtxkZt: pushl %esi movzwl 8(%esp), %edx movb %al, %cl movl %edx, %esi shrl %cl, %esi movl $16, %ecx subl %eax, %ecx shll %cl, %edx orl %esi, %edx movzwl %dx, %eax popl %esi ret $4 __D5temp210__T3rolTkZ3rolFNaNbNfxkxkZk: movl 4(%esp), %edx movb %al, %cl roll %cl, %edx movl %edx, %eax ret $4 __D5temp210__T3rorTkZ3rorFNaNbNfxkxkZk: movl 4(%esp), %edx movb %al, %cl rorl %cl, %edx movl %edx, %eax ret $4 __D5temp210__T3rolTmZ3rolFNaNbNfxmxkZm: pushl %ebp pushl %ebx pushl %edi pushl %esi movl %eax, %ebx movl $64, %ecx subl %ebx, %ecx movl 24(%esp), %edx movl 20(%esp), %eax movl %eax, %esi shrdl %cl, %edx, %esi movl %edx, %edi shrl %cl, %edi xorl %ebp, %ebp testb $32, %cl cmovnel %edi, %esi cmovnel %ebp, %edi movb %bl, %cl shldl %cl, %eax, %edx movb %bl, %cl shll %cl, %eax testb $32, %bl cmovnel %eax, %edx cmovnel %ebp, %eax orl %esi, %eax orl %edi, %edx popl %esi popl %edi popl %ebx popl %ebp ret $8 __D5temp210__T3rorTmZ3rorFNaNbNfxmxkZm: pushl %ebp pushl %ebx pushl %edi pushl %esi movl %eax, %ecx movl 24(%esp), %edx movl 20(%esp), %eax movl %eax, %esi shrdl %cl, %edx, %esi movl %edx, %edi shrl %cl, %edi xorl %ebp, %ebp testb $32, %cl cmovnel %edi, %esi cmovnel %ebp, %edi movl $64, %ebx subl %ecx, %ebx movb %bl, %cl shldl %cl, %eax, %edx movb %bl, %cl shll %cl, %eax testb $32, %bl cmovnel %eax, %edx cmovnel %ebp, %eax orl %esi, %eax orl %edi, %edx popl %esi popl %edi popl %ebx popl %ebp ret $8 -------------------------------- dmd -O _D5temp210__T3rolThZ3rolFNaNbNfxhxkZh: push EAX push EAX movzx ECX,byte ptr 0Ch[ESP] mov EAX,ECX mov 0[ESP],ECX mov CL,4[ESP] mov EDX,0[ESP] shl AL,CL mov ECX,8 sub ECX,4[ESP] sar EDX,CL add ESP,8 or AL,DL ret 4 _D5temp210__T3rorThZ3rorFNaNbNfxhxkZh: push EAX push EAX movzx ECX,byte ptr 0Ch[ESP] mov EAX,ECX mov 0[ESP],ECX mov ECX,8 sub CL,4[ESP] mov EDX,0[ESP] shl AL,CL mov ECX,4[ESP] sar EDX,CL add ESP,8 or AL,DL ret 4 _D5temp210__T3rolTtZ3rolFNaNbNfxtxkZt: push EAX mov AX,8[ESP] mov CX,[ESP] mov DX,8[ESP] shl EAX,CL mov ECX,010h sub ECX,[ESP] and EDX,0FFFFh sar EDX,CL pop ECX or EAX,EDX ret 4 _D5temp210__T3rorTtZ3rorFNaNbNfxtxkZt: push EAX mov ECX,010h mov AX,8[ESP] sub ECX,[ESP] mov DX,8[ESP] shl EAX,CL mov ECX,[ESP] and EDX,0FFFFh sar EDX,CL or EAX,EDX pop ECX ret 4 _D5temp210__T3rolTkZ3rolFNaNbNfxkxkZk: push EAX mov EAX,8[ESP] mov ECX,[ESP] rol EAX,CL pop ECX ret 4 _D5temp210__T3rorTkZ3rorFNaNbNfxkxkZk: push EAX mov EAX,8[ESP] mov ECX,[ESP] ror EAX,CL pop ECX ret 4 _D5temp210__T3rolTmZ3rolFNaNbNfxmxkZm: push EAX mov ECX,0[ESP] mov EDX,0Ch[ESP] push EBX mov EAX,0Ch[ESP] test CL,020h jne L1A shld EDX,EAX,CL shl EAX,CL jmp short L20 L1A: shl EAX,CL mov EDX,EAX xor EAX,EAX L20: mov ECX,040h mov EBX,0Ch[ESP] push EDX mov EDX,014h[ESP] sub ECX,8[ESP] test CL,020h jne L3E shrd EBX,EDX,CL shr EDX,CL jmp short L44 L3E: shr EDX,CL mov EBX,EDX xor EDX,EDX L44: mov ECX,EDX or EAX,EBX pop EDX or EDX,ECX pop EBX pop ECX ret 8 _D5temp210__T3rorTmZ3rorFNaNbNfxmxkZm: push EAX mov ECX,0[ESP] mov EDX,0Ch[ESP] push EBX mov EAX,0Ch[ESP] test CL,020h jne L1A shrd EAX,EDX,CL shr EDX,CL jmp short L20 L1A: shr EDX,CL mov EAX,EDX xor EDX,EDX L20: mov ECX,040h mov EBX,0Ch[ESP] push EDX mov EDX,014h[ESP] sub ECX,8[ESP] test CL,020h jne L3E shld EDX,EBX,CL shl EBX,CL jmp short L44 L3E: shl EBX,CL mov EDX,EBX xor EBX,EBX L44: mov ECX,EDX or EAX,EBX pop EDX or EDX,ECX pop EBX pop ECX ret 8 */ -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #11 from Iain Buclaw <ibuclaw@ubuntu.com> 2013-07-10 04:57:05 PDT --- (In reply to comment #10) > (In reply to comment #9) > > > Excellent, so could put these as templates then. :) > > I have improved your code a little. Follows the 32 bit asm for ldc2 and dmd: > gdc > (ldc && dmd); It has no problem detecting all those cases. :o) -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #12 from bearophile_hugs@eml.cc 2013-07-10 06:45:05 PDT --- A bit better, with a pre-condition to catch some bugs: T rol(T)(in T x, in uint n) @safe pure nothrow if (isIntegral!T && isUnsigned!T) in { assert(n < (T.sizeof * 8)); } body { return cast(T)((x << n) | (x >> ((T.sizeof * 8) - n))); } T ror(T)(in T x, in uint n) @safe pure nothrow if (isIntegral!T && isUnsigned!T) in { assert(n < (T.sizeof * 8)); } body { return cast(T)((x >> n) | (x << ((T.sizeof * 8) - n))); } -------------- (In reply to comment #11) > gdc > (ldc && dmd); > > It has no problem detecting all those cases. :o) But is that asm generated by gdc actually faster than this asm generated by ldc2? One of the main points of adding those two templated functions to core.bitop is to have a single common standard pair of rotations that all present and future D compilers can recognize and optimize well. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #13 from bearophile_hugs@eml.cc 2013-07-10 06:46:44 PDT --- (In reply to comment #11) > It has no problem detecting all those cases. :o) Perhaps you want to show the asm generated by gdc for those functions? (Perhaps here there's material for a small enhancement request for LLVM.) -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #14 from Iain Buclaw <ibuclaw@ubuntu.com> 2013-07-10 06:53:27 PDT --- (In reply to comment #12) > (In reply to comment #11) > > > gdc > (ldc && dmd); > > > > It has no problem detecting all those cases. :o) > > But is that asm generated by gdc actually faster than this asm generated by ldc2? > Well, I would have to assume that ro{lr}{bwlq} is faster than shl/shr. Someone should get speed reports in. =) -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #15 from Iain Buclaw <ibuclaw@ubuntu.com> 2013-07-10 06:55:08 PDT --- (In reply to comment #13) > (In reply to comment #11) > > > It has no problem detecting all those cases. :o) > > Perhaps you want to show the asm generated by gdc for those functions? > > (Perhaps here there's material for a small enhancement request for LLVM.) You see the assembler output for roll/rorl. It's identical with exception to the operand suffix - eg: rolb/rorb for ubyte. But to give you a comparison. // LDC __D5temp210__T3rolThZ3rolFNaNbNfxhxkZh: pushl %esi movzbl 8(%esp), %edx movb %al, %cl movl %edx, %esi shll %cl, %esi movl $8, %ecx subl %eax, %ecx shrl %cl, %edx orl %esi, %edx movzbl %dl, %eax popl %esi ret $4 vs. // GDC __D5temp210__T3rolThZ3rolFNaNbNfxhxkZh: movl %edi, %eax movl %esi, %ecx rolb %cl, %al ret -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #16 from Iain Buclaw <ibuclaw@ubuntu.com> 2013-07-10 06:59:38 PDT --- (In reply to comment #13) > (In reply to comment #11) > > > It has no problem detecting all those cases. :o) > > Perhaps you want to show the asm generated by gdc for those functions? > > (Perhaps here there's material for a small enhancement request for LLVM.) Full listing: _D4temp10__T3rolThZ3rolFNaNbNfxhxkZh: movl %edi, %eax movl %esi, %ecx rolb %cl, %al ret _D4temp10__T3rorThZ3rorFNaNbNfxhxkZh: movl %edi, %eax movl %esi, %ecx rorb %cl, %al ret _D4temp10__T3rolTtZ3rolFNaNbNfxtxkZt: movl %edi, %eax movl %esi, %ecx rolw %cl, %ax ret _D4temp10__T3rorTtZ3rorFNaNbNfxtxkZt: movl %edi, %eax movl %esi, %ecx rorw %cl, %ax ret _D4temp10__T3rolTkZ3rolFNaNbNfxkxkZk: movl %edi, %eax movl %esi, %ecx roll %cl, %eax ret _D4temp10__T3rorTkZ3rorFNaNbNfxkxkZk: movl %edi, %eax movl %esi, %ecx rorl %cl, %eax ret _D4temp10__T3rolTmZ3rolFNaNbNfxmxkZm: movq %rdi, %rax movl %esi, %ecx rolq %cl, %rax ret _D4temp10__T3rorTmZ3rorFNaNbNfxmxkZm: movq %rdi, %rax movl %esi, %ecx rorq %cl, %rax ret -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 10, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #17 from bearophile_hugs@eml.cc 2013-07-10 09:29:51 PDT --- (In reply to comment #15) > But to give you a comparison. > > // LDC > > __D5temp210__T3rolThZ3rolFNaNbNfxhxkZh: > pushl %esi > movzbl 8(%esp), %edx > movb %al, %cl > movl %edx, %esi > shll %cl, %esi > movl $8, %ecx > subl %eax, %ecx > shrl %cl, %edx > orl %esi, %edx > movzbl %dl, %eax > popl %esi > ret $4 > > vs. > > // GDC > > __D5temp210__T3rolThZ3rolFNaNbNfxhxkZh: > movl %edi, %eax > movl %esi, %ecx > rolb %cl, %al > ret I have asked in the LLVM IRC channel, and it seems a limit/problem of the llvm back-end. So maybe it's worth writing an LLVM enhancement request. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 12, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #18 from hsteoh@quickfur.ath.cx 2013-07-11 21:37:37 PDT --- Hmph. I'm using bearophile's code with DMD git HEAD, running as dmd -O, and *none* of the cases produce a rotate instruction. :-( Could it be an issue with 32bit vs. 64-bit? I'm running in a purely 64-bit environment. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
July 12, 2013 [Issue 6829] Unsigned rotate standard function in Phobos | ||||
---|---|---|---|---|
| ||||
Posted in reply to bearophile_hugs@eml.cc | http://d.puremagic.com/issues/show_bug.cgi?id=6829 --- Comment #19 from hsteoh@quickfur.ath.cx 2013-07-11 21:40:34 PDT --- I can't get GDC git HEAD to recognize this pattern either. What am I doing wrong?? -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: ------- |
Copyright © 1999-2021 by the D Language Foundation