| |
| Posted by Cecil Ward | PermalinkReply |
|
Cecil Ward
| I wrote a very small procedure in D, and the x86-64 asm code generated by GDC 12.3 was excellent, whereas that from GDC 13.1 was insanely bloated and totally different. Note: the badness is independent of the -On optimisation level (-O3 was used initially).
Here’s the D code and following it, two asm code snippets:
====
/**
 * Minimal wrapper around the x86 CPUID instruction for leaves that take no ECX input.
 *
 * Params:
 *   eax = value loaded into EAX before CPUID executes (the requested leaf)
 *
 * Returns: a cpuid_abcd_t whose eax/ebx/ecx/edx fields hold the four registers
 *          written by CPUID.
 *
 * ECX is not loaded before the instruction, so this variant must only be used for
 * leaves where is_ecx_needed( eax ) is false; that precondition is asserted below.
 */
public
pragma( inline, true )
cpuid_abcd_t
cpuid_insn( in uint32_t eax ) pure nothrow @nogc @trusted
{ /* variant with the ecx argument omitted; deliberately the smallest possible wrapper */
assert( ! is_ecx_needed( eax ) ); // run-time precondition: no ecx is supplied, so the leaf must not require one
static assert( eax.sizeof * 8 == 32 ); // exact width check; this already implies the >= 32 check on the next line
static assert( eax.sizeof * 8 >= 32 ); // minimum width actually required for the "a" input operand
const uint32_t in_eax = eax; // const copy used as the asm input; exists for type-checking and to assert constness
static assert( in_eax.sizeof * 8 == 32 );
cpuid_abcd_t ret = void; /* deliberately left uninitialised: every field is an asm output operand below */
// All four result fields must be exactly 32 bits wide to match the register operands.
static assert( ret.eax.sizeof * 8 == 32 && ret.ebx.sizeof * 8 == 32
&& ret.ecx.sizeof * 8 == 32 && ret.edx.sizeof * 8 == 32 );
// GCC-style extended asm: template : outputs : inputs : clobbers
asm pure nothrow @nogc
{
// NOTE(review): cpuid has no operands, so it is spelled the same in both syntaxes;
// the closing .att_syntax assumes the compiler's own output is AT&T syntax -
// confirm this is still correct when building with -masm=intel.
".intel_syntax " ~ "\n\t" ~
"cpuid" ~ "\n\t" ~
".att_syntax \n"
: /* outputs : in 64-bit mode bits 63..32 of rax/rbx/rcx/rdx are guaranteed to be zeroed by cpuid */
"=a" ( ret.eax ), // write-only lvalue bound to eax; only bits 31..0 are significant
"=b" ( ret.ebx ), // likewise, bound to ebx
"=c" ( ret.ecx ), // likewise, bound to ecx
"=d" ( ret.edx ) // likewise, bound to edx
: /* inputs : */
"a" ( in_eax ) // read-only: the requested leaf, placed in eax
// no ecx input: this is the variant with the ecx input omitted
: /* no clobbers beyond the four output registers already listed */
/* cpuid is documented as affecting no flags, so no "cc" clobber is listed */
;
}
return ret;
}
/* ======== */
GDC 12.3:: -O3 -frelease -march=native
# Compiler output for cpuid_insn( eax ), Intel operand order (dst, src).
# In:  edi = the eax argument.
# Out: rax = ebx:eax, rdx = edx:ecx (two 32-bit results packed per 64-bit register).
push rbx              # cpuid overwrites rbx, which the caller expects preserved
mov eax, edi          # eax = requested cpuid leaf
cpuid                 # writes eax, ebx, ecx, edx
mov rsi, rdx          # rsi = cpuid's edx result
sal rbx, 32           # rbx = ebx << 32
mov eax, eax          # 32-bit move zero-extends eax into rax
mov edx, ecx          # rdx = zero-extended ecx
sal rsi, 32           # rsi = edx << 32
or rax, rbx           # rax = eax | (ebx << 32)
pop rbx               # restore the caller's rbx
or rdx, rsi           # rdx = ecx | (edx << 32)
ret
====
GDC 13.1 = v. bad, same switches: -O3 -frelease -march=native
# Compiler output for cpuid_insn( eax ), Intel operand order (dst, src).
# In:  edi = the eax argument.
# Out: rax = ebx:eax, rdx = edx:ecx.
# The four 32-bit cpuid results are gathered into one xmm register, spilled to
# the stack, and then reloaded into rax/rdx.
push rbp                              # save frame pointer; undone by `leave` below
mov eax, edi                          # eax = requested cpuid leaf
mov rbp, rsp                          # establish frame pointer
push rbx                              # cpuid overwrites rbx; saved copy lives at [rbp-8]
and rsp, -32                          # round rsp down to a 32-byte boundary
cpuid                                 # writes eax, ebx, ecx, edx
vmovd xmm3, eax                       # xmm3 dword 0 = eax
vmovd xmm2, ecx                       # xmm2 dword 0 = ecx
vpinsrd xmm1, xmm2, edx, 1            # xmm1 = { ecx, edx }
vpinsrd xmm0, xmm3, ebx, 1            # xmm0 = { eax, ebx }; vpinsrd takes a 32-bit source
vpunpcklqdq xmm4, xmm0, xmm1          # xmm4 = { eax, ebx, ecx, edx }
vmovdqa xmmword ptr [rsp-80], xmm4    # spill the 16-byte result below rsp
mov rax, qword ptr [rsp-80]           # rax = ebx:eax
mov rdx, qword ptr [rsp-72]           # rdx = edx:ecx
mov rbx, qword ptr [rbp-8]            # restore the caller's rbx from its save slot
leave                                 # mov rsp, rbp ; pop rbp
ret
/* ======== */
|