bearophile 
Posted in reply to Philippe Sigaud
| Philippe Sigaud:
> Now, what I found more confusing is that, compiling with DMD or LDC, I got different results. Since Phobos code defining sin and cos in std.math and core.stdc.math is the same for DMD and LDC (duh!), I guess that means different intrinsics are used?
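LDC2 optimizes this code even worse than DMD: in the listings below DMD emits inline fsin/fcos instructions, while LDC2 emits calls to the sin and cos library routines.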
I opened a related thread:
http://forum.dlang.org/thread/rrryhcuqdffownpmlaen@forum.dlang.org
--------------------------
import core.stdc.stdio: printf;
import std.math: sin, cos;
double g(in double x) pure nothrow {
return sin(2.3 * x) + cos(3.7 * x);
}
void main() {
double x = 0;
foreach (immutable _; 0 .. 100_000_000)
x = x.g;
printf("%f\n", x);
}
/*
-O -release -inline -noboundscheck
DMD:
_D4test1gFNaNbxdZd comdat
fld qword ptr 4[ESP]
fmul qword ptr FLAT:_DATA[00h]
fsin
fld qword ptr 4[ESP]
fmul qword ptr FLAT:_DATA[08h]
fcos
faddp ST(1),ST
ret 8
__Dmain comdat
L0: sub ESP,0Ch
xor EAX,EAX
mov dword ptr 4[ESP],0
mov dword ptr 8[ESP],0
L15: fld qword ptr 4[ESP]
inc EAX
cmp EAX,05F5E100h
fmul qword ptr FLAT:_DATA[00h]
fsin
fld qword ptr 4[ESP]
fmul qword ptr FLAT:_DATA[08h]
fcos
faddp ST(1),ST
fstp qword ptr 4[ESP]
jb L15
push dword ptr 8[ESP]
mov EAX,offset FLAT:_DATA[010h]
push dword ptr 8[ESP]
push EAX
call near ptr _printf
add ESP,0Ch
add ESP,0Ch
xor EAX,EAX
ret
// -------------------------
LDC2:
__D4test1gFNaNbxdZd:
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $56, %esp
movsd LCPI0_0, %xmm0
mulsd 8(%ebp), %xmm0
movsd %xmm0, 40(%esp)
fldl 40(%esp)
fstpt (%esp)
calll __D3std4math3sinFNaNbNfeZe
subl $12, %esp
fstpt 12(%esp)
movsd 8(%ebp), %xmm0
mulsd LCPI0_1, %xmm0
movsd %xmm0, 48(%esp)
fldl 48(%esp)
fstpt (%esp)
calll __D3std4math3cosFNaNbNfeZe
subl $12, %esp
fldt 12(%esp)
faddp %st(1)
fstpl 32(%esp)
movsd 32(%esp), %xmm0
movsd %xmm0, 24(%esp)
fldl 24(%esp)
movl %ebp, %esp
popl %ebp
ret $8
__Dmain:
pushl %ebp
movl %esp, %ebp
pushl %esi
andl $-8, %esp
subl $72, %esp
xorps %xmm0, %xmm0
movl $100000000, %esi
.align 16, 0x90
LBB1_1:
movsd %xmm0, 16(%esp)
mulsd LCPI1_0, %xmm0
movsd %xmm0, 48(%esp)
fldl 48(%esp)
fstpt (%esp)
calll __D3std4math3sinFNaNbNfeZe
subl $12, %esp
fstpt 28(%esp)
movsd 16(%esp), %xmm0
mulsd LCPI1_1, %xmm0
movsd %xmm0, 56(%esp)
fldl 56(%esp)
fstpt (%esp)
calll __D3std4math3cosFNaNbNfeZe
subl $12, %esp
fldt 28(%esp)
faddp %st(1)
fstpl 40(%esp)
movsd 40(%esp), %xmm0
decl %esi
jne LBB1_1
movsd %xmm0, 4(%esp)
movl $_.str, (%esp)
calll ___mingw_printf
xorl %eax, %eax
leal -4(%ebp), %esp
popl %esi
popl %ebp
ret
*/
--------------------------
import core.stdc.stdio: printf;
import core.stdc.math: sin, cos;
double g(in double x) pure nothrow {
return sin(2.3 * x) + cos(3.7 * x);
}
void main() {
double x = 0;
foreach (immutable _; 0 .. 100_000_000)
x = x.g;
printf("%f\n", x);
}
/*
-O -release -inline -noboundscheck
LDC2:
__D5test21gFNaNbxdZd:
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $40, %esp
movsd LCPI0_0, %xmm0
mulsd 8(%ebp), %xmm0
movsd %xmm0, (%esp)
calll _sin
fstpl 32(%esp)
movsd 32(%esp), %xmm0
movsd %xmm0, 8(%esp)
movsd 8(%ebp), %xmm0
mulsd LCPI0_1, %xmm0
movsd %xmm0, (%esp)
calll _cos
fstpl 24(%esp)
movsd 8(%esp), %xmm0
addsd 24(%esp), %xmm0
movsd %xmm0, 16(%esp)
fldl 16(%esp)
movl %ebp, %esp
popl %ebp
ret $8
__Dmain:
pushl %ebp
movl %esp, %ebp
pushl %esi
andl $-8, %esp
subl $56, %esp
xorps %xmm0, %xmm0
movl $100000000, %esi
.align 16, 0x90
LBB1_1:
movsd %xmm0, 16(%esp)
mulsd LCPI1_0, %xmm0
movsd %xmm0, (%esp)
calll _sin
fstpl 40(%esp)
movsd 40(%esp), %xmm0
movsd %xmm0, 24(%esp)
movsd 16(%esp), %xmm0
mulsd LCPI1_1, %xmm0
movsd %xmm0, (%esp)
calll _cos
fstpl 32(%esp)
movsd 24(%esp), %xmm0
addsd 32(%esp), %xmm0
decl %esi
jne LBB1_1
movsd %xmm0, 4(%esp)
movl $_.str, (%esp)
calll ___mingw_printf
xorl %eax, %eax
leal -4(%ebp), %esp
popl %esi
popl %ebp
ret
*/
--------------------------
import core.stdc.stdio: printf;
version(LDC) {
import ldc.intrinsics;
double g(in double x) pure nothrow {
return llvm_sin(2.3 * x) + llvm_cos(3.7 * x);
}
}
void main() {
double x = 0;
foreach (immutable _; 0 .. 100_000_000)
x = x.g;
printf("%f\n", x);
}
/*
-O -release -inline -noboundscheck
LDC2:
__D5test31gFNaNbxdZd:
pushl %ebp
movl %esp, %ebp
andl $-8, %esp
subl $40, %esp
movsd LCPI0_0, %xmm0
mulsd 8(%ebp), %xmm0
movsd %xmm0, (%esp)
calll _sin
fstpl 24(%esp)
movsd 24(%esp), %xmm0
movsd %xmm0, 8(%esp)
movsd 8(%ebp), %xmm0
mulsd LCPI0_1, %xmm0
movsd %xmm0, (%esp)
calll _cos
fstpl 16(%esp)
movsd 8(%esp), %xmm0
addsd 16(%esp), %xmm0
movsd %xmm0, 32(%esp)
fldl 32(%esp)
movl %ebp, %esp
popl %ebp
ret $8
__Dmain:
pushl %ebp
movl %esp, %ebp
pushl %esi
andl $-8, %esp
subl $56, %esp
xorps %xmm0, %xmm0
movl $100000000, %esi
.align 16, 0x90
LBB1_1:
movsd %xmm0, 16(%esp)
mulsd LCPI1_0, %xmm0
movsd %xmm0, (%esp)
calll _sin
fstpl 40(%esp)
movsd 40(%esp), %xmm0
movsd %xmm0, 24(%esp)
movsd 16(%esp), %xmm0
mulsd LCPI1_1, %xmm0
movsd %xmm0, (%esp)
calll _cos
fstpl 32(%esp)
movsd 24(%esp), %xmm0
addsd 32(%esp), %xmm0
decl %esi
jne LBB1_1
movsd %xmm0, 4(%esp)
movl $_.str, (%esp)
calll ___mingw_printf
xorl %eax, %eax
leal -4(%ebp), %esp
popl %esi
popl %ebp
ret
*/
--------------------------
// C99 code
#include <stdio.h>
#include <math.h>
double g(const double x) {
return sin(2.3 * x) + cos(3.7 * x);
}
int main() {
double x = 0;
for (int i = 0; i < 100000000; i++)
x = g(x);
printf("%f\n", x);
return 0;
}
/*
gcc -fkeep-inline-functions -std=c99 -flto -S -Ofast test4.c -o test4.s
_g:
fldl 4(%esp)
fldl LC0
fmul %st(1), %st
fsin
fxch %st(1)
fmull LC1
fcos
faddp %st, %st(1)
ret
_main:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp
subl $16, %esp
call ___main
movl $100000000, %eax
fld1
fldz
fldl LC0
fxch %st(2)
jmp L19
.p2align 4,,7
L22:
fld %st(0)
fmul %st(2), %st
fsin
fxch %st(1)
fmull LC1
fcos
L19:
subl $1, %eax
faddp %st, %st(1)
jne L22
fstp %st(1)
fstpl 4(%esp)
movl $LC5, (%esp)
call _printf
xorl %eax, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
*/
Bye,
bearophile
|