Thread overview
sin, cos, other languages and DMD/LDC difference
Feb 08, 2014
Philippe Sigaud
Feb 08, 2014
bearophile
Feb 08, 2014
Philippe Sigaud
Feb 08, 2014
Daniel Murphy
Feb 09, 2014
Philippe Sigaud
Feb 08, 2014
Walter Bright
Feb 09, 2014
Philippe Sigaud
February 08, 2014
I was reading this thread on the Clojure Google group:

https://groups.google.com/forum/#!topic/clojure/kFNxGrRPf2k

Where the guy is mostly computing (converting from the C++ code):

import std.math;
import std.stdio;

double g(double x) {
    return sin(2.3*x) + cos(3.7*x);
}

void main() {
    double x = 0;
    foreach(_; 0..100_000_000)
        x = g(x);

    writeln(x);
}

He found different results for Clojure and for (non-JVM) languages, like C++, Lua or Python, which all returned the same value.

So I tested this code using D, and found yet another result. I agree with comments in the original thread that after 100M iterations what I see is mostly numerical noise (if that's not true, please enlighten me!), so I'm not otherwise stressed by this.

Now, what I found more confusing is that, compiling with DMD or LDC, I got different results. Since Phobos code defining sin and cos in std.math and core.stdc.math is the same for DMD and LDC (duh!), I guess that means different intrinsics are used?

February 08, 2014
Philippe Sigaud:

> Now, what I found more confusing is that, compiling with DMD or LDC, I got different results. Since Phobos code defining sin and cos in std.math and core.stdc.math is the same for DMD and LDC (duh!), I guess that means different intrinsics are used?

LDC2 optimizes this code even worse than DMD.

I opened a related thread:
http://forum.dlang.org/thread/rrryhcuqdffownpmlaen@forum.dlang.org

--------------------------

import core.stdc.stdio: printf;
import std.math: sin, cos;

double g(in double x) pure nothrow {
    return sin(2.3 * x) + cos(3.7 * x);
}

void main() {
    double x = 0;
    foreach (immutable _; 0 .. 100_000_000)
        x = x.g;

    printf("%f\n", x);
}



/*
-O -release -inline -noboundscheck

DMD:

_D4test1gFNaNbxdZd  comdat
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[00h]
        fsin
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[08h]
        fcos
        faddp   ST(1),ST
        ret 8

__Dmain comdat
L0:     sub ESP,0Ch
        xor EAX,EAX
        mov dword ptr 4[ESP],0
        mov dword ptr 8[ESP],0
L15:    fld qword ptr 4[ESP]
        inc EAX
        cmp EAX,05F5E100h
        fmul    qword ptr FLAT:_DATA[00h]
        fsin
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[08h]
        fcos
        faddp   ST(1),ST
        fstp    qword ptr 4[ESP]
        jb  L15
        push    dword ptr 8[ESP]
        mov EAX,offset FLAT:_DATA[010h]
        push    dword ptr 8[ESP]
        push    EAX
        call    near ptr _printf
        add ESP,0Ch
        add ESP,0Ch
        xor EAX,EAX
        ret

// -------------------------

LDC2:

__D4test1gFNaNbxdZd:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-8, %esp
    subl    $56, %esp
    movsd   LCPI0_0, %xmm0
    mulsd   8(%ebp), %xmm0
    movsd   %xmm0, 40(%esp)
    fldl    40(%esp)
    fstpt   (%esp)
    calll   __D3std4math3sinFNaNbNfeZe
    subl    $12, %esp
    fstpt   12(%esp)
    movsd   8(%ebp), %xmm0
    mulsd   LCPI0_1, %xmm0
    movsd   %xmm0, 48(%esp)
    fldl    48(%esp)
    fstpt   (%esp)
    calll   __D3std4math3cosFNaNbNfeZe
    subl    $12, %esp
    fldt    12(%esp)
    faddp   %st(1)
    fstpl   32(%esp)
    movsd   32(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    fldl    24(%esp)
    movl    %ebp, %esp
    popl    %ebp
    ret $8

__Dmain:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %esi
    andl    $-8, %esp
    subl    $72, %esp
    xorps   %xmm0, %xmm0
    movl    $100000000, %esi
    .align  16, 0x90
LBB1_1:
    movsd   %xmm0, 16(%esp)
    mulsd   LCPI1_0, %xmm0
    movsd   %xmm0, 48(%esp)
    fldl    48(%esp)
    fstpt   (%esp)
    calll   __D3std4math3sinFNaNbNfeZe
    subl    $12, %esp
    fstpt   28(%esp)
    movsd   16(%esp), %xmm0
    mulsd   LCPI1_1, %xmm0
    movsd   %xmm0, 56(%esp)
    fldl    56(%esp)
    fstpt   (%esp)
    calll   __D3std4math3cosFNaNbNfeZe
    subl    $12, %esp
    fldt    28(%esp)
    faddp   %st(1)
    fstpl   40(%esp)
    movsd   40(%esp), %xmm0
    decl    %esi
    jne LBB1_1
    movsd   %xmm0, 4(%esp)
    movl    $_.str, (%esp)
    calll   ___mingw_printf
    xorl    %eax, %eax
    leal    -4(%ebp), %esp
    popl    %esi
    popl    %ebp
    ret

--------------------------

import core.stdc.stdio: printf;
import core.stdc.math: sin, cos;

double g(in double x) pure nothrow {
    return sin(2.3 * x) + cos(3.7 * x);
}

void main() {
    double x = 0;
    foreach (immutable _; 0 .. 100_000_000)
        x = x.g;

    printf("%f\n", x);
}



/*
-O -release -inline -noboundscheck

LDC2:

__D5test21gFNaNbxdZd:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-8, %esp
    subl    $40, %esp
    movsd   LCPI0_0, %xmm0
    mulsd   8(%ebp), %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    fstpl   32(%esp)
    movsd   32(%esp), %xmm0
    movsd   %xmm0, 8(%esp)
    movsd   8(%ebp), %xmm0
    mulsd   LCPI0_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   24(%esp)
    movsd   8(%esp), %xmm0
    addsd   24(%esp), %xmm0
    movsd   %xmm0, 16(%esp)
    fldl    16(%esp)
    movl    %ebp, %esp
    popl    %ebp
    ret $8

__Dmain:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %esi
    andl    $-8, %esp
    subl    $56, %esp
    xorps   %xmm0, %xmm0
    movl    $100000000, %esi
    .align  16, 0x90
LBB1_1:
    movsd   %xmm0, 16(%esp)
    mulsd   LCPI1_0, %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    fstpl   40(%esp)
    movsd   40(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    movsd   16(%esp), %xmm0
    mulsd   LCPI1_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   32(%esp)
    movsd   24(%esp), %xmm0
    addsd   32(%esp), %xmm0
    decl    %esi
    jne LBB1_1
    movsd   %xmm0, 4(%esp)
    movl    $_.str, (%esp)
    calll   ___mingw_printf
    xorl    %eax, %eax
    leal    -4(%ebp), %esp
    popl    %esi
    popl    %ebp
    ret

--------------------------

import core.stdc.stdio: printf;

version(LDC) {
    import ldc.intrinsics;

    double g(in double x) pure nothrow {
        return llvm_sin(2.3 * x) + llvm_cos(3.7 * x);
    }
}

void main() {
    double x = 0;
    foreach (immutable _; 0 .. 100_000_000)
        x = x.g;

    printf("%f\n", x);
}



/*
-O -release -inline -noboundscheck

LDC2:

__D5test31gFNaNbxdZd:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-8, %esp
    subl    $40, %esp
    movsd   LCPI0_0, %xmm0
    mulsd   8(%ebp), %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    fstpl   24(%esp)
    movsd   24(%esp), %xmm0
    movsd   %xmm0, 8(%esp)
    movsd   8(%ebp), %xmm0
    mulsd   LCPI0_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   16(%esp)
    movsd   8(%esp), %xmm0
    addsd   16(%esp), %xmm0
    movsd   %xmm0, 32(%esp)
    fldl    32(%esp)
    movl    %ebp, %esp
    popl    %ebp
    ret $8

__Dmain:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %esi
    andl    $-8, %esp
    subl    $56, %esp
    xorps   %xmm0, %xmm0
    movl    $100000000, %esi
    .align  16, 0x90
LBB1_1:
    movsd   %xmm0, 16(%esp)
    mulsd   LCPI1_0, %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    fstpl   40(%esp)
    movsd   40(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    movsd   16(%esp), %xmm0
    mulsd   LCPI1_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   32(%esp)
    movsd   24(%esp), %xmm0
    addsd   32(%esp), %xmm0
    decl    %esi
    jne LBB1_1
    movsd   %xmm0, 4(%esp)
    movl    $_.str, (%esp)
    calll   ___mingw_printf
    xorl    %eax, %eax
    leal    -4(%ebp), %esp
    popl    %esi
    popl    %ebp
    ret

--------------------------

// C99 code
#include <stdio.h>
#include <math.h>

double g(const double x) {
    return sin(2.3 * x) + cos(3.7 * x);
}

int main() {
    double x = 0;
    for (int i = 0; i < 100000000; i++)
        x = g(x);

    printf("%f\n", x);
    return 0;
}


/*
gcc -fkeep-inline-functions -std=c99 -flto -S -Ofast test4.c -o test4.s

_g:
    fldl    4(%esp)
    fldl    LC0
    fmul    %st(1), %st
    fsin
    fxch    %st(1)
    fmull   LC1
    fcos
    faddp   %st, %st(1)
    ret

_main:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $16, %esp
    call    ___main
    movl    $100000000, %eax
    fld1
    fldz
    fldl    LC0
    fxch    %st(2)
    jmp L19
    .p2align 4,,7
L22:
    fld %st(0)
    fmul    %st(2), %st
    fsin
    fxch    %st(1)
    fmull   LC1
    fcos
L19:
    subl    $1, %eax
    faddp   %st, %st(1)
    jne L22
    fstp    %st(1)
    fstpl   4(%esp)
    movl    $LC5, (%esp)
    call    _printf
    xorl    %eax, %eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret

Bye,
bearophile
February 08, 2014
> LDC2 optimizes this code even worse than DMD.

I naively thought that optimizations did not change computation results.

I guess it's no different from C: same ocode, same computer, but different compiler => different results.

Oh well...
February 08, 2014
"Philippe Sigaud"  wrote in message news:mailman.70.1391874989.21734.digitalmars-d@puremagic.com...

> I naively thought that optimizations did not change computation results.
>
> I guess it's no different from C: same ocode, same computer, but
> different compiler => different results.
>
> Oh well...

Floating point sucks like that. 

February 08, 2014
On 2/8/2014 6:13 AM, bearophile wrote:
> Philippe Sigaud:
>
>> Now, what I found more confusing is that, compiling with DMD or LDC, I got
>> different results. Since Phobos code defining sin and cos in std.math and
>> core.stdc.math is the same for DMD and LDC (duh!), I guess that means
>> different intrinsics are used?
>
> LDC2 optimizes this code even worse than DMD.

Even worse? Mind telling me what is bad about this code?

        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[00h]
        fsin
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[08h]
        fcos
        faddp   ST(1),ST
        ret 8

BTW, the differences in results is not due to optimization, but to dmd keeping intermediate results to 80 bits of precision, while other compilers are doing 64 bit precision on intermediate results.
February 09, 2014
> Floating point sucks like that.

Looks like a mail signature :-)
February 09, 2014
> BTW, the differences in results is not due to optimization, but to dmd keeping intermediate results to 80 bits of precision, while other compilers are doing 64 bit precision on intermediate results.

OK, well noted. It also seems many languages silently use the same C library to power their math calculation, hence the same results.