Jump to page: 1 2
Thread overview
Benchmarking sigmoid function between C and D
Apr 07, 2018
Daniel Kozak
Apr 07, 2018
Daniel N
Apr 07, 2018
Daniel Kozak
Apr 07, 2018
Guillaume Piolat
Apr 09, 2018
Daniel Kozak
Apr 09, 2018
kinke
Apr 07, 2018
kinke
Apr 08, 2018
kinke
April 07, 2018
What am I doing wrong here that makes the D equivalent 2.5 times slower than its C equivalent?

Compilers used:

LDC2: LDC - the LLVM D compiler (1.8.0)
GCC: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609

11:36:39 ~/code/c/test2$ ldc2 sigmoid.d -O5 && ./sigmoid
Max deviation is 0.001664
10^7 iterations using sigmoid1: 308 ms
10^7 iterations using sigmoid2: 30 ms
11:36:55 ~/code/c/test2
$ gcc sigmoid.c -o sigmoid-c -O3 -lm 2>/dev/null && ./sigmoid-c
Max deviation is 0.001664
10^7 iterations using sigmoid1: 134 ms
10^7 iterations using sigmoid2: 29 ms
11:37:10 ~/code/c/test2
$

C code, taken from https://stackoverflow.com/questions/412019/math-optimization-in-c-sharp#412176:

```
#include <math.h>
#include <stdio.h>
#include <time.h>

/*
 * Lookup-table geometry for the approximated sigmoid.
 *
 * SCALE      - table entries per unit of input.
 * RESOLUTION - highest valid table index (the table holds RESOLUTION + 1 entries).
 * MIN / MAX  - input bounds outside of which sigmoid2() returns a constant.
 *
 * MIN and MAX are fully parenthesized so that they expand as a single value
 * inside any surrounding expression (e.g. `1.0f / MAX`).
 */
#define SCALE 320.0f
#define RESOLUTION 2047
#define MIN (-RESOLUTION / SCALE)
#define MAX (RESOLUTION / SCALE)

/* sigmoid_lut[i] holds sigmoid(i / SCALE) once init_sigmoid_lut() has run. */
static float sigmoid_lut[RESOLUTION + 1];

/*
 * Fill sigmoid_lut so that entry k holds 1 / (1 + e^(-k / SCALE)) for every
 * index k in [0, RESOLUTION]. Must be called before sigmoid2() is used.
 */
void init_sigmoid_lut(void) {
    int slot = 0;
    while (slot <= RESOLUTION) {
        /* Evaluated in double precision, then narrowed on assignment. */
        const double denom = 1.0 + exp(-slot / SCALE);
        sigmoid_lut[slot] = (1.0 / denom);
        ++slot;
    }
}

static float sigmoid1(const float value) {
    return (1.0f / (1.0f + expf(-value)));
}

/*
 * Table-driven sigmoid approximation.
 *
 * Inputs at or below MIN yield 0, inputs at or above MAX yield 1. In between,
 * |value| is scaled to a table index (rounded to nearest by adding 0.5 before
 * truncation) and negative inputs use sigmoid(-x) = 1 - sigmoid(x).
 */
static float sigmoid2(const float value) {
    if (value <= MIN) {
        return 0.0f;
    }
    if (value >= MAX) {
        return 1.0f;
    }

    if (!(value >= 0)) {
        /* Mirror the non-negative half of the table. */
        const int mirrored = (int)(-value * SCALE + 0.5f);
        return 1.0f-sigmoid_lut[mirrored];
    }

    const int slot = (int)(value * SCALE + 0.5f);
    return sigmoid_lut[slot];
}

/*
 * Sweep x over [-10, 10) in steps of 0.00001 and return the largest absolute
 * difference observed between sigmoid2(x) and sigmoid1(x).
 */
float test_error() {
    float worst = 0.0;
    float x = -10.0f;

    while (x < 10.0f) {
        const float exact = sigmoid1(x);
        const float approx = sigmoid2(x);
        const float deviation = fabsf(approx - exact);
        if (deviation > worst) {
            worst = deviation;
        }
        x += 0.00001f;
    }
    return worst;
}

/*
 * Time 10 sweeps of sigmoid1() over [-5, 5] in steps of 0.00001.
 *
 * Returns the processor time reported by clock() for the sweeps, in whole
 * milliseconds.
 */
int sigmoid1_perf() {
    /*
     * Every result is stored through a volatile object so the compiler has to
     * perform each call; a plain local that is never read could be dropped
     * together with the work that produced it.
     */
    volatile float sink = 0.0f;
    clock_t t0, t1;
    int i;
    float x;

    t0 = clock();
    for (i = 0; i < 10; i++) {
        for (x = -5.0f; x <= 5.0f; x += 0.00001f) {
            sink = sigmoid1(x);
        }
    }
    t1 = clock();

    /* Multiply before dividing so CLOCKS_PER_SEC values below 1000 or not
     * divisible by 1000 still give a meaningful result. */
    return (int)((double)(t1 - t0) * 1000.0 / CLOCKS_PER_SEC);
}

/*
 * Time 10 sweeps of sigmoid2() over [-5, 5] in steps of 0.00001.
 *
 * Returns the processor time reported by clock() for the sweeps, in whole
 * milliseconds.
 */
int sigmoid2_perf() {
    /*
     * Every result is stored through a volatile object so the compiler has to
     * perform each lookup; a plain local that is never read could be dropped
     * together with the work that produced it.
     */
    volatile float sink = 0.0f;
    clock_t t0, t1;
    int i;
    float x;

    t0 = clock();
    for (i = 0; i < 10; i++) {
        for (x = -5.0f; x <= 5.0f; x += 0.00001f) {
            sink = sigmoid2(x);
        }
    }
    t1 = clock();

    /* Multiply before dividing so CLOCKS_PER_SEC values below 1000 or not
     * divisible by 1000 still give a meaningful result. */
    return (int)((double)(t1 - t0) * 1000.0 / CLOCKS_PER_SEC);
}

/*
 * Build the lookup table, report the worst-case approximation error, then
 * report the timing of the exact and the table-driven sigmoid.
 */
int main(void) {
    init_sigmoid_lut();

    const float deviation = test_error();
    printf("Max deviation is %0.6f\n", deviation);

    const int exact_ms = sigmoid1_perf();
    printf("10^7 iterations using sigmoid1: %d ms\n", exact_ms);

    const int table_ms = sigmoid2_perf();
    printf("10^7 iterations using sigmoid2: %d ms\n", table_ms);

    return 0;
}
```

D equivalent:

```
module sigmoid;

import std.stdio;
import std.math;
import std.datetime.stopwatch;

/// Table entries per unit of input.
enum SCALE = 320.0f;
/// Highest valid index into sigmoid_lut.
enum RESOLUTION = 2047;
/// Upper input bound; sigmoid2 returns 1 at or above it.
enum MAX = RESOLUTION / SCALE;
/// Lower input bound; sigmoid2 returns 0 at or below it.
enum MIN = -MAX;

/// Lookup table filled by init_sigmoid_lut.
float[RESOLUTION + 1] sigmoid_lut;

/// Fills sigmoid_lut so that entry k holds 1 / (1 + e^(-k / SCALE)).
void init_sigmoid_lut() {
    foreach (int slot; 0 .. RESOLUTION + 1) {
        sigmoid_lut[slot] = (1.0 / (1.0 + exp(-slot / SCALE)));
    }
}

/// Reference sigmoid: 1 / (1 + e^(-value)).
private float sigmoid1(const float value) {
    const decay = exp(-value);
    return (1.0f / (1.0f + decay));
}

/**
 * Table-driven sigmoid approximation.
 *
 * Inputs at or below MIN yield 0, inputs at or above MAX yield 1. In between,
 * |value| is scaled to a table index (rounded to nearest by adding 0.5 before
 * truncation) and negative inputs use sigmoid(-x) = 1 - sigmoid(x).
 */
private float sigmoid2(const float value) {
    if (value <= MIN) {
        return 0.0f;
    }
    if (value >= MAX) {
        return 1.0f;
    }

    if (!(value >= 0)) {
        // Mirror the non-negative half of the table.
        const mirrored = cast(int)(-value * SCALE + 0.5f);
        return 1.0f-sigmoid_lut[mirrored];
    }

    const slot = cast(int)(value * SCALE + 0.5f);
    return sigmoid_lut[slot];
}

/**
 * Sweeps x over [-10, 10) in steps of 0.00001.
 *
 * Returns: the largest absolute difference seen between sigmoid2(x) and
 * sigmoid1(x).
 */
private float test_error() {
    float worst = 0.0;
    float x = -10.0f;

    while (x < 10.0f) {
        const float exact = sigmoid1(x);
        const float approx = sigmoid2(x);
        const float deviation = fabs(approx - exact);
        if (deviation > worst) {
            worst = deviation;
        }
        x += 0.00001f;
    }
    return worst;
}

/**
 * Times 10 sweeps of sigmoid1 over [-5, 5] in steps of 0.00001.
 *
 * Returns: the elapsed time reported by the StopWatch, in whole milliseconds.
 */
private auto sigmoid1_perf() {
    auto sw = StopWatch(AutoStart.yes);
    float y = 0.0f;

    foreach (i; 0 .. 10) {
        for (float x = -5.0f; x <= 5.0f; x += 0.00001f) {
            // Accumulate instead of overwrite so every call contributes to
            // the final value and none of them can be skipped.
            y += sigmoid1(x);
        }
    }
    auto elapsed = sw.peek.total!"msecs";

    // Make the accumulated value observable; an unused result lets the
    // optimizer remove the loop being measured. The branch is not expected
    // to be taken because every summand is a sigmoid value.
    if (y < 0.0f) writeln(y);
    return elapsed;
}

/**
 * Times 10 sweeps of sigmoid2 over [-5, 5] in steps of 0.00001.
 *
 * Returns: the elapsed time reported by the StopWatch, in whole milliseconds.
 */
private auto sigmoid2_perf() {
    auto sw = StopWatch(AutoStart.yes);
    float y = 0.0f;

    foreach (i; 0 .. 10) {
        for (float x = -5.0f; x <= 5.0f; x += 0.00001f) {
            // Accumulate instead of overwrite so every lookup contributes to
            // the final value and none of them can be skipped.
            y += sigmoid2(x);
        }
    }
    auto elapsed = sw.peek.total!"msecs";

    // Make the accumulated value observable; an unused result lets the
    // optimizer remove the loop being measured. The branch is not expected
    // to be taken for a correctly initialised table.
    if (y < 0.0f) writeln(y);
    return elapsed;
}

/// Builds the lookup table, then prints the maximum deviation and both timings.
int main() {
    init_sigmoid_lut();

    const deviation = test_error();
    writefln("Max deviation is %0.6f", deviation);

    const exactMs = sigmoid1_perf();
    writefln("10^7 iterations using sigmoid1: %s ms", exactMs);

    const tableMs = sigmoid2_perf();
    writefln("10^7 iterations using sigmoid2: %s ms", tableMs);

    return 0;
}
```
April 07, 2018
can you try it with c math functions?

instead of std.math, try to use core.stdc.math

On Sat, Apr 7, 2018 at 8:53 PM, Arun Chandrasekaran via Digitalmars-d-learn <digitalmars-d-learn@puremagic.com> wrote:

> What am I doing wrong here that makes the D equivalent 2.5 times slower than it's C equivalent?
>
> Compilers used:
>
> LDC2: LDC - the LLVM D compiler (1.8.0)
> GCC: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
>
> 11:36:39 ~/code/c/test2$ ldc2 sigmoid.d -O5 && ./sigmoid
> Max deviation is 0.001664
> 10^7 iterations using sigmoid1: 308 ms
> 10^7 iterations using sigmoid2: 30 ms
> 11:36:55 ~/code/c/test2
> $ gcc sigmoid.c -o sigmoid-c -O3 -lm 2>/dev/null && ./sigmoid-c
> Max deviation is 0.001664
> 10^7 iterations using sigmoid1: 134 ms
> 10^7 iterations using sigmoid2: 29 ms
> 11:37:10 ~/code/c/test2
> $
>
> C code, taken from https://stackoverflow.com/questions/412019/math-optimization-in-c-sharp#412176:
>
> ```
> #include <math.h>
> #include <stdio.h>
> #include <time.h>
>
> #define SCALE 320.0f
> #define RESOLUTION 2047
> #define MIN -RESOLUTION / SCALE
> #define MAX RESOLUTION / SCALE
>
> static float sigmoid_lut[RESOLUTION + 1];
>
> void init_sigmoid_lut(void) {
>     int i;
>     for (i = 0; i < RESOLUTION + 1; i++) {
>         sigmoid_lut[i] =  (1.0 / (1.0 + exp(-i / SCALE)));
>     }
> }
>
> static float sigmoid1(const float value) {
>     return (1.0f / (1.0f + expf(-value)));
> }
>
> static float sigmoid2(const float value) {
>     if (value <= MIN) return 0.0f;
>     if (value >= MAX) return 1.0f;
>     if (value >= 0) return sigmoid_lut[(int)(value * SCALE + 0.5f)];
>     return 1.0f-sigmoid_lut[(int)(-value * SCALE + 0.5f)];
> }
>
> float test_error() {
>     float x;
>     float emax = 0.0;
>
>     for (x = -10.0f; x < 10.0f; x+=0.00001f) {
>         float v0 = sigmoid1(x);
>         float v1 = sigmoid2(x);
>         float error = fabsf(v1 - v0);
>         if (error > emax) { emax = error; }
>     }
>     return emax;
> }
>
> int sigmoid1_perf() {
>     clock_t t0, t1;
>     int i;
>     float x, y = 0.0f;
>
>     t0 = clock();
>     for (i = 0; i < 10; i++) {
>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>             y = sigmoid1(x);
>         }
>     }
>     t1 = clock();
>     printf("", y); /* To avoid sigmoidX() calls being optimized away */
>     return (t1 - t0) / (CLOCKS_PER_SEC / 1000);
> }
>
> int sigmoid2_perf() {
>     clock_t t0, t1;
>     int i;
>     float x, y = 0.0f;
>     t0 = clock();
>     for (i = 0; i < 10; i++) {
>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>             y = sigmoid2(x);
>         }
>     }
>     t1 = clock();
>     printf("", y); /* To avoid sigmoidX() calls being optimized away */
>     return (t1 - t0) / (CLOCKS_PER_SEC / 1000);
> }
>
> int main(void) {
>     init_sigmoid_lut();
>     printf("Max deviation is %0.6f\n", test_error());
>     printf("10^7 iterations using sigmoid1: %d ms\n", sigmoid1_perf());
>     printf("10^7 iterations using sigmoid2: %d ms\n", sigmoid2_perf());
>
>     return 0;
> }
> ```
>
> D equivalent:
>
> ```
> module sigmoid;
>
> import std.stdio;
> import std.math;
> import std.datetime.stopwatch;
>
> enum SCALE = 320.0f;
> enum RESOLUTION = 2047;
> enum MIN = -RESOLUTION / SCALE;
> enum MAX = RESOLUTION / SCALE;
>
> float[RESOLUTION + 1] sigmoid_lut;
>
> void init_sigmoid_lut() {
>     int i;
>     for (i = 0; i < RESOLUTION + 1; i++) {
>         sigmoid_lut[i] =  (1.0 / (1.0 + exp(-i / SCALE)));
>     }
> }
>
> private float sigmoid1(const float value) {
>     return (1.0f / (1.0f + exp(-value)));
> }
>
> private float sigmoid2(const float value) {
>     if (value <= MIN) return 0.0f;
>     if (value >= MAX) return 1.0f;
>     if (value >= 0) return sigmoid_lut[cast(int)(value * SCALE + 0.5f)];
>     return 1.0f-sigmoid_lut[cast(int)(-value * SCALE + 0.5f)];
> }
>
> private float test_error() {
>     float x;
>     float emax = 0.0;
>
>     for (x = -10.0f; x < 10.0f; x+=0.00001f) {
>         float v0 = sigmoid1(x);
>         float v1 = sigmoid2(x);
>         float error = fabs(v1 - v0);
>         if (error > emax) { emax = error; }
>     }
>     return emax;
> }
>
> private auto sigmoid1_perf() {
>     auto sw = StopWatch(AutoStart.yes);
>     int i;
>     float x, y = 0.0f;
>
>     for (i = 0; i < 10; i++) {
>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>             y = sigmoid1(x);
>         }
>     }
>     return sw.peek.total!"msecs";
> }
>
> private auto sigmoid2_perf() {
>     auto sw = StopWatch(AutoStart.yes);
>     int i;
>     float x, y = 0.0f;
>     for (i = 0; i < 10; i++) {
>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>             y = sigmoid2(x);
>         }
>     }
>     return sw.peek.total!"msecs";
> }
>
> int main() {
>     init_sigmoid_lut();
>     writefln("Max deviation is %0.6f", test_error());
>     writefln("10^7 iterations using sigmoid1: %s ms", sigmoid1_perf());
>     writefln("10^7 iterations using sigmoid2: %s ms", sigmoid2_perf());
>
>     return 0;
> }
> ```
>


April 07, 2018
On Saturday, 7 April 2018 at 18:53:57 UTC, Arun Chandrasekaran wrote:
> What am I doing wrong here that makes the D equivalent 2.5 times slower than it's C equivalent?
>
> Compilers used:
>
> LDC2: LDC - the LLVM D compiler (1.8.0)
> GCC: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
>
> 11:36:39 ~/code/c/test2$ ldc2 sigmoid.d -O5 && ./sigmoid

When benchmarking against C you need to add more switches to ldc2, like:
-release -boundscheck=off

April 07, 2018
or for ldc http://docs.algorithm.dlang.io/latest/mir_math_common.html

On Sat, Apr 7, 2018 at 9:10 PM, Daniel Kozak <kozzi11@gmail.com> wrote:

> can you try it with c math functions?
>
> instead of std.math, try to use core.stdc.math
>
> On Sat, Apr 7, 2018 at 8:53 PM, Arun Chandrasekaran via Digitalmars-d-learn <digitalmars-d-learn@puremagic.com> wrote:
>
>> What am I doing wrong here that makes the D equivalent 2.5 times slower than it's C equivalent?
>>
>> Compilers used:
>>
>> LDC2: LDC - the LLVM D compiler (1.8.0)
>> GCC: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
>>
>> 11:36:39 ~/code/c/test2$ ldc2 sigmoid.d -O5 && ./sigmoid
>> Max deviation is 0.001664
>> 10^7 iterations using sigmoid1: 308 ms
>> 10^7 iterations using sigmoid2: 30 ms
>> 11:36:55 ~/code/c/test2
>> $ gcc sigmoid.c -o sigmoid-c -O3 -lm 2>/dev/null && ./sigmoid-c
>> Max deviation is 0.001664
>> 10^7 iterations using sigmoid1: 134 ms
>> 10^7 iterations using sigmoid2: 29 ms
>> 11:37:10 ~/code/c/test2
>> $
>>
>> C code, taken from https://stackoverflow.com/questions/412019/math-optimization-in-c-sharp#412176:
>>
>> ```
>> #include <math.h>
>> #include <stdio.h>
>> #include <time.h>
>>
>> #define SCALE 320.0f
>> #define RESOLUTION 2047
>> #define MIN -RESOLUTION / SCALE
>> #define MAX RESOLUTION / SCALE
>>
>> static float sigmoid_lut[RESOLUTION + 1];
>>
>> void init_sigmoid_lut(void) {
>>     int i;
>>     for (i = 0; i < RESOLUTION + 1; i++) {
>>         sigmoid_lut[i] =  (1.0 / (1.0 + exp(-i / SCALE)));
>>     }
>> }
>>
>> static float sigmoid1(const float value) {
>>     return (1.0f / (1.0f + expf(-value)));
>> }
>>
>> static float sigmoid2(const float value) {
>>     if (value <= MIN) return 0.0f;
>>     if (value >= MAX) return 1.0f;
>>     if (value >= 0) return sigmoid_lut[(int)(value * SCALE + 0.5f)];
>>     return 1.0f-sigmoid_lut[(int)(-value * SCALE + 0.5f)];
>> }
>>
>> float test_error() {
>>     float x;
>>     float emax = 0.0;
>>
>>     for (x = -10.0f; x < 10.0f; x+=0.00001f) {
>>         float v0 = sigmoid1(x);
>>         float v1 = sigmoid2(x);
>>         float error = fabsf(v1 - v0);
>>         if (error > emax) { emax = error; }
>>     }
>>     return emax;
>> }
>>
>> int sigmoid1_perf() {
>>     clock_t t0, t1;
>>     int i;
>>     float x, y = 0.0f;
>>
>>     t0 = clock();
>>     for (i = 0; i < 10; i++) {
>>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>>             y = sigmoid1(x);
>>         }
>>     }
>>     t1 = clock();
>>     printf("", y); /* To avoid sigmoidX() calls being optimized away */
>>     return (t1 - t0) / (CLOCKS_PER_SEC / 1000);
>> }
>>
>> int sigmoid2_perf() {
>>     clock_t t0, t1;
>>     int i;
>>     float x, y = 0.0f;
>>     t0 = clock();
>>     for (i = 0; i < 10; i++) {
>>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>>             y = sigmoid2(x);
>>         }
>>     }
>>     t1 = clock();
>>     printf("", y); /* To avoid sigmoidX() calls being optimized away */
>>     return (t1 - t0) / (CLOCKS_PER_SEC / 1000);
>> }
>>
>> int main(void) {
>>     init_sigmoid_lut();
>>     printf("Max deviation is %0.6f\n", test_error());
>>     printf("10^7 iterations using sigmoid1: %d ms\n", sigmoid1_perf());
>>     printf("10^7 iterations using sigmoid2: %d ms\n", sigmoid2_perf());
>>
>>     return 0;
>> }
>> ```
>>
>> D equivalent:
>>
>> ```
>> module sigmoid;
>>
>> import std.stdio;
>> import std.math;
>> import std.datetime.stopwatch;
>>
>> enum SCALE = 320.0f;
>> enum RESOLUTION = 2047;
>> enum MIN = -RESOLUTION / SCALE;
>> enum MAX = RESOLUTION / SCALE;
>>
>> float[RESOLUTION + 1] sigmoid_lut;
>>
>> void init_sigmoid_lut() {
>>     int i;
>>     for (i = 0; i < RESOLUTION + 1; i++) {
>>         sigmoid_lut[i] =  (1.0 / (1.0 + exp(-i / SCALE)));
>>     }
>> }
>>
>> private float sigmoid1(const float value) {
>>     return (1.0f / (1.0f + exp(-value)));
>> }
>>
>> private float sigmoid2(const float value) {
>>     if (value <= MIN) return 0.0f;
>>     if (value >= MAX) return 1.0f;
>>     if (value >= 0) return sigmoid_lut[cast(int)(value * SCALE + 0.5f)];
>>     return 1.0f-sigmoid_lut[cast(int)(-value * SCALE + 0.5f)];
>> }
>>
>> private float test_error() {
>>     float x;
>>     float emax = 0.0;
>>
>>     for (x = -10.0f; x < 10.0f; x+=0.00001f) {
>>         float v0 = sigmoid1(x);
>>         float v1 = sigmoid2(x);
>>         float error = fabs(v1 - v0);
>>         if (error > emax) { emax = error; }
>>     }
>>     return emax;
>> }
>>
>> private auto sigmoid1_perf() {
>>     auto sw = StopWatch(AutoStart.yes);
>>     int i;
>>     float x, y = 0.0f;
>>
>>     for (i = 0; i < 10; i++) {
>>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>>             y = sigmoid1(x);
>>         }
>>     }
>>     return sw.peek.total!"msecs";
>> }
>>
>> private auto sigmoid2_perf() {
>>     auto sw = StopWatch(AutoStart.yes);
>>     int i;
>>     float x, y = 0.0f;
>>     for (i = 0; i < 10; i++) {
>>         for (x = -5.0f; x <= 5.0f; x+=0.00001f) {
>>             y = sigmoid2(x);
>>         }
>>     }
>>     return sw.peek.total!"msecs";
>> }
>>
>> int main() {
>>     init_sigmoid_lut();
>>     writefln("Max deviation is %0.6f", test_error());
>>     writefln("10^7 iterations using sigmoid1: %s ms", sigmoid1_perf());
>>     writefln("10^7 iterations using sigmoid2: %s ms", sigmoid2_perf());
>>
>>     return 0;
>> }
>> ```
>>
>
>


April 07, 2018
On Saturday, 7 April 2018 at 19:14:27 UTC, Daniel Kozak wrote:
> or for ldc http://docs.algorithm.dlang.io/latest/mir_math_common.html
>
> On Sat, Apr 7, 2018 at 9:10 PM, Daniel Kozak <kozzi11@gmail.com> wrote:
>
>> can you try it with c math functions?
>>
>> instead of std.math, try to use core.stdc.math
>>
>> On Sat, Apr 7, 2018 at 8:53 PM, Arun Chandrasekaran via Digitalmars-d-learn <digitalmars-d-learn@puremagic.com> wrote:
>>
>>> [...]

Much better with mir.math.common, still a bit slower than C (even with larger loops):

10^7 iterations using sigmoid1: 168 ms
10^7 iterations using sigmoid2: 39 ms

Also LDC optimized away the computation. So I had to modify the code a bit.

```
/**
 * Times 10 sweeps of sigmoid1 over [-5, 5] in steps of 0.00001.
 *
 * Returns: a tuple of the summed sigmoid1 results and the elapsed time in
 * whole milliseconds; handing the sum back to the caller keeps it in use.
 */
private auto sigmoid1_perf() {
    auto sw = StopWatch(AutoStart.yes);
    float y = 0.0f;

    foreach (pass; 0 .. 10) {
        float x = -5.0f;
        while (x <= 5.0f) {
            y += sigmoid1(x);
            x += 0.00001f;
        }
    }
    auto t = sw.peek.total!"msecs";
    return tuple(y, t);
}

/**
 * Times 10 sweeps of sigmoid2 over [-5, 5] in steps of 0.00001.
 *
 * Returns: a tuple of the summed sigmoid2 results and the elapsed time in
 * whole milliseconds; handing the sum back to the caller keeps it in use.
 */
private auto sigmoid2_perf() {
    auto sw = StopWatch(AutoStart.yes);
    float y = 0.0f;

    foreach (pass; 0 .. 10) {
        float x = -5.0f;
        while (x <= 5.0f) {
            y += sigmoid2(x);
            x += 0.00001f;
        }
    }
    auto t = sw.peek.total!"msecs";
    return tuple(y, t);
}
```
April 07, 2018
On Saturday, 7 April 2018 at 20:33:13 UTC, Arun Chandrasekaran wrote:
> On Saturday, 7 April 2018 at 19:14:27 UTC, Daniel Kozak wrote:
>> or for ldc http://docs.algorithm.dlang.io/latest/mir_math_common.html
>>
>> On Sat, Apr 7, 2018 at 9:10 PM, Daniel Kozak <kozzi11@gmail.com> wrote:
>>
>>> can you try it with c math functions?
>>>
>>> instead of std.math, try to use core.stdc.math
>>>
>>> On Sat, Apr 7, 2018 at 8:53 PM, Arun Chandrasekaran via Digitalmars-d-learn <digitalmars-d-learn@puremagic.com> wrote:
>>>
>>>> [...]
>
> Much better with mir.math.common, still a bit slower than C (even with larger loops):
>
> 10^7 iterations using sigmoid1: 168 ms
> 10^7 iterations using sigmoid2: 39 ms
>
> Also LDC optimized away the computation. So I had to modify the code a bit.
>


Have you tried LLVM intrinsics? say llvm_exp
April 07, 2018
On Saturday, 7 April 2018 at 20:33:13 UTC, Arun Chandrasekaran wrote:
> Much better with mir.math.common, still a bit slower than C (even with larger loops):

As this appears to be benchmarking mostly the std.math.exp(float) performance - some/many basic algos in std.math, incl. exp(), are currently using the x87 FPU for all 3 floating-point types, although there are treacherous float/double overloads, sacrificing performance in many cases.
See https://github.com/dlang/phobos/pull/6272#issuecomment-373967109 (and the later numbers for current Intel and GCC compilers) for a performance comparison of some std.math algos incl. exp(). For LDC, the double/float versions of the few worked-on algos in that PR were sped-up by an overall factor of 3 [but only by 1.46 for exp(float)].
April 08, 2018
On Saturday, 7 April 2018 at 23:48:36 UTC, kinke wrote:
> On Saturday, 7 April 2018 at 20:33:13 UTC, Arun Chandrasekaran wrote:
>> [...]
>
> As this appears to be benchmarking mostly the std.math.exp(float) performance - some/many basic algos in std.math, incl. exp(), are currently using the x87 FPU for all 3 floating-point types, although there are treacherous float/double overloads, sacrificing performance in many cases.
> See https://github.com/dlang/phobos/pull/6272#issuecomment-373967109 (and the later numbers for current Intel and GCC compilers) for a performance comparison of some std.math algos incl. exp(). For LDC, the double/float versions of the few worked-on algos in that PR were sped-up by an overall factor of 3 [but only by 1.46 for exp(float)].

Interesting to see this, thanks! Did you also generate the bar graph plot using D?
April 08, 2018
On Sunday, 8 April 2018 at 05:35:10 UTC, Arun Chandrasekaran wrote:
> Did you also generate the bar graph plot using D?

Heh nope, I used LibreOffice Calc for that.
April 09, 2018
I would say he has, because AFAIK mir.math.common uses LLVM intrinsics

On Sat, Apr 7, 2018 at 11:53 PM, Guillaume Piolat via Digitalmars-d-learn < digitalmars-d-learn@puremagic.com> wrote:

> On Saturday, 7 April 2018 at 20:33:13 UTC, Arun Chandrasekaran wrote:
>
>> On Saturday, 7 April 2018 at 19:14:27 UTC, Daniel Kozak wrote:
>>
>>> or for ldc http://docs.algorithm.dlang.io/latest/mir_math_common.html
>>>
>>> On Sat, Apr 7, 2018 at 9:10 PM, Daniel Kozak <kozzi11@gmail.com> wrote:
>>>
>>> can you try it with c math functions?
>>>>
>>>> instead of std.math, try to use core.stdc.math
>>>>
>>>> On Sat, Apr 7, 2018 at 8:53 PM, Arun Chandrasekaran via Digitalmars-d-learn <digitalmars-d-learn@puremagic.com> wrote:
>>>>
>>>> [...]
>>>>>
>>>>
>> Much better with mir.math.common, still a bit slower than C (even with
>> larger loops):
>>
>> 10^7 iterations using sigmoid1: 168 ms
>> 10^7 iterations using sigmoid2: 39 ms
>>
>> Also LDC optimized away the computation. So I had to modify the code a bit.
>>
>>
>
> Have you tried LLVM intrinsics? say llvm_exp
>


« First   ‹ Prev
1 2