Bug in asm version of std.math.poly since 0.143

Feb 04, 2006

Feb 06, 2006

The following program, as extracted from phobos, outputs: $ ./asmbug The result should be 215.130000 The C version outputs 215.130000 The asm version outputs 56.100000 I don't know enough asm to solve this. Test program: import std.stdio; real poly_asm(real x, real[] A) in { assert(A.length > 0); } body { version (D_InlineAsm_X86) { asm // assembler by W. Bright { // EDX = (A.length - 1) * real.sizeof mov ECX,A[EBP] ; // ECX = A.length dec ECX ; lea EDX,[ECX][ECX*8] ; add EDX,ECX ; add EDX,A+4[EBP] ; fld real ptr [EDX] ; // ST0 = coeff[ECX] jecxz return_ST ; fld x[EBP] ; // ST0 = x fxch ST(1) ; // ST1 = x, ST0 = r align 4 ; L2: fmul ST,ST(1) ; // r *= x fld real ptr -10[EDX] ; sub EDX,10 ; // deg-- faddp ST(1),ST ; dec ECX ; jne L2 ; fxch ST(1) ; // ST1 = r, ST0 = x fstp ST(0) ; // dump x align 4 ; return_ST: ; ; } } else { writefl("Sorry, you don't seem to have InlineAsm_X86"); return 0; } } real poly_c(real x, real[] A) in { assert(A.length > 0); } body { int i = A.length - 1; real r = A[i]; while (--i >= 0) { r *= x; r += A[i]; } return r; } int main() { real x = 3.1; static real pp[] = [56.1, 32.7, 6]; writefln("The result should be %f",(56.1L + (32.7L + 6L * x) * x)); writefln("The C version outputs %f", poly_c(x, pp)); writefln("The asm version outputs %f", poly_asm(x, pp)); return 0; }

February 06, 2006

Re: Bug in asm version of std.math.poly since 0.143

Posted by Don Clugston
in reply to Grzegorz Adam Hankiewicz

Permalink

Don Clugston

Posted in reply to Grzegorz Adam Hankiewicz

Permalink

The code works correctly on DMD-Windows 0.145.
I think the problem is the extra alignment for reals  on Linux (12 bytes) compared to Windows (10 bytes). Just a guess, but I think that this line:
>                 sub     EDX,10              ; // deg--
should be
>                 sub     EDX,12              ; // deg--
on Linux.
This also needs to be changed:
>                 lea     EDX,[ECX][ECX*8]    ;
>                 add     EDX,ECX             ;
to
>                 lea     EDX,[ECX][ECX*4]    ;
>                 add     EDX,ECX             ;
                  add     EDX,EDX
which isn't optimal, but see if it works.

If that doesn't work, try changing the line
>                 fld     real ptr -10[EDX]   ;
to
>                 fld     real ptr -12[EDX]   ;


Grzegorz Adam Hankiewicz wrote:
> The following program, as extracted from phobos, outputs:
> 
>     $ ./asmbug
>     The result should be 215.130000
>     The C version outputs 215.130000
>     The asm version outputs 56.100000
> 
> I don't know enough asm to solve this. Test program:
> 
>     import std.stdio;
>         real poly_asm(real x, real[] A)
>     in
>     {
>         assert(A.length > 0);
>     }
>     body
>     {
>         version (D_InlineAsm_X86)
>         {
>             asm     // assembler by W. Bright
>             {
>                 // EDX = (A.length - 1) * real.sizeof
>                 mov     ECX,A[EBP]          ; // ECX = A.length
>                 dec     ECX                 ;
>                 lea     EDX,[ECX][ECX*8]    ;
>                 add     EDX,ECX             ;
>                 add     EDX,A+4[EBP]        ;
>                 fld     real ptr [EDX]      ; // ST0 = coeff[ECX]
>                 jecxz   return_ST           ;
>                 fld     x[EBP]              ; // ST0 = x
>                 fxch    ST(1)               ; // ST1 = x, ST0 = r
>                 align   4                   ;
>         L2:     fmul    ST,ST(1)            ; // r *= x
>                 fld     real ptr -10[EDX]   ;
>                 sub     EDX,10              ; // deg--
>                 faddp   ST(1),ST            ;
>                 dec     ECX                 ;
>                 jne     L2                  ;
>                 fxch    ST(1)               ; // ST1 = r, ST0 = x
>                 fstp    ST(0)               ; // dump x
>                 align   4                   ;
>         return_ST:                          ;
>                 ;
>             }
>         }
>         else
>         {
>             writefl("Sorry, you don't seem to have InlineAsm_X86");
>             return 0;
>         }
>     }
>         real poly_c(real x, real[] A)
>     in
>     {
>         assert(A.length > 0);
>     }
>     body
>     {
>         int i = A.length - 1;
>         real r = A[i];
>         while (--i >= 0)
>         {
>             r *= x;
>             r += A[i];
>         }
>         return r;
>     }
>         int main()
>     {
>         real x = 3.1;
>         static real pp[] = [56.1, 32.7, 6];
>             writefln("The result should be %f",(56.1L + (32.7L + 6L * x) * x));
>         writefln("The C version outputs %f", poly_c(x, pp));
>         writefln("The asm version outputs %f", poly_asm(x, pp));
>                 return 0;
>     }
> 
>

The Mon, 06 Feb 2006 09:28:33 +0100, Don Clugston wrote: > The code works correctly on DMD-Windows 0.145. I think the problem > is the extra alignment for reals on Linux (12 bytes) compared to > Windows (10 bytes). Just a guess, but I think that this line: > > sub EDX,10 ; // deg-- > should be > > sub EDX,12 ; // deg-- > on Linux. > This also needs to be changed: > > lea EDX,[ECX][ECX*8] ; > > add EDX,ECX ; > to > > lea EDX,[ECX][ECX*4] ; > > add EDX,ECX ; > add EDX,EDX > > If that doesn't work, try changing the line > > fld real ptr -10[EDX] ; > to > > fld real ptr -12[EDX] ; Unfortunately none of the proposed changes or their combinations do work. I either get wrong results or nans.

Forums