Thread overview
Issues using the in-line assembler
Apr 04, 2018
solidstate1991
Apr 04, 2018
solidstate1991
Apr 05, 2018
Basile B.
Apr 05, 2018
Basile B.
Apr 05, 2018
solidstate1991
Apr 05, 2018
solidstate1991
April 04, 2018
I have this code:
// First attempt at the transform in SSE2 inline asm. According to the post,
// the compiler rejects it with "bad type/size of operand 'movq'" on the
// xy0, ac and bd operands.
asm @nogc{
	movq		XMM0, xy;
	paddd		XMM0, sXY;	// xy + sXY
	movq		XMM3, xy0;
	psubd		XMM0, XMM3;	// xy + sXY - x0y0
	movq		XMM1, ac;
	movq		XMM2, bd;
	// PMULUDQ multiplies only the low dword of each 64-bit lane (dwords 0 and 2).
	pmuludq		XMM1, XMM0;	// (ac * (xy + sXY - x0y0))
	psrlq		XMM1, 16;	// (ac * (xy + sXY - x0y0))>>16
	pmuludq		XMM2, XMM0; // (bd * (xy + sXY - x0y0))
	psrlq		XMM2, 16;	// (bd * (xy + sXY - x0y0))>>16
	// PADDQ is an addition, so the two shifted products are summed here.
	paddq		XMM1, XMM2; // (bd * (xy + sXY - x0y0))>>16 + (ac * (xy + sXY - x0y0))>>16
	// NOTE(review): XMM7 is never written in this snippet; presumably it is expected
	// to be zero so that x0y0 is widened to 64-bit lanes -- TODO confirm.
	punpckldq	XMM3, XMM7;
	paddq		XMM1, XMM3;	// (bd * (xy + sXY - x0y0))>>16 + (ac * (xy + sXY - x0y0))>>16 + x0y0
	movups		XMM2, XMM1;	// Convert 64 bit vectors into 32 bit ones
	psrldq		XMM2, 4;	// byte shift right by 4, i.e. one dword
	por			XMM2, XMM1;	
	movq		result, XMM2;	// store the low 64 bits to result
}
I'm getting a "bad type/size of operand 'movq'" error on xy0, ac, and bd when I try to compile it. All of the values are of type int[2]; xy is a function parameter, and sXY is created locally. How can I fix it?
April 04, 2018
I forgot to mention that xy0, ac, and bd are local to the class.
April 05, 2018
On Wednesday, 4 April 2018 at 21:00:44 UTC, solidstate1991 wrote:
> I have this code:
> asm @nogc{
> 	movq		XMM0, xy;
> 	paddd		XMM0, sXY;	// xy + sXY
> 	movq		XMM3, xy0;
> 	psubd		XMM0, XMM3;	// xy + sXY - x0y0
> 	movq		XMM1, ac;
> 	movq		XMM2, bd;
> 	pmuludq		XMM1, XMM0;	// (ac * (xy + sXY - x0y0))
> 	psrlq		XMM1, 16;	// (ac * (xy + sXY - x0y0))>>16
> 	pmuludq		XMM2, XMM0; // (bd * (xy + sXY - x0y0))
> 	psrlq		XMM2, 16;	// (bd * (xy + sXY - x0y0))>>16
> 	paddq		XMM1, XMM2; // (bd * (xy + sXY - x0y0))>>16 * (ac * (xy + sXY - x0y0))>>16
> 	punpckldq	XMM3, XMM7;
> 	paddq		XMM1, XMM3;	// (bd * (xy + sXY - x0y0))>>16 * (ac * (xy + sXY - x0y0))>>16 + x0y0
> 	movups		XMM2, XMM1;	// Convert 64 bit vectors into 32 bit ones
> 	psrldq		XMM2, 4;
> 	por			XMM2, XMM1;	
> 	movq		result, XMM2;
> }
> I'm getting "bad type/size of operand 'movq'" error on xy0, ac, and bd when I try to compile it. All of the values are the type of int[2], xy is function parameter, sXY is created locally. How can I fix it?

The "this" seems to be in R11, so you have to use the asm syntax for accessing members, <Type>.<member>.offsetof[R11], for example:

```
// Minimal example of reading a class member from naked inline asm.
class Foo
{
    double a = 123456;
    // Loads the field `a` into XMM0 with inline asm and returns.
    // NOTE(review): presumably XMM0 is the register used for a double return
    // value on the target ABI -- confirm for the platform in use.
    extern(D) double foo()
    {
        asm
        {
            // `naked`: the compiler emits no prologue/epilogue for this function.
            naked;
            // Reads `a` at [R11 + offset of a within Foo]. That R11 holds `this`
            // is an observation from the post, not a documented guarantee.
            movq    XMM0, Foo.a.offsetof[R11];
            ret;
        }
    }
}

// Driver for the example: constructs a Foo and prints the result of foo().
void main()
{
    import std.stdio;
    // NOTE(review): foo() is declared without parameters, so the (0,0) arguments
    // do not match its signature; the author's follow-up post says to remove them.
    (new Foo).foo(0,0).writeln;
}
```

However, I can't find any specification saying that R11 holds "this".
With a free function, just pass the instance as a parameter and replace R11 with the register in which the instance is passed.
April 05, 2018
On Thursday, 5 April 2018 at 04:48:02 UTC, Basile B. wrote:
> On Wednesday, 4 April 2018 at 21:00:44 UTC, solidstate1991 wrote:
> void main()
> {
>     import std.stdio;
>     (new Foo).foo(0,0).writeln;
> }
> ```
>

Ah sorry, the params must be removed ((new Foo).foo().writeln;)...

I was actually trying to play with params and extern linkage to see whether R11 always works...



April 05, 2018
On Thursday, 5 April 2018 at 04:48:02 UTC, Basile B. wrote:
> The "this" seems to be in R11, so you have to apply the asm syntax for accessing the members using <Type>.offsetof.<member>[R11], example:
>
> ```
> class Foo
> {
>     double a = 123456;
>     extern(D) double foo()
>     {
>         asm
>         {
>             naked;
>             movq    XMM0, Foo.a.offsetof[R11];
>             ret;
>         }
>     }
> }
>
> void main()
> {
>     import std.stdio;
>     (new Foo).foo(0,0).writeln;
> }
> ```
>
> However i cant find any specification saying that R11 is "this".
> With a free function just pass the instance as param and replace R11 by the register where the instance is passed.

It seems that the compiler lets it through if I change it like this:

// Second attempt: member operands are given explicit sizes (dword ptr / qword ptr)
// and addressed relative to EBP; the post reports that the compiler accepts this form.
asm @nogc{
	// NOTE(review): `naked` suppresses the compiler-generated prologue, so EBP is not
	// set up by this function -- confirm the EBP-relative operands address what is intended.
	naked;
	movd		XMM1, dword ptr sX[EBP];
	pslldq		XMM1, 4;
	// NOTE(review): MOVSS with a memory source zeroes bits 127:32 of the destination,
	// which would clear the sX value just shifted into dword 1 -- verify.
	movss		XMM1, dword ptr sY[EBP];
	movq		XMM0, xy;
	paddd		XMM0, XMM1;	// [x,y] + [sX,sY]
	movq		XMM3, qword ptr xy0[EBP];
	psubd		XMM0, XMM3;	// ([x,y] + [sX,sY] - [x_0,y_0])
	movq		XMM1, qword ptr ac[EBP];
	movq		XMM2, qword ptr bd[EBP];
	// PMULUDQ multiplies only the low dword of each 64-bit lane (dwords 0 and 2).
	pmuludq		XMM1, XMM0;	// [A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM1, 16;	// ([A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	// Build a second copy of the difference vector in XMM4 from one byte-shift right
	// and one byte-shift left of XMM0, OR-ed together, for the second multiply.
	movups		XMM4, XMM0;
	psrldq		XMM4, 4;
	pslldq		XMM0, 4;
	por			XMM4, XMM0;
	pmuludq		XMM2, XMM4; // [0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM2, 16;	// ([0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	paddq		XMM1, XMM2; // ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	// NOTE(review): XMM7 is never written in this snippet; presumably it is expected
	// to be zero so that [x_0,y_0] is widened to 64-bit lanes -- TODO confirm.
	punpckldq	XMM3, XMM7;
	paddq		XMM1, XMM3;	// ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16 + [x_0,y_0]
	movups		XMM0, XMM1;	// Convert 64 bit vectors into 32 bit ones
	psrldq		XMM0, 4;
	por			XMM0, XMM1;	
	// The result is left in XMM0; the post asks whether an int[2] can be returned this way.
	ret			;
}
I wonder if I can return an int[2] in XMM0. I can do some modifications to either move the result to the stack first, or add an import to core.simd (which needs to be refactored completely) and instead make the return type int4 on SIMD enabled CPUs.
April 05, 2018
Seems I found a better solution hidden in the docs:

// Third attempt: loads `this` into EBX and addresses the members relative to it.
// Takes the point xy as int[2]; the result is left in XMM0 before `ret`.
// Part of the function body is elided in the post ("(...)").
@nogc protected int[2] transformFunc(int[2] xy){
version(X86){
asm @nogc{
	// `naked`: the compiler emits no prologue/epilogue for this function.
	naked;
	// NOTE(review): EBX is overwritten without being saved or restored; check whether
	// the calling convention in use requires it to be preserved.
	mov			EBX, this;
	movd		XMM1, sX[EBX];
	pslldq		XMM1, 4;
	// NOTE(review): MOVSS with a memory source zeroes bits 127:32 of the destination,
	// which would clear the sX value just shifted into dword 1 -- verify.
	movss		XMM1, sY[EBX];
	movq		XMM0, xy;
	paddd		XMM0, XMM1;	// [x,y] + [sX,sY]
	movq		XMM3, xy0[EBX];
	psubd		XMM0, XMM3;	// ([x,y] + [sX,sY] - [x_0,y_0])
	movq		XMM1, ac[EBX];
	movq		XMM2, bd[EBX];
	// PMULUDQ multiplies only the low dword of each 64-bit lane (dwords 0 and 2).
	pmuludq		XMM1, XMM0;	// [A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM1, 16;	// ([A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	// Build a second copy of the difference vector in XMM4 from one byte-shift right
	// and one byte-shift left of XMM0, OR-ed together, for the second multiply.
	movups		XMM4, XMM0;
	psrldq		XMM4, 4;
	pslldq		XMM0, 4;
	por			XMM4, XMM0;
	pmuludq		XMM2, XMM4; // [0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM2, 16;	// ([0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	paddq		XMM1, XMM2; // ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	// NOTE(review): XMM7 is never written in this snippet; presumably it is expected
	// to be zero so that [x_0,y_0] is widened to 64-bit lanes -- TODO confirm.
	punpckldq	XMM3, XMM7;
	paddq		XMM1, XMM3;	// ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16 + [x_0,y_0]
	movups		XMM0, XMM1;	// Convert 64 bit vectors into 32 bit ones
	psrldq		XMM0, 4;
	por			XMM0, XMM1;	
	ret			;
}
}(...)
}