Thread overview
Handling MMX Instructions
Mar 25, 2005
Isma'il Adeniran
Mar 25, 2005
Isma'il Adeniran
Mar 29, 2005
Walter
Mar 29, 2005
Isma'il Adeniran
Mar 30, 2005
Isma'il Adeniran
Mar 30, 2005
Jack
Mar 30, 2005
Jack
Mar 30, 2005
Isma'il Adeniran
Mar 30, 2005
Isma'il Adeniran
Apr 02, 2005
Walter
March 25, 2005
I think there's an error in the way dmc handles inline assembler MMX instructions.

I compiled and ran this code using dmc:

<code>
#include <stdio.h>

/*
 * Reproduction case for the inline-assembler problem discussed in this thread.
 *
 * Prints the two input arrays a1 and b1, adds them element by element inside
 * an _asm block, stores the four sums in c1 and prints them.
 * For the inputs below the sums are 29, 8, 38 and 50.
 * Always returns 0.
 */
int main(void)
{
   	int cnt;
	int a1[4] = {12, 1, 34, 17};
	int b1[4] = {17, 7, 4, 33};
	int c1[4];

	/* Echo the inputs so they can be compared with the results. */
	printf(" a1: ");
	for (cnt = 0; cnt < 4;cnt++)
		printf("%d\t", a1[cnt]);
	
	printf("\n b1: ");
    	for (cnt = 0; cnt < 4;cnt++)
		printf("%d\t", b1[cnt]);

	/*
	 * Steps performed by the _asm block below (comments are kept outside
	 * the block so the instruction text stays exactly as originally posted):
	 *
	 *  1. movq/movq/packssdw: load a1[0..1] into mm0 and a1[2..3] into mm1,
	 *     then pack the four 32-bit values into four 16-bit words in mm0
	 *     (packssdw narrows with signed saturation).
	 *  2. The same sequence packs b1 into mm1, using mm2 as scratch.
	 *  3. paddw adds the four 16-bit lanes: mm0 = a1[i] + b1[i].
	 *  4. ESI is pointed at c1 and EDI is cleared.
	 *  5. For each lane 0..3: pextrw copies that word of mm0 into EDI
	 *     (zero-extended), EDI is stored at [ESI], and ESI advances by 4,
	 *     i.e. to the next int of c1.
	 *  6. emms leaves MMX state before returning to ordinary C code.
	 *
	 * NOTE(review): the block modifies ESI and EDI without saving them
	 * itself; whether they are preserved is up to the compiler - confirm
	 * for the compiler in use.
	 */
	_asm {
		  movq     mm0, qword ptr a1
		  movq     mm1, qword ptr a1+8
		  packssdw mm0, mm1
		
		  movq     mm1, qword ptr b1
		  movq     mm2, qword ptr b1+8
		  packssdw mm1, mm2
		
         	  paddw    mm0, mm1
		
          	  lea      ESI, c1
		  xor      EDI,EDI
		
   		  pextrw   EDI,mm0, 0
		  mov      dword ptr [ESI], EDI
		  add      ESI, 4
		
		  pextrw   EDI,mm0, 1
		  mov      dword ptr [ESI], EDI
		  add      ESI, 4

		  pextrw   EDI,mm0, 2
		  mov      dword ptr [ESI], EDI
		  add      ESI, 4
		
		  pextrw   EDI,mm0, 3
		  mov      dword ptr [ESI], EDI
		  add      ESI, 4
		
	      emms
	};

	/* Print each sum; all zeroes here means the pextrw results never reached c1. */
	printf("\n\n          c1: \n");
	for (cnt = 0; cnt < 4;cnt++)
		printf(" a1[%d] + b1[%d] = %d\n", cnt, cnt, c1[cnt]);
	
	return 0;
}
</code>

<Output on execution:>

D:>pack (using dmc)
 a1: 12 1       34      17
 b1: 17 7       4       33

          c1:
 a1[0] + b1[0] = 0
 a1[1] + b1[1] = 0
 a1[2] + b1[2] = 0
 a1[3] + b1[3] = 0

This is incorrect.

I compiled and ran it with Open Watcom C/C++ and I got the correct result below:

D:>pack (using Open Watcom)
 a1: 12 1       34      17
 b1: 17 7       4       33

          c1:
 a1[0] + b1[0] = 29
 a1[1] + b1[1] = 8
 a1[2] + b1[2] = 38
 a1[3] + b1[3] = 50

Is this a bug with dmc?

Best regards

Isma'il
-----
March 25, 2005
It also compiles correctly with VC++.NET.

Isma'il Adeniran wrote:
> I think there's an error in the way dmc handles inline assembler MMX instructions.
> 
> I compiled and ran this code using dmc:
> 
> <code>
> #include <stdio.h>
> 
> int main(void)
> {
>        int cnt;
>     int a1[4] = {12, 1, 34, 17};
>     int b1[4] = {17, 7, 4, 33};
>     int c1[4];
> 
>     printf(" a1: ");
>     for (cnt = 0; cnt < 4;cnt++)
>         printf("%d\t", a1[cnt]);
>         printf("\n b1: ");
>         for (cnt = 0; cnt < 4;cnt++)
>         printf("%d\t", b1[cnt]);
> 
>     _asm {
>           movq     mm0, qword ptr a1
>           movq     mm1, qword ptr a1+8
>           packssdw mm0, mm1
>                  movq     mm1, qword ptr b1
>           movq     mm2, qword ptr b1+8
>           packssdw mm1, mm2
>                       paddw    mm0, mm1
>                        lea      ESI, c1
>           xor      EDI,EDI
>                     pextrw   EDI,mm0, 0
>           mov      dword ptr [ESI], EDI
>           add      ESI, 4
>                  pextrw   EDI,mm0, 1
>           mov      dword ptr [ESI], EDI
>           add      ESI, 4
> 
>           pextrw   EDI,mm0, 2
>           mov      dword ptr [ESI], EDI
>           add      ESI, 4
>                  pextrw   EDI,mm0, 3
>           mov      dword ptr [ESI], EDI
>           add      ESI, 4
>                  emms
>     };
> 
>     printf("\n\n          c1: \n");
>     for (cnt = 0; cnt < 4;cnt++)
>         printf(" a1[%d] + b1[%d] = %d\n", cnt, cnt, c1[cnt]);
>         return 0;
> }
> </code>
> 
> <Output on execution:>
> 
> D:>pack (using dmc)
>  a1: 12 1       34      17
>  b1: 17 7       4       33
> 
>           c1:
>  a1[0] + b1[0] = 0
>  a1[1] + b1[1] = 0
>  a1[2] + b1[2] = 0
>  a1[3] + b1[3] = 0
> 
> This is incorrect.
> 
> I compiled and ran it with Open Watcom C/C++ and I got the correct resul below:
> 
> D:>pack (using Open Watcom)
>  a1: 12 1       34      17
>  b1: 17 7       4       33
> 
>           c1:
>  a1[0] + b1[0] = 29
>  a1[1] + b1[1] = 8
>  a1[2] + b1[2] = 38
>  a1[3] + b1[3] = 50
> 
> Is this a bug with dmc?
> 
> Best regards
> 
> Isma'il
> -----
March 29, 2005
If you could post assembler code generated by OW C++ for that function, I can compare the two.

"Isma'il Adeniran" <ismail@tamarindseed.com> wrote in message news:d21smg$mi7$1@digitaldaemon.com...
> I compiled and ran it with Open Watcom C/C++ and I got the correct resul


March 29, 2005
This is the assembler code generated with bits cut out. I haven't really combed through it extensively but both compilers generate practically identical code for the inline assembly (as expected)!

<assembly>

; Open Watcom output for main() of the test program (32-bit code segment).
; Labels L$9..L$13 used as printf arguments are not defined in this excerpt.
_TEXT		SEGMENT	BYTE PUBLIC USE32 'CODE'
		ASSUME CS:_TEXT, DS:DGROUP, SS:DGROUP
; L$1: four little-endian dwords 12, 1, 34, 17 (initial values of a1).
L$1:
    DB	0cH, 0, 0, 0, 1, 0, 0, 0
    DB	22H, 0, 0, 0, 11H, 0, 0, 0
; L$2: four little-endian dwords 17, 7, 4, 33 (initial values of b1).
L$2:
    DB	11H, 0, 0, 0, 7, 0, 0, 0
    DB	4, 0, 0, 0, 21H, 0, 0, 0
main:
    ; NOTE(review): __CHK is not defined here; presumably the runtime's
    ; stack-check helper, called with the required stack size - confirm.
    push        54H
    call        near ptr FLAT:__CHK
    ; Prologue: save ebx/esi/edi/ebp, set up the frame, reserve 30H bytes.
    ; Frame layout as used below: a1 at -30H[ebp], b1 at -20H[ebp],
    ; c1 at -10H[ebp].
    push        ebx
    push        esi
    push        edi
    push        ebp
    mov         ebp,esp
    sub         esp,30H
    ; Copy the 16 initializer bytes at L$1 into a1 (four dword moves).
    lea         edi,-30H[ebp]
    mov         esi,offset FLAT:L$1
    movsd
    movsd
    movsd
    movsd
    ; Copy the 16 initializer bytes at L$2 into b1.
    lea         edi,-20H[ebp]
    mov         esi,offset FLAT:L$2
    movsd
    movsd
    movsd
    movsd
    ; printf with the single string argument L$9, then loop index ebx = 0.
    push        offset FLAT:L$9
    call        near ptr FLAT:printf
    add         esp,4
    xor         ebx,ebx
; L$3: for ebx = 0..3, printf(L$10, a1[ebx]).
L$3:
    mov         edx,dword ptr -30H[ebp+ebx*4]
    push        edx
    push        offset FLAT:L$10
    call        near ptr FLAT:printf
    add         esp,8
    inc         ebx
    cmp         ebx,4
    jl          L$3
    ; printf with the single string argument L$11, then ebx = 0 again.
    push        offset FLAT:L$11
    call        near ptr FLAT:printf
    add         esp,4
    xor         ebx,ebx
; L$4: for ebx = 0..3, printf(L$10, b1[ebx]).
L$4:
    mov         ecx,dword ptr -20H[ebp+ebx*4]
    push        ecx
    push        offset FLAT:L$10
    call        near ptr FLAT:printf
    add         esp,8
    inc         ebx
    cmp         ebx,4
    jl          L$4
    ; --- start of the code emitted for the _asm block ---
    ; Pack a1's four dwords into four words in mm0.
    movq        mm0,-30H[ebp]
    movq        mm1,-28H[ebp]
    packssdw    mm0,mm1
    ; Pack b1's four dwords into four words in mm1.
    movq        mm1,-20H[ebp]
    movq        mm2,-18H[ebp]
    packssdw    mm1,mm2
    ; mm0 = word-wise sum of the two packed vectors.
    paddw       mm0,mm1
    ; esi -> c1; each pextrw below extracts one word of mm0 into edi,
    ; which is then stored through esi before esi advances by 4.
    lea         esi,-10H[ebp]
    xor         edi,edi
    pextrw      edi,mm0,0
    mov         dword ptr [esi],edi
    add         esi,4
    pextrw      edi,mm0,1
    mov         dword ptr [esi],edi
    add         esi,4
    pextrw      edi,mm0,2
    mov         dword ptr [esi],edi
    add         esi,4
    pextrw      edi,mm0,3
    mov         dword ptr [esi],edi
    add         esi,4
    emms
    ; --- end of the code emitted for the _asm block ---
    ; printf with the single string argument L$12, then ebx = 0.
    push        offset FLAT:L$12
    call        near ptr FLAT:printf
    add         esp,4
    xor         ebx,ebx
; L$5: for ebx = 0..3, printf(L$13, ebx, ebx, c1[ebx]).
L$5:
    mov         esi,dword ptr -10H[ebp+ebx*4]
    push        esi
    push        ebx
    push        ebx
    push        offset FLAT:L$13
    call        near ptr FLAT:printf
    add         esp,10H
    inc         ebx
    cmp         ebx,4
    jl          L$5
    ; NOTE(review): the code from here to L$8 reads __iob and may call fgetc;
    ; nothing in the C program posted at the top of the thread does this.
    ; Presumably it comes from a statement that was removed before posting
    ; (looks like an inlined getc-style read) - unverified.
    ; If the dword at __iob+4 is <= 0, go call fgetc.
    mov         edi,dword ptr FLAT:__iob+4
    test        edi,edi
    jle         L$6
    ; Otherwise look at the byte addressed by the dword at __iob: if
    ; (byte - 0dH) is above 0dH (unsigned) take the inline path at L$7,
    ; else fall through to the fgetc call.
    mov         eax,dword ptr FLAT:__iob
    xor         ebx,ebx
    mov         bl,byte ptr [eax]
    sub         ebx,0dH
    cmp         ebx,0dH
    ja          L$7
L$6:
    push        offset FLAT:__iob
    call        near ptr FLAT:fgetc
    add         esp,4
    jmp         L$8
; L$7: inline path - store (dword at __iob+4) - 1 and (dword at __iob) + 1.
L$7:
    lea         edx,-1[edi]
    mov         dword ptr FLAT:__iob+4,edx
    inc         eax
    mov         dword ptr FLAT:__iob,eax
; L$8: return 0 and undo the prologue in reverse order.
L$8:
    xor         eax,eax
    mov         esp,ebp
    pop         ebp
    pop         edi
    pop         esi
    pop         ebx
    ret
_TEXT		ENDS

</assembly>

Walter wrote:
> If you could post assembler code generated by OW C++ for that function, I
> can compare the two.
> 



-- 
Knowledge comes from finding the answers, yes but
understanding what the answers mean
is what brings wisdom.
				  - Lionel Luthor
March 30, 2005
Spotted the bug. There is a bug with the 'pextrw' instruction in the code compiled with DMC (the first operand is always changed when linked).

With obj2asm, everything is correct in the compiled object code (.obj), first operand of 'pextrw' is just same as specified in the source code.

When linked into an executable, the first operand of the 'pextrw' instruction is changed to EAX (this always happens, no matter which register the source code specifies as the first operand).

Perhaps a bug with the linker?

In article <d21smg$mi7$1@digitaldaemon.com>, Isma'il Adeniran says...
>
>It also compiles correctly with VC++.NET.
>
>Isma'il Adeniran wrote:
>> I think there's an error in the way dmc handles inline assembler MMX instructions.
>> 
>> I compiled and ran this code using dmc:
>> 
>> <code>
>> #include <stdio.h>
>> 
>> int main(void)
>> {
>>        int cnt;
>>     int a1[4] = {12, 1, 34, 17};
>>     int b1[4] = {17, 7, 4, 33};
>>     int c1[4];
>> 
>>     printf(" a1: ");
>>     for (cnt = 0; cnt < 4;cnt++)
>>         printf("%d\t", a1[cnt]);
>> 
>>     printf("\n b1: ");
>>         for (cnt = 0; cnt < 4;cnt++)
>>         printf("%d\t", b1[cnt]);
>> 
>>     _asm {
>>           movq     mm0, qword ptr a1
>>           movq     mm1, qword ptr a1+8
>>           packssdw mm0, mm1
>> 
>>           movq     mm1, qword ptr b1
>>           movq     mm2, qword ptr b1+8
>>           packssdw mm1, mm2
>> 
>>                paddw    mm0, mm1
>> 
>>                 lea      ESI, c1
>>           xor      EDI,EDI
>> 
>>              pextrw   EDI,mm0, 0
>>           mov      dword ptr [ESI], EDI
>>           add      ESI, 4
>> 
>>           pextrw   EDI,mm0, 1
>>           mov      dword ptr [ESI], EDI
>>           add      ESI, 4
>> 
>>           pextrw   EDI,mm0, 2
>>           mov      dword ptr [ESI], EDI
>>           add      ESI, 4
>> 
>>           pextrw   EDI,mm0, 3
>>           mov      dword ptr [ESI], EDI
>>           add      ESI, 4
>> 
>>           emms
>>     };
>> 
>>     printf("\n\n          c1: \n");
>>     for (cnt = 0; cnt < 4;cnt++)
>>         printf(" a1[%d] + b1[%d] = %d\n", cnt, cnt, c1[cnt]);
>> 
>>     return 0;
>> }
>> </code>
>> 
>> <Output on execution:>
>> 
>> D:>pack (using dmc)
>>  a1: 12 1       34      17
>>  b1: 17 7       4       33
>> 
>>           c1:
>>  a1[0] + b1[0] = 0
>>  a1[1] + b1[1] = 0
>>  a1[2] + b1[2] = 0
>>  a1[3] + b1[3] = 0
>> 
>> This is incorrect.
>> 
>> I compiled and ran it with Open Watcom C/C++ and I got the correct resul below:
>> 
>> D:>pack (using Open Watcom)
>>  a1: 12 1       34      17
>>  b1: 17 7       4       33
>> 
>>           c1:
>>  a1[0] + b1[0] = 29
>>  a1[1] + b1[1] = 8
>>  a1[2] + b1[2] = 38
>>  a1[3] + b1[3] = 50
>> 
>> Is this a bug with dmc?
>> 
>> Best regards
>> 
>> Isma'il
>> -----


March 30, 2005
Just found out that not only the first operand differs from the source code, but the second operand does too! The second operand of the 'pextrw' instruction is always mm7 when linked.

In article <d2egdn$1cge$1@digitaldaemon.com>, Jack says...
>
>Spotted the bug. There is a bug with the with 'pextrw' instruction in the code compiled with DMC (first operand is always changed when linked).
>
>With obj2asm, everything is correct in the compiled object code (.obj), first operand of 'pextrw' is just same as specified in the source code.
>
>When linked to an excutable, the first operand of 'pextrw' command changed to EAX (happened always, no matter the first operand in the source code change to whatever).
>
>Perhaps a bug with the linker?
>
>In article <d21smg$mi7$1@digitaldaemon.com>, Isma'il Adeniran says...
>>
>>It also compiles correctly with VC++.NET.
>>
>>Isma'il Adeniran wrote:
>>> I think there's an error in the way dmc handles inline assembler MMX instructions.
>>> 
>>> I compiled and ran this code using dmc:
>>> 
>>> <code>
>>> #include <stdio.h>
>>> 
>>> int main(void)
>>> {
>>>        int cnt;
>>>     int a1[4] = {12, 1, 34, 17};
>>>     int b1[4] = {17, 7, 4, 33};
>>>     int c1[4];
>>> 
>>>     printf(" a1: ");
>>>     for (cnt = 0; cnt < 4;cnt++)
>>>         printf("%d\t", a1[cnt]);
>>> 
>>>     printf("\n b1: ");
>>>         for (cnt = 0; cnt < 4;cnt++)
>>>         printf("%d\t", b1[cnt]);
>>> 
>>>     _asm {
>>>           movq     mm0, qword ptr a1
>>>           movq     mm1, qword ptr a1+8
>>>           packssdw mm0, mm1
>>> 
>>>           movq     mm1, qword ptr b1
>>>           movq     mm2, qword ptr b1+8
>>>           packssdw mm1, mm2
>>> 
>>>                paddw    mm0, mm1
>>> 
>>>                 lea      ESI, c1
>>>           xor      EDI,EDI
>>> 
>>>              pextrw   EDI,mm0, 0
>>>           mov      dword ptr [ESI], EDI
>>>           add      ESI, 4
>>> 
>>>           pextrw   EDI,mm0, 1
>>>           mov      dword ptr [ESI], EDI
>>>           add      ESI, 4
>>> 
>>>           pextrw   EDI,mm0, 2
>>>           mov      dword ptr [ESI], EDI
>>>           add      ESI, 4
>>> 
>>>           pextrw   EDI,mm0, 3
>>>           mov      dword ptr [ESI], EDI
>>>           add      ESI, 4
>>> 
>>>           emms
>>>     };
>>> 
>>>     printf("\n\n          c1: \n");
>>>     for (cnt = 0; cnt < 4;cnt++)
>>>         printf(" a1[%d] + b1[%d] = %d\n", cnt, cnt, c1[cnt]);
>>> 
>>>     return 0;
>>> }
>>> </code>
>>> 
>>> <Output on execution:>
>>> 
>>> D:>pack (using dmc)
>>>  a1: 12 1       34      17
>>>  b1: 17 7       4       33
>>> 
>>>           c1:
>>>  a1[0] + b1[0] = 0
>>>  a1[1] + b1[1] = 0
>>>  a1[2] + b1[2] = 0
>>>  a1[3] + b1[3] = 0
>>> 
>>> This is incorrect.
>>> 
>>> I compiled and ran it with Open Watcom C/C++ and I got the correct resul below:
>>> 
>>> D:>pack (using Open Watcom)
>>>  a1: 12 1       34      17
>>>  b1: 17 7       4       33
>>> 
>>>           c1:
>>>  a1[0] + b1[0] = 29
>>>  a1[1] + b1[1] = 8
>>>  a1[2] + b1[2] = 38
>>>  a1[3] + b1[3] = 50
>>> 
>>> Is this a bug with dmc?
>>> 
>>> Best regards
>>> 
>>> Isma'il
>>> -----
>
>


March 30, 2005
You're right. The problem's with the 'pextrw' instruction. It uses the wrong register. EDI is zeroed. The word is extracted into the EAX register, but the following instruction is still mov [esi], edi rather than mov [esi], eax. Consequently, the value stored at [esi] is always 0.

Nice work Jack!!!


Jack wrote:
> Just found out not only first operand is different from the source code but
> second operand too! The second operand of 'pextrw' instruction is always mm7
> when linked.
> 
> In article <d2egdn$1cge$1@digitaldaemon.com>, Jack says...
> 
>>Spotted the bug. There is a bug with the with 'pextrw' instruction in the code
>>compiled with DMC (first operand is always changed when linked).
>>
>>With obj2asm, everything is correct in the compiled object code (.obj), first
>>operand of 'pextrw' is just same as specified in the source code.
>>
>>When linked to an excutable, the first operand of 'pextrw' command changed to
>>EAX (happened always, no matter the first operand in the source code change to
>>whatever).
>>
>>Perhaps a bug with the linker?
>>
>>In article <d21smg$mi7$1@digitaldaemon.com>, Isma'il Adeniran says...
>>
>>>It also compiles correctly with VC++.NET.
>>>
>>>Isma'il Adeniran wrote:
>>>
>>>>I think there's an error in the way dmc handles inline assembler MMX instructions.
>>>>
>>>>I compiled and ran this code using dmc:
>>>>
>>>><code>
>>>>#include <stdio.h>
>>>>
>>>>int main(void)
>>>>{
>>>>       int cnt;
>>>>    int a1[4] = {12, 1, 34, 17};
>>>>    int b1[4] = {17, 7, 4, 33};
>>>>    int c1[4];
>>>>
>>>>    printf(" a1: ");
>>>>    for (cnt = 0; cnt < 4;cnt++)
>>>>        printf("%d\t", a1[cnt]);
>>>>       printf("\n b1: ");
>>>>        for (cnt = 0; cnt < 4;cnt++)
>>>>        printf("%d\t", b1[cnt]);
>>>>
>>>>    _asm {
>>>>          movq     mm0, qword ptr a1
>>>>          movq     mm1, qword ptr a1+8
>>>>          packssdw mm0, mm1
>>>>                movq     mm1, qword ptr b1
>>>>          movq     mm2, qword ptr b1+8
>>>>          packssdw mm1, mm2
>>>>                     paddw    mm0, mm1
>>>>                      lea      ESI, c1
>>>>          xor      EDI,EDI
>>>>                   pextrw   EDI,mm0, 0
>>>>          mov      dword ptr [ESI], EDI
>>>>          add      ESI, 4
>>>>                pextrw   EDI,mm0, 1
>>>>          mov      dword ptr [ESI], EDI
>>>>          add      ESI, 4
>>>>
>>>>          pextrw   EDI,mm0, 2
>>>>          mov      dword ptr [ESI], EDI
>>>>          add      ESI, 4
>>>>                pextrw   EDI,mm0, 3
>>>>          mov      dword ptr [ESI], EDI
>>>>          add      ESI, 4
>>>>                emms
>>>>    };
>>>>
>>>>    printf("\n\n          c1: \n");
>>>>    for (cnt = 0; cnt < 4;cnt++)
>>>>        printf(" a1[%d] + b1[%d] = %d\n", cnt, cnt, c1[cnt]);
>>>>       return 0;
>>>>}
>>>></code>
>>>>
>>>><Output on execution:>
>>>>
>>>>D:>pack (using dmc)
>>>> a1: 12 1       34      17
>>>> b1: 17 7       4       33
>>>>
>>>>          c1:
>>>> a1[0] + b1[0] = 0
>>>> a1[1] + b1[1] = 0
>>>> a1[2] + b1[2] = 0
>>>> a1[3] + b1[3] = 0
>>>>
>>>>This is incorrect.
>>>>
>>>>I compiled and ran it with Open Watcom C/C++ and I got the correct resul below:
>>>>
>>>>D:>pack (using Open Watcom)
>>>> a1: 12 1       34      17
>>>> b1: 17 7       4       33
>>>>
>>>>          c1:
>>>> a1[0] + b1[0] = 29
>>>> a1[1] + b1[1] = 8
>>>> a1[2] + b1[2] = 38
>>>> a1[3] + b1[3] = 50
>>>>
>>>>Is this a bug with dmc?
>>>>
>>>>Best regards
>>>>
>>>>Isma'il
>>>>-----
>>
>>
> 
> 


-- 
Knowledge comes from finding the answers, yes but
understanding what the answers mean
is what brings wisdom.
				  - Lionel Luthor
March 30, 2005
Jack (check the other posts) just found the bug.
It's with the 'pextrw' instruction.
The word's extracted into the EAX register but it's the EDI register (which has been zeroed out) that's actually copied to the memory at [ESI].
This produces the zeroes on output.

Comparing the assembly output from DMC and OW compilers confirms this.

Reposting the pertinent assembly listing for the function. Apologies for the one I posted yesterday.

****************DMC************          ********Open Watcom************
-------------------------------          -------------------------------
 X$5:
     movq        mm0,-0x30[ebp]		movq        mm0,-0x30[ebp]
     movq        mm1,-0x28[ebp]		movq        mm1,-0x28[ebp]
     packssdw    mm0,mm1		packssdw    mm0,mm1
     movq        mm1,-0x20[ebp]		movq        mm1,-0x20[ebp]
     movq        mm2,-0x18[ebp]		movq        mm2,-0x18[ebp]
     packssdw    mm1,mm2		packssdw    mm1,mm2
     paddw       mm0,mm1		paddw       mm0,mm1
     lea         esi,-0x10[ebp]		lea         esi,-0x10[ebp]
     xor         edi,edi		xor         edi,edi
     pextrw      eax,mm7,0x00		pextrw      edi,mm0,0x00
     mov         [esi],edi		mov         [esi],edi
     add         esi,0x00000004		add         esi,0x00000004
     pextrw      eax,mm7,0x01		pextrw      edi,mm0,0x01
     mov         [esi],edi		mov         [esi],edi
     add         esi,0x00000004		add         esi,0x00000004
     pextrw      eax,mm7,0x02		pextrw      edi,mm0,0x02
     mov         [esi],edi		mov         [esi],edi
     add         esi,0x00000004		add         esi,0x00000004
     pextrw      eax,mm7,0x03		pextrw      edi,mm0,0x03
     mov         [esi],edi		mov         [esi],edi
     add         esi,0x00000004		add         esi,0x00000004
     emms				emms



Walter wrote:
> If you could post assembler code generated by OW C++ for that function, I
> can compare the two.
> 
> "Isma'il Adeniran" <ismail@tamarindseed.com> wrote in message
> news:d21smg$mi7$1@digitaldaemon.com...
> 
>>I compiled and ran it with Open Watcom C/C++ and I got the correct resul
> 
> 
> 


-- 
Knowledge comes from finding the answers, yes but
understanding what the answers mean
is what brings wisdom.
				  - Lionel Luthor
March 30, 2005
I also just posted the assembly output from both DMC and OW for the function to my reply to Walter above. Check it out (skewed).

Isma'il


-- 
Knowledge comes from finding the answers, yes but
understanding what the answers mean
is what brings wisdom.
				  - Lionel Luthor
April 02, 2005
"Isma'il Adeniran" <ismail@tamarindseed.com> wrote in message news:d21qsd$kq1$1@digitaldaemon.com...
> I think there's an error in the way dmc handles inline assembler MMX instructions.

You're right. I have it fixed now, it'll go out in the next update.