Hirdetés

2024. április 26., péntek

Gyorskeresés

Hozzászólások

(#9) P.H.


P.H.
senior tag

Under investigation: 2D gráfpont koordinátaszámítás (nagyítás + eltolás) és vágás több százezer ponton.

- x87-es kiindulási megoldás (extended precision):

// x87 variant (extended precision).
// Walks a chain of variable-size records; for each one it multiplies X and Y by
// a common factor, subtracts a per-axis value, stores the rounded integers to
// XCOOR/YCOOR and appends the record pointer to a list at ESI when the record
// passes the BITFILTER test and the bounds test.
// Register / stack roles as read from the code below:
//   EDI = current record; advanced by its RSIZE field plus STRUCTURESIZE
//   EDX = remaining-record counter (loop exits when it drops to <= 0)
//   ESI = write cursor of the output pointer list
//   EBP = scratch, overwritten with IMAGEHEIGHT
//   x87 stack before @init: st(0) = value subtracted from Y,
//                           st(1) = value subtracted from X,
//                           st(2) = multiplier applied to both
//   (presumably Y offset, X offset and zoom factor, loaded outside this
//   snippet - TODO confirm)
// The record EDI points to on entry is only used for its RSIZE; processing
// starts with the record that follows it.
@init:
fldz                                      // dummy entry so the fstp st(0) at the loop head has something to pop on the first pass
@coor:
movzx eax,word ptr [edi+STRUCTURE.RSIZE]  // eax = RSIZE of the current record, zero-extended from 16 bits
mov cx,[LIMIT]                            // reloaded every pass because ECX is zeroed further down
sub edx,01h                               // count down; flags are consumed by jle @sections (lea and fstp do not modify EFLAGS)
lea edi,[edi+eax+STRUCTURESIZE]           // step to the next record
fstp st(0)                                // drop the leftover of the previous pass (fldz value, skipped X, or the Y kept by fist)
jle @sections                             // counter exhausted
or byte ptr [edi+STRUCTURE.BITFIELD],$80  // set bit 7 up front; it is cleared again only when the record is accepted
cmp [edi+STRUCTURE.RSIZE],cx              // flags are consumed by jle @coor after the fld
fld dword ptr [edi+STRUCTURE.X]
jle @coor                                 // signed RSIZE <= LIMIT: skip this record (the loaded X is popped at the loop head)
fmul st,st(3)                             // X * multiplier
mov al,[BITFILTER]
fld dword ptr [edi+STRUCTURE.Y]
xor ecx,ecx                               // ecx = 0, used as the dword written after the list entry
mov [esi],edi                             // tentative list entry; it only becomes permanent if ESI is advanced below
fmul st,st(4)                             // Y * multiplier
fxch                                      // bring the scaled X back to st(0)
test [edi+STRUCTURE.BITFIELD],al          // flags are held until jnz @coor; the x87 and mov instructions in between leave EFLAGS alone
fsub st,st(3)                             // scaled X minus its per-axis value
mov eax,[IMAGEWIDTH]
fistp dword ptr [edi+STRUCTURE.XCOOR]     // store X as integer (current FPU rounding mode) and pop
mov ebp,[IMAGEHEIGHT]
fsub st,st(1)                             // scaled Y minus its per-axis value
mov [esi+04h],ecx                         // zero dword following the tentative entry
fist dword ptr [edi+STRUCTURE.YCOOR]      // store Y as integer without popping; the value is discarded at the loop head
jnz @coor                                 // a BITFILTER bit is set in BITFIELD: reject, bit 7 stays set
// Bounds test: OR together x, y, IMAGEWIDTH-x and IMAGEHEIGHT-y; the sign bit
// of the result is set if any of the four is negative.
sub eax,[edi+STRUCTURE.XCOOR]             // IMAGEWIDTH - x
mov ecx,[edi+STRUCTURE.YCOOR]
or eax,[edi+STRUCTURE.XCOOR]
sub ebp,ecx                               // IMAGEHEIGHT - y
or eax,ecx
or eax,ebp
js @coor                                  // outside 0..IMAGEWIDTH / 0..IMAGEHEIGHT: reject
and byte ptr [edi+STRUCTURE.BITFIELD],$7F // accepted: clear bit 7
add esi,04h                               // commit the list entry; [esi] now holds the zero written above
jmp @coor
// NOTE(review): if the last processed record was rejected after the tentative
// store, [esi] still holds its pointer and only [esi+4] is zero. The code that
// follows @sections is not shown here - confirm that it terminates the list at [esi].
@sections:
...

- SSE2 (double precision):

// SSE2 variant (double precision).
// Walks a chain of variable-size records; for each one it converts the two
// floats at STRUCTURE.X to doubles, multiplies and subtracts them as a pair,
// stores the two rounded integers at STRUCTURE.XCOOR and appends the record
// pointer to a list at ESI when the record passes the BITFILTER test and the
// bounds test.
// Register roles as read from the code below:
//   EDI = current record; advanced by its RSIZE field plus STRUCTURESIZE
//   EDX = remaining-record counter (loop exits when it drops to <= 0)
//   ESI = write cursor of the output pointer list
//   BP  = LIMIT, CL = BITFILTER (both loaded once, before the loop)
//   BL  = pending BITFIELD value of the current record; it is written back
//         at the top of the next pass
//   XMM2 = packed-double multipliers, XMM1 = packed-double values subtracted
//          after the multiply, XMM4 = packed dwords the integer results are
//          subtracted from (presumably zoom, offsets and IMAGEWIDTH/IMAGEHEIGHT,
//          loaded outside this snippet - TODO confirm)
// cvtps2pd reads 8 bytes at STRUCTURE.X and movq writes 8 bytes at
// STRUCTURE.XCOOR, so the second lane is whatever lies 4 bytes after X / XCOOR
// (presumably Y / YCOOR - confirm against the record layout).
// The record EDI points to on entry is only used for its RSIZE; its BITFIELD
// is rewritten with the value just read from it.
@initSSE:
mov bp,[LIMIT]
mov cl,[BITFILTER]
mov bl,[edi+STRUCTURE.BITFIELD]           // makes the first write-back at @coorSSE store the starting record's own value
@coorSSE:
movzx eax,word ptr [edi+STRUCTURE.RSIZE]  // eax = RSIZE of the current record, zero-extended from 16 bits
sub edx,01h                               // count down; flags are consumed by jle @sectionSSE (mov, movapd and lea do not modify EFLAGS)
mov [edi+STRUCTURE.BITFIELD],bl           // deferred write-back of the BITFIELD of the record handled in the previous pass
movapd xmm3,xmm4                          // fresh copy of the limit pair, consumed by psubd
lea edi,[edi+eax+STRUCTURESIZE]           // step to the next record
mov bl,$80                                // pending BITFIELD starts with bit 7 set
jle @sectionSSE                           // counter exhausted (the last record's BITFIELD was already written back above)
cvtps2pd xmm0,[edi+STRUCTURE.X]           // load two floats and widen them to doubles
or bl,[edi+STRUCTURE.BITFIELD]            // bl = BITFIELD with bit 7 set
cmp [edi+STRUCTURE.RSIZE],bp              // flags are consumed by jle @coorSSE after mulpd
mulpd xmm0,xmm2                           // multiply both lanes
jle @coorSSE                              // signed RSIZE <= LIMIT: skip this record; bl (bit 7 set) is written back next pass
subpd xmm0,xmm1                           // subtract the per-lane values
cvtpd2dq xmm0,xmm0                        // two int32 in the low 64 bits (rounding per MXCSR), upper 64 bits zeroed
test bl,cl                                // flags are consumed by jnz @coorSSE after psubd and movq
psubd xmm3,xmm0                           // limit - coordinate, per dword
movq [edi+STRUCTURE.XCOOR],xmm0           // store both integer coordinates
jnz @coorSSE                              // a BITFILTER bit is set in BITFIELD: reject, bit 7 stays set
por xmm3,xmm0                             // (limit - coordinate) | coordinate: dword sign bit set if either is negative
pmovmskb eax,xmm3                         // collect the top bit of every byte
test al,88h                               // bits 3 and 7 are the sign bits of dwords 0 and 1
jnz @coorSSE                              // outside the limits: reject
and bl,$7F                                // accepted: clear bit 7 in the pending BITFIELD
movnti [esi],edi                          // append the record pointer with a non-temporal store
add esi,04h
jmp @coorSSE
@sectionSSE:
xor eax,eax
sfence                                    // order the preceding movnti stores before the store below
mov [esi],eax                             // zero dword terminates the pointer list
...

- 3DNow! (single precision):

// 3DNow! variant (single precision).
// Walks a chain of variable-size records; for each one it multiplies and
// subtracts the two floats at STRUCTURE.X as a pair, stores the two integer
// results at STRUCTURE.XCOOR and appends the record pointer to a list at ESI
// when the record passes the BITFILTER test and the bounds test.
// Register roles as read from the code below:
//   EDI = current record; advanced by its RSIZE field plus STRUCTURESIZE
//   EDX = remaining-record counter (loop exits when it drops to <= 0)
//   ESI = write cursor of the output pointer list
//   BP  = LIMIT, CL = BITFILTER (both loaded once, before the loop)
//   BL  = pending BITFIELD value of the current record; it is written back
//         at the top of the next pass
//   MM2 = packed-single multipliers, MM1 = packed-single values subtracted
//         after the multiply, MM4 = packed dwords the integer results are
//         subtracted from (presumably zoom, offsets and IMAGEWIDTH/IMAGEHEIGHT,
//         loaded outside this snippet - TODO confirm)
// pfmul reads 8 bytes at STRUCTURE.X and movq writes 8 bytes at
// STRUCTURE.XCOOR, so the second lane is whatever lies 4 bytes after X / XCOOR
// (presumably Y / YCOOR - confirm against the record layout).
// NOTE(review): pf2id converts with truncation, while fistp and cvtpd2dq
// round according to the current rounding mode, so the stored coordinates can
// differ by one from an x87 or SSE2 implementation of the same transform.
@init3DNow:
mov bp,[LIMIT]
mov cl,[BITFILTER]
mov bl,[edi+STRUCTURE.BITFIELD]           // makes the first write-back at @coor3DNow store the starting record's own value
@coor3DNow:
movq mm0,mm2                              // start from the multiplier pair; pfmul below multiplies it by the record's floats
movzx eax,word ptr [edi+STRUCTURE.RSIZE]  // eax = RSIZE of the current record, zero-extended from 16 bits
sub edx,01h                               // count down; flags are consumed by jle @section3DNow (mov, movq and lea do not modify EFLAGS)
mov [edi+STRUCTURE.BITFIELD],bl           // deferred write-back of the BITFIELD of the record handled in the previous pass
movq mm3,mm4                              // fresh copy of the limit pair, consumed by psubd
lea edi,[edi+eax+STRUCTURESIZE]           // step to the next record
mov bl,$80                                // pending BITFIELD starts with bit 7 set
jle @section3DNow                         // counter exhausted (the last record's BITFIELD was already written back above)
pfmul mm0,[edi+STRUCTURE.X]               // multiply both lanes by the two floats of the new record
or bl,[edi+STRUCTURE.BITFIELD]            // bl = BITFIELD with bit 7 set
cmp [edi+STRUCTURE.RSIZE],bp              // flags are consumed by jle @coor3DNow after pfsub
pfsub mm0,mm1                             // subtract the per-lane values
jle @coor3DNow                            // signed RSIZE <= LIMIT: skip this record; bl (bit 7 set) is written back next pass
pf2id mm0,mm0                             // convert both lanes to int32 (truncating)
test bl,cl                                // flags are consumed by jnz @coor3DNow after psubd and movq
psubd mm3,mm0                             // limit - coordinate, per dword
movq [edi+STRUCTURE.XCOOR],mm0            // store both integer coordinates
jnz @coor3DNow                            // a BITFILTER bit is set in BITFIELD: reject, bit 7 stays set
por mm3,mm0                               // (limit - coordinate) | coordinate: dword sign bit set if either is negative
pmovmskb eax,mm3                          // collect the top bit of every byte
test al,88h                               // bits 3 and 7 are the sign bits of dwords 0 and 1
jnz @coor3DNow                            // outside the limits: reject
and bl,$7F                                // accepted: clear bit 7 in the pending BITFIELD
mov [esi],edi                             // append the record pointer to the list
add esi,04h
jmp @coor3DNow
@section3DNow:
xor eax,eax
emms                                      // leave MMX state so that x87 code can run afterwards
mov [esi],eax                             // zero dword terminates the pointer list
...

A sorrend (TSC alapján):
- K8: 1. SSE2, 2. 3DNow!, 3. x87 (kb. 5% eltéréssel)
- Netburst: 1. x87, 2. SSE2 (elhanyagolható eltéréssel)

:F

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Copyright © 2000-2024 PROHARDVER Informatikai Kft.