Hirdetés

2024. április 26., péntek

Gyorskeresés

Hozzászólások

(#28) P.H.


P.H.
senior tag

Utolsó felvonás, két bevezető ciklus összevonása után.

Néhány IPC-mérés a lépések ciklusaira (végtelen ciklusban mérve):
@@REDUCE_ROWS: 1.8 IPC
@@REDUCE_COLUMNS: 1.6 IPC
@@2ND_STEP: 0.9 IPC (ez a leggyakrabban lefutó ciklus)
@@5TH_STEP: 2.2 IPC
@@1ST_STEP: 1.5 IPC

Úgy tűnik, az AMD-n (a Bulldozer-ig bezárólag) a legjobb stratégia az, ha a ciklusokban az utasítások fele [ ] referenciát tartalmaz, azaz a memóriahivatkozások mellett bizonyos ADD reg,imm és MOV reg,reg utasítások helyett azok LEA reg,[reg+imm] vagy LEA reg,[reg] megfelelőiket használom, ezek méretre ugyanakkorák, viszont a 3 AGU valamelyikében futnak az ALU-k helyett.

// ---------------------------------------------------------------------------
// Body of a 32-bit x86 routine (Delphi-style inline assembler). The procedure
// header and the final epilogue are not part of this listing.
//
// What the code visibly works on:
//   * a square matrix of dwords, walked row by row through EAX/ESI; byte +3 of
//     every element is used as a flag byte (values 01h and 02h are written),
//     so the numeric value appears to live in the low 24 bits
//   * MARKS, a byte array; EDI is set to offset(MARKS)+N and the code indexes
//     it with negative offsets (flag bits 01h, 02h, 10h) and with offsets
//     0..N-1 (one signed byte per row, holding a negative column index)
//   * a counter at [esp+_SAVE+__SYS0], initialised to -N and incremented until
//     it reaches zero
//
// Assumed on entry (TODO confirm against the caller):
//   EAX = address of the first matrix element, EBP = N (rows = columns).
//
// The step labels (row/column reduction, steps 1..5, "star"/"colon" zeros)
// look like the Hungarian (Munkres) assignment method - NOTE(review): confirm.
//
// Changes against the posted version:
//   * removed the unconditional "jmp @@5TH_STEP" after @step5row, which made
//     step 5 loop forever and left the popad / DECIDE_NEXT_STEP path dead
//   * @@ABNORMAL_EXIT releases _SAVE+20h instead of a hard-coded 40h
//   * label typo @@RECUDE_COLUMNS -> @@REDUCE_COLUMNS (not a jump target here)
//   * the unterminated "{" comment at @count_result_STACK became // comments
// ---------------------------------------------------------------------------
pushad
mov ebx,offset(MARKS)
lea edx,[ebp+ebp]
xor ecx,ecx
lea edi,[ebx+ebp]
neg ebp
// Zero 2*N bytes of MARKS, one dword per pass. The jg tests the flags of the
// sub; mov and lea leave the flags untouched.
// NOTE(review): stores whole dwords, so it writes past 2*N when 2*N is not a
// multiple of 4 - confirm MARKS is padded.
@mark0:
sub edx,04h
mov [ebx],ecx
lea ebx,[ebx+04h]
jg @mark0
// ---------------------------------------------------------------------------
// Row reduction. EBP = -N here; EBX counts rows and ECX counts columns from -N
// up to 0, so the loop branches can test ZF directly.
// ---------------------------------------------------------------------------
@@REDUCE_ROWS:
// The counter is stored into the pushad frame before the scratch area is
// reserved, which is why later accesses use [esp+_SAVE+__SYS0].
mov [esp+__SYS0],ebp
mov ebx,ebp
sub esp,_SAVE
@rowmin:
mov ecx,ebp
mov esi,01000000h
xor edx,edx
// ESI = unsigned minimum of the row, starting from 01000000h; EDX = index of
// the most recent column whose element equals the running minimum.
@findrowmin:
cmp esi,[eax]
cmovz edx,ecx
cmova esi,[eax]
add ecx,01h
lea eax,[eax+04h]
jnz @findrowmin
// ECX = 0 at loop exit, so this leaves ECX = N.
sub ecx,ebp
// Minimum still 01000000h: no element of the row was below that value.
cmp esi,01000000h
jz @specific
// Rewind EAX to the start of the row.
lea eax,[eax+ebp*04h]
// Subtract the row minimum from every element whose flag byte is zero.
@subrow:
xor edx,edx
cmp byte ptr [eax+03h],00h
cmovz edx,esi
sub [eax],edx
sub ecx,01h
lea eax,[eax+04h]
jnz @subrow
jmp @reducenxrow
// Row without any element below 01000000h. EDX = column of an element equal
// to exactly 01000000h, or 0 if there was none.
@specific:
test edx,edx
jz @@ABNORMAL_EXIT
// Column flag 01h already set -> give up.
test byte ptr [edi+edx],01h
jz @mark
// Failure exit: drop the scratch area and the pushad frame, return EAX = 0,
// EDX = 7FFFFFFFh, CF = 1.
// NOTE(review): the registers saved by pushad are discarded, not restored.
@@ABNORMAL_EXIT:
add esp,_SAVE+20h
xor eax,eax
mov edx,7FFFFFFFh
stc
ret
// Record the assignment: set row flag 10h and column flag 01h, store the
// column index in the per-row byte (ECX = N + row = 0..N-1) and bump the
// counter. mov does not change flags, so the jz tests the counter.
@mark:
or byte ptr [edi+ebx],10h
add ecx,ebx
or byte ptr [edi+edx],01h
add dword ptr [esp+_SAVE+__SYS0],01h
mov [edi+ecx],dl
jz @count_result_STACK
@reducenxrow:
add ebx,01h
jnz @rowmin
// ---------------------------------------------------------------------------
// Column reduction, from the last column to the first. EBP is flipped between
// +N (stepping down a column / index arithmetic) and -N (stepping up a column
// / loop counter).
// ---------------------------------------------------------------------------
@@REDUCE_COLUMNS:
neg ebp
@nxcolmin:
mov edx,ebp
sub ebx,01h
sub eax,04h
add edx,ebx
// N + EBX < 0: all columns processed.
js @@2ND_STEP
// Skip columns whose flag 01h is already set.
test byte ptr [edi+ebx],01h
jnz @nxcolmin
neg ebp
mov edx,01000000h
mov ecx,ebp
// EDX = unsigned minimum of the column, walking upwards (EBP = -N).
@findcolmin:
cmp edx,[eax]
cmova edx,[eax]
add ecx,01h
lea eax,[eax+ebp*04h]
jnz @findcolmin
lea ecx,[ebp-01h]
neg ebp
cmp edx,01000000h
jz @@ABNORMAL_EXIT
// Walk the column downwards and subtract the minimum from every element
// whose flag byte is zero.
@subcol:
xor esi,esi
add ecx,01h
jz @nxcolmin
lea eax,[eax+ebp*04h]
cmp byte ptr [eax+03h],00h
cmovz esi,edx
sub [eax],esi
// Element not zero after the subtraction -> next row.
jnz @subcol
// Element is zero: try to claim it. bts returns the old bit in CF; continue
// with the next row if row flag 10h or column flag 01h was already set.
bts dword ptr [edi+ecx],04h
jc @subcol
bts dword ptr [edi+ebx],00h
lea esi,[ecx+ebp]
jc @subcol
// Claimed: bump the counter, set element flag 01h, store the column index in
// the per-row byte. The jnz still tests the counter (mov keeps the flags).
add dword ptr [esp+_SAVE+__SYS0],01h
mov byte ptr [eax+03h],01h
mov [edi+esi],bl
jnz @subcol
jmp @count_result_STACK
// ---------------------------------------------------------------------------
// Step 3: set row flag 02h, set element flag 02h on the zero just found, and
// clear flag 01h of the column given by EDX (set up in @@DECIDE_NEXT_STEP).
// Falls through into step 2.
// ---------------------------------------------------------------------------
@@3RD_STEP:
or byte ptr [edi+ebx],02h
mov byte ptr [esi+03h],02h
and byte ptr [edi+edx],11111110b
// ---------------------------------------------------------------------------
// Step 2: scan every element whose row flag 02h and column flag 01h are both
// clear. A zero goes straight to @@DECIDE_NEXT_STEP; otherwise EDX tracks the
// smallest such element, starting from 00FFFFFFh. EBP = +N from here on.
// ---------------------------------------------------------------------------
@@2ND_STEP:
xor ebx,ebx
mov esi,[esp+_SAVE+__MTX]
xor ecx,ecx
mov edx,00FFFFFFh
sub ebx,ebp
@free0:
sub ecx,ebp
// Skip whole rows that carry flag 02h.
@freerow:
test byte ptr [edi+ebx],02h
jz @zeroinrow
add ebx,01h
lea esi,[esi+ebp*04h]
jnz @freerow
jmp @@5TH_STEP
@zeroinrow:
xor eax,eax
test byte ptr [edi+ecx],01h
jnz @nx2col
add eax,[esi]
jz @@DECIDE_NEXT_STEP
cmp edx,eax
jbe @nx2col
// New minimum: snapshot all registers (row, column, element pointer, value)
// over the scratch area so step 5 can resume at this element afterwards.
// This relies on _SAVE being 20h, the size of a pushad frame, so that ESP is
// unchanged afterwards - TODO confirm.
add esp,_SAVE
lea edx,[eax] //mov edx,eax
pushad
@nx2col:
add ecx,01h
lea esi,[esi+04h]
jnz @zeroinrow
add ebx,01h
jnz @free0
// ---------------------------------------------------------------------------
// Step 5: adjust the matrix by the minimum in EDX. Rows without flag 02h go
// through @decrease_row_free, rows with it through @increase_double_markeds.
// ---------------------------------------------------------------------------
@@5TH_STEP:
xor ecx,ecx
mov esi,[esp+_SAVE+__MTX]
sub ebx,ebp
@nx5row:
sub ecx,ebp
test byte ptr [edi+ebx],02h
jnz @increase_double_markeds
// Subtract EDX when the element flag byte is 0 and column flag 01h is clear:
// CF = column flag, adc yields ZF only if flag byte + CF = 0. The mov that
// clears EAX does not disturb the flags consumed by cmovz.
@decrease_row_free:
bt dword ptr [edi+ecx],00h
mov al,[esi+03h]
adc al,00h
mov eax,00000000h
cmovz eax,edx
sub [esi],eax
add ecx,01h
lea esi,[esi+04h]
jnz @decrease_row_free
jmp @step5row
// Add EDX when column flag 01h is set and the flag byte is zero above its low
// two bits: the sbb borrows (CF = 1) only if (flag byte and 0FCh) - CF < 0.
@increase_double_markeds:
mov al,[esi+03h]
and al,11111100b
bt dword ptr [edi+ecx],00h
sbb al,00h
mov eax,00000000h
cmovc eax,edx
add [esi],eax
add ecx,01h
lea esi,[esi+04h]
jnz @increase_double_markeds
@step5row:
add ebx,01h
jnz @nx5row
// The posted listing had "jmp @@5TH_STEP" here, which repeated step 5 forever
// and made the code below unreachable. Instead, restore the register snapshot
// taken in step 2 (the element that held the minimum) and re-reserve the 20h
// bytes that popad released.
// NOTE(review): if step 2 took no snapshot in this pass, popad reads stale
// data - confirm this cannot happen.
popad
sub esp,20h
// EAX = row index 0..N-1; DL = that row's stored column byte. Non-zero means
// the row already has an assigned column -> step 3, with EDX = 0FFFFFFxxh,
// i.e. the sign-extended negative column index.
@@DECIDE_NEXT_STEP:
mov edx,0FFFFFF00h
lea eax,[ebx+ebp]
add dl,[edi+eax]
jnz @@3RD_STEP
// ---------------------------------------------------------------------------
// Step 4: follow the alternating chain. The current zero gets element flag
// 01h ("star") and becomes the row's stored column; another flag-01h element
// in the same column (in a different row) is cleared, and the flag-02h
// ("colon") element of that row is processed next.
// ---------------------------------------------------------------------------
@@4TH_STEP:
mov edx,[esp+_SAVE+__MTX]
@colon_to_star:
mov [edi+eax],cl
add ecx,ebp
mov byte ptr [esi+03h],01h
xor eax,eax
// ESI = top element of column ECX; ECX becomes the column's byte offset.
lea esi,[edx+ecx*04h]
shl ecx,02h
and byte ptr [edi+ebx],11111101b
sub eax,ebp
@search_star_in_column:
test byte ptr [esi+03h],01h
jz @nxstar
// Ignore the element that was just flagged in the current row.
cmp eax,ebx
jnz @0_star
@nxstar:
add eax,01h
lea esi,[esi+ebp*04h]
jnz @search_star_in_column
jmp @@1ST_STEP
// Other flagged element found in row EAX: clear its flag and the row's stored
// column byte, then rewind ESI to the start of that row.
@0_star:
mov ebx,eax
mov byte ptr [esi+03h],00h
add eax,ebp
sub esi,ecx
xor ecx,ecx
mov byte ptr [edi+eax],00h
sub ecx,ebp
@search_colon_in_row:
test byte ptr [esi+03h],02h
jnz @colon_to_star
add ecx,01h
lea esi,[esi+04h]
jnz @search_colon_in_row
// No flag-02h element in the row; execution simply continues into step 1.
@error:
nop
// ---------------------------------------------------------------------------
// Step 1: one more assignment is complete. Bump the counter (zero = done),
// then clear all flags and rebuild the column flags from the per-row bytes.
// EDX still holds the matrix base loaded in step 4.
// ---------------------------------------------------------------------------
@@1ST_STEP:
xor ebx,ebx
xor eax,eax
add dword ptr [esp+_SAVE+__SYS0],01h
jz @count_result_STACK
sub ebx,ebp
// Save MARKS byte [edi+0]: @markcol overwrites it for rows whose stored
// column byte is 0; it is written back before returning to step 2.
mov cl,[edi+00h]
jmp @nxclear
// Clear element flag 02h across one row.
@clear_colon:
and byte ptr [esi+03h],11111101b
add eax,01h
lea esi,[esi+04h]
jnz @clear_colon
@nxclear:
sub eax,ebp
// Zero each flag byte; rows that had flag 02h first get their elements
// cleaned by @clear_colon (the jnz uses the flags of the test, mov keeps them).
@markedrow:
test byte ptr [edi+ebx],02h
mov esi,edx
mov byte ptr [edi+ebx],00h
jnz @clear_colon
add ebx,01h
lea edx,[edx+ebp*04h]
jnz @markedrow
// Write flag byte 01h for the column stored in each per-row byte (EBX =
// 0..N-1, EAX counts -N..0).
@markcol:
movsx edx,byte ptr [edi+ebx]
add eax,01h
lea ebx,[ebx+01h]
mov byte ptr [edi+edx],01h
jnz @markcol
mov [edi+00h],cl
jmp @@2ND_STEP
// ---------------------------------------------------------------------------
// Result: sum the selected elements into EAX and write each row's column
// index (0..N-1) to the byte buffer loaded from [esp+__MARKS].
// ---------------------------------------------------------------------------
@count_result_STACK:
// Author's notes:
//   EDI -> MARKS memory end
//   EBP: row/column quantity
// NOTE(review): the jump from @mark in the row reduction arrives with
// EBP = -N, not +N - confirm that path.
// NOTE(review): the matrix pointer is read from __SAVE here, while steps 2,
// 4 and 5 use __MTX; presumably an unmodified copy of the input - confirm.
add esp,_SAVE
xor ecx,ecx
xor eax,eax
mov esi,[esp+__SAVE]
mov ebx,[esp+__MARKS]
add esp,20h
// The jnz tests the cmp; the mov and lea after it do not change flags.
@results:
movsx edx,byte ptr [edi+ecx]
lea ecx,[ecx+01h]
add edx,ebp
add eax,[esi+edx*04h]
cmp ecx,ebp
mov [ebx],dl
lea esi,[esi+ebp*04h]
lea ebx,[ebx+01h]
jnz @results
// NOTE(review): the listing ends here; the routine's return sequence is not
// part of the visible code.

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Copyright © 2000-2024 PROHARDVER Informatikai Kft.