Hirdetés

2024. április 25., csütörtök

Gyorskeresés

Hozzászólások

(#26) P.H.


P.H.
senior tag

Magyar módszer, legfejlebb 126x126-os mátrixokra, a mátrixelemek nagysága legfeljebb 00FFFFFFh. A sebességkülönbség kb. 3-szoros az új kód javára úgy, hogy a mátrix teljesen belefér (és belefért régen is) az L1D-be.

Jelenlegi kód:
pushad
@@REDUCE_ROWS:
mov esi,00FFFFFFh
mov edi,eax
mov edx,ebp
@rowmin:
mov eax,esi
mov ebx,edi
mov ecx,ebp
@findrowmin:
cmp eax,[edi]
cmova eax,[edi]
sub ecx,01h
lea edi,[edi+04h]
jnz @findrowmin
cmp eax,esi
mov ecx,ebp
jz @@ABNORMAL_EXIT
@decrow:
xor edi,edi
cmp [ebx],esi
cmovbe edi,eax
sub [ebx],edi
sub ecx,01h
lea ebx,[ebx+04h]
jnz @decrow
sub edx,01h
mov edi,ebx
jnz @rowmin
jmp @@REDUCE_COLUMNS
@@ABNORMAL_EXIT:
add esp,20h
xor eax,eax
mov edx,7FFFFFFFh
stc
ret
@@REDUCE_COLUMNS:
mov ebx,ebp
mov edi,[esp+__MTX]
@colmin:
mov eax,esi
mov ecx,ebp
@findcolmin:
cmp eax,[edi]
cmova eax,[edi]
sub ecx,01h
lea edi,[edi+ebp*04h]
jnz @findcolmin
neg ebp
cmp eax,esi
mov ecx,ebp
jz @@ABNORMAL_EXIT
@deccol:
lea edi,[edi+ebp*04]
xor edx,edx
cmp [edi],esi
cmovbe edx,eax
sub [edi],edx
add ecx,01h
jnz @deccol
neg ebp
sub ebx,01h
lea edi,[edi+04h]
jnz @colmin
@@INITMARKS:
mov ebx,offset(MARKS)
lea ecx,[ebp+ebp]
xor eax,eax
lea edi,[ebx+ebp]
mov esi,[esp+__MTX]
@clrmark:
sub ecx,04h
mov [ebx],eax
lea ebx,[ebx+04h]
jg @clrmark
@@START0_SYSTEM:
xor ebx,ebx
lea esi,[esi+ebp*04h]
add eax,01h
xor ecx,ecx
@start0system:
sub esi,04h
xor edx,edx
sub ebx,eax
push esi
sub edx,ebp
@findfree0:
cmp [esi],eax
jb @markitem
@nxrow:
add edx,eax
lea esi,[esi+ebp*04h]
jnz @findfree0
jmp @nxcol
@markitem:
bts dword ptr [edi+edx],01h
jc @nxrow
add edx,ebp
mov [esi+03h],al
add ecx,eax
mov [edi+edx],bl
or [edi+ebx],al
@nxcol:
lea edx,[ebx+ebp]
pop esi
test edx,edx
jnz @start0system
mov [esp+__SYS0],ecx
@@CLEARROWMARKS:
cmp ebp,ecx
jz @count_result_MTX
@clear_row_mark:
and [edi+ebx],al
add ebx,eax
jnz @clear_row_mark
sub esp,_SAVE
jmp @@2ND_STEP
@@3RD_STEP:
mov byte ptr [esi+03h],02h
and byte ptr [edi+edx],11111110b
@@2ND_STEP:
xor ebx,ebx
mov esi,[esp+_SAVE+__MTX]
xor ecx,ecx
mov edx,00FFFFFFh
sub ebx,ebp
@free0:
sub ecx,ebp
@freerow:
test byte ptr [edi+ebx],02h
jz @zeroinrow
add ebx,01h
lea esi,[esi+ebp*04h]
jnz @freerow
jmp @@5TH_STEP
@zeroinrow:
test byte ptr [edi+ecx],01h
jnz @handlecol
cmp edx,[esi]
jbe @handlecol
mov edx,[esi]
test edx,edx
jz @@DECIDE_NEXT_STEP
add esp,_SAVE
pushad
@handlecol:
add ecx,01h
lea esi,[esi+04h]
jnz @zeroinrow
add ebx,01h
jnz @free0
@@5TH_STEP:
xor ecx,ecx
mov esi,[esp+_SAVE+__MTX]
sub ebx,ebp
@nx5row:
sub ecx,ebp
test byte ptr [edi+ebx],02h
jz @decrease_row_free
@increase_double_markeds:
mov al,[esi+03h]
and al,11111100b
bt dword ptr [edi+ecx],00h
sbb al,00h
sbb eax,eax
and eax,edx
add [esi],eax
add ecx,01h
lea esi,[esi+04h]
jnz @increase_double_markeds
jmp @step5row
@decrease_row_free:
bt dword ptr [edi+ecx],00h
mov al,[esi+03h]
adc al,00h
mov eax,00000000h
cmovz eax,edx
sub [esi],eax
add ecx,01h
lea esi,[esi+04h]
jnz @decrease_row_free
@step5row:
add ebx,01h
jnz @nx5row
popad
sub esp,20h
@@DECIDE_NEXT_STEP:
mov edx,0FFFFFF00h
lea eax,[ebx+ebp]
or byte ptr [edi+ebx],02h
add dl,[edi+eax]
jnz @@3RD_STEP
@@4TH_STEP:
mov edx,[esp+_SAVE+__MTX]
add dword ptr [esp+_SAVE+__SYS0],01h
@0colon:
mov [edi+eax],cl
add ecx,ebp
mov byte ptr [esi+03h],01h
xor eax,eax
lea esi,[edx+ecx*04h]
shl ecx,02h
sub eax,ebp
@search_star_in_column:
test byte ptr [esi+03h],01h
jz @nxstar
cmp eax,ebx
jnz @0_star
@nxstar:
add eax,01h
lea esi,[esi+ebp*04h]
jnz @search_star_in_column
jmp @@1ST_STEP
@0_star:
mov ebx,eax
mov byte ptr [esi+03h],00h
add eax,ebp
sub esi,ecx
xor ecx,ecx
mov byte ptr [edi+eax],00h
sub ecx,ebp
@search_colon_in_row:
test byte ptr [esi+03h],02h
jnz @0colon
add ecx,01h
lea esi,[esi+04h]
jnz @search_colon_in_row
@error:
nop
@@1ST_STEP:
cmp ebp,[esp+_SAVE+__SYS0]
jz @count_result_STACK
lea edx,[edx+ebp*04h]
push ebp
@remove_row_marks_set_cols:
lea esi,[edx-04h]
sub edi,01h
xor bl,bl
mov ecx,ebp
sub edx,04h
@chkstarmark:
mov al,[esi+03h]
and al,11111101b
mov [esi+03h],al
and al,01h
or bl,al
sub ecx,01h
lea esi,[esi+ebp*04h]
jnz @chkstarmark
sub dword ptr [esp],01h
mov [edi],bl
jnz @remove_row_marks_set_cols
pop ebx
add edi,ebp
jmp @@2ND_STEP
@count_result_STACK:
add esp,_SAVE
@count_result_MTX:
xor ecx,ecx
xor eax,eax
mov esi,[esp+__SAVE]
mov ebx,[esp+__MARKS]
@results:
movsx edx,byte ptr [edi+ecx]
mov byte ptr [edi+ecx],00h
add ecx,01h
add edx,ebp
add eax,[esi+edx*04h]
cmp ecx,ebp
mov [ebx],dl
lea esi,[esi+ebp*04h]
lea ebx,[ebx+01h]
jnz @results

6-7 évvel ezelőtti kód:
mov ecx,[mtxdblsize]
mov edi,[marks]
xor eax,eax
mov ebp,[mtxsize]
mov edx,ecx
rep stosd
lea edi,[tempmarks]
mov ecx,edx
rep stosd

@reduce_rows_with_minimum:
mov edi,[mtx]
mov edx,ebp
mov esi,00FFFFFFh
@row_minimum:
mov ecx,ebp
mov ebx,edi
mov eax,esi
mov ch,cl
@find_row_minimum:
scasd
jb @after_row_test
mov eax,[edi-04h]
@after_row_test:
dec cl
jnz @find_row_minimum
@start_reduce_row:
test eax,eax
jnz @force_reduce_row
dec edx
jnz @row_minimum
jmp @reduce_columns_with_m
@force_reduce_row:
cmp eax,esi
jz @abnormal_exit
@reduce_row_elements:
cmp [ebx],esi
ja @after_row_reduction
sub [ebx],eax
@after_row_reduction:
add ebx,04h
dec ch
jnz @reduce_row_elements
dec edx
jnz @row_minimum

@reduce_columns_with_minimum
mov esi,[mtx]
mov ebx,ebp
lea edi,[ebp*04h]
@column_minimum:
mov ecx,ebp
mov eax,00FFFFFFh
mov edx,esi
mov ch,cl
@find_column_minimum:
cmp [esi],eax
ja @after_column_test
mov eax,[esi]
test eax,eax
jz @next_decrease_column
@after_column_test:
add esi,edi
dec cl
jnz @find_column_minimum
@start_reduce_column:
cmp eax,00FFFFFFh
jz @abnormal_exit
mov esi,edx
@reduce_column_elements:
cmp dword ptr [esi],00FFFF
ja @after_column_reduction
sub [esi],eax
@after_column_reduction:
add esi,edi
dec ch
jnz @reduce_column_element
@next_decrease_column:
lea esi,[edx+04h]
dec ebx
jnz @column_minimum

@determine_start0_system:
mov [sys0],ebx
mov edi,[marks]
mov ebx,ebp
mov eax,0102h
mov esi,[mtx]
xor ecx,ecx
dec ebx
push esi
xor edx,edx
@start_0system:
lea esi,[esi+ebx*04h]
@find_free0:
test [edi+edx],al
jnz @check_next_item
cmp [esi],ecx
jnz @check_next_item
or [edi+edx],al
or [edi+ebx],ah
or [esi+03h],ah
inc [sys0]
jmp @next_column
@check_next_item:
inc edx
lea esi,[esi+ebp*04h]
cmp edx,ebp
jnz @find_free0
@next_column:
mov esi,[esp]
xor edx,edx
dec ebx
jns @start_0system
pop ebx

cmp ebp,[sys0]
jz @count_result
mov ecx,[mtxdblsize]
mov eax,01010101h
@clear_line_marks:
and [edi],eax
add edi,04h
dec ecx
jnz @clear_line_marks
pushad
jmp @@2nd_step
@@1st_step:
mov edi,[marks]
cmp ebp,[sys0]
jz @count_result_with_POPA
mov ebx,ebp
add edi,ebp
mov al,0FDh
lea edx,[edx+ebx*04h-01h]
dec edi
@remove_line_marks_set_cols:
mov esi,edx
mov [edi],bh
mov ecx,ebp
@check_star_mark:
and [esi],al
jz @marks_tested
mov ah,[esi]
and ah,01h
or [edi],ah
@marks_tested:
lea esi,[esi+ebp*04h]
dec ecx
jnz @check_star_mark
@star_next_column:
sub edx,04h
dec edi
dec ebx
jnz @remove_line_marks_set
@@2nd_step:
mov esi,[mtx]
mov ecx,ebp
mov ebx,[marks]
mov edx,00FFFFFFh
@search_free0:
mov edi,[marks]
xor eax,eax
mov ch,byte ptr [mtxsize]
@search_free0_prev_marked:
test byte ptr [ebx],02h
jz @look_for_zero
inc ebx
dec cl
jz @@5th_step
lea esi,[esi+ebp*04h]
jmp @search_free0_prev_mar
@look_for_zero:
test byte ptr [edi],01h
jnz @handle_column_counter
cmp edx,[esi]
jle @handle_column_counter
mov edx,[esi]
test edx,edx
jz @decide_next_step
add esp,20h
pushad
jmp @step_next_item
@handle_column_counter:
test byte ptr [esi+03h],01
jz @step_next_item
mov eax,edi
@step_next_item:
add esi,04h
inc edi
dec ch
jnz @look_for_zero
inc ebx
dec cl
jnz @search_free0
jmp @@5th_step
@decide_next_step:
mov edx,0102h
or [ebx],dl
test eax,eax
jz @continue_search
@@3rd_step:
and byte ptr [eax],0FEh
mov [esi+03h],dl
jmp @@2nd_step
@continue_search:
mov ebx,ecx
lea eax,[esi+03h]
dec ch
jz @@4th_step
add esi,07h
inc edi
@search_0star:
test [esi],dh
jz @next_0star_test
and byte ptr [edi],0FEh
or [eax],dl
jmp @@2nd_step
@next_0star_test:
add esi,04h
inc edi
dec ch
jnz @search_0star
@@4th_step:
or [eax],dh
xor ecx,ecx
xor edx,edx
inc [sys0]
mov cl,bl
mov edi,ebp
mov dl,bh
sub edi,ecx
mov ecx,ebp
mov eax,[mtx]
lea ebx,[ebp*04h]
sub ecx,edx
mov dl,01h
push eax
@check_column:
shl ecx,02h
lea esi,[eax+ecx+03h]
xor eax,eax
@search_star_in_column:
test [esi],dl
jz @prepare_for_next_item
cmp eax,edi
jnz @found_0_star
@prepare_for_next_item:
inc eax
add esi,ebx
cmp ebp,eax
jnz @search_star_in_column
pop edx
jmp @@1st_step
@found_0_star:
mov byte ptr [esi],00h
sub esi,ecx
mov edi,eax
xor ecx,ecx
mov eax,0102h
@search_colon_in_row:
test [esi],al
jnz @found_0_colon
inc ecx
add esi,04h
cmp ecx,ebp
jnz @search_colon_in_row
@found_0_colon:
mov [esi],ah
mov eax,[esp]
mov dl,01h
jmp @check_column
@@5th_step:
mov esi,[mtx]
sub ebx,ebp
mov ecx,ebp
mov al,01h
mov ebp,ebx
add esi,03h
@decrease_next_row:
mov ch,[esp+08h]
mov edi,ebp
test byte ptr [ebx],02h
jz @start_decrease_current
mov ah,0FCh
@increase_double_markeds:
test [edi],al
jz @step_next_element
test [esi],ah
jnz @step_next_element
add [esi-03h],edx
@step_next_element:
add esi,04h
inc edi
dec ch
jnz @increase_double_marke
inc ebx
dec cl
jnz @decrease_next_row
popad
pushad
jmp @decide_next_step
@start_decrease_current_row:
mov ah,0FFh
@decrease_current_row:
test [edi],al
jnz @jump_next_element
test [esi],ah
jnz @jump_next_element
sub [esi-03h],edx
@jump_next_element:
add esi,04h
inc edi
dec ch
jnz @decrease_current_row
@step_on_next_row:
inc ebx
dec cl
jnz @decrease_next_row
popad
pushad
jmp @decide_next_step
@count_result_with_POPAD:
add esp,20h
@count_result:
mov esi,[save]
shl ebp,02h
mov ebx,edi
xor ecx,ecx
mov edi,[mtx]
mov eax,01000000h
push edi
mov edx,[sys0]
@count_optimum:
@test_elements:
scasd
jnz @test_elements
@increase_optimum:
sub edi,04h
sub edi,[esp]
add [esp],ebp
add ecx,[esi+edi]
add esi,ebp
shr edi,02h
mov [ebx],edi
inc ebx
dec edx
mov edi,[esp]
jnz @count_optimum
@store_optimum:
pop eax
shr ebp,02h
mov eax,ecx

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Copyright © 2000-2024 PROHARDVER Informatikai Kft.