Hirdetés

2024. április 26., péntek

Gyorskeresés

Hozzászólások

(#48) P.H.


P.H.
senior tag

Macro-op fusion

The Sandy Bridge can fuse two instructions into one µop in more cases than previous processors can [...].

The decoders will fuse an arithmetic or logic instruction and a subsequent conditional jump instruction into a single compute-and-branch µop in certain cases. The compute-and-branch µop is not split in two at the execution units but executed as a single µop by the branch unit at execution port 5.
The CMP, ADD and SUB instructions can fuse with signed and unsigned branch instructions. INC and DEC can fuse with signed branch instructions, and TEST and AND instructions can fuse with all branch instructions (including useless combinations), [...]
The first instruction can have an immediate operand or a memory source operand, but not both. It cannot have a memory destination operand.

Megcsillagoztam azokat az utasításpárokat, amelyek elméletileg egysíthetők, nem véve figyelembe a szükséges 3 utasításos teret köztük (egyrészt az elágazás-tévesztések miatt, másrészt nem nagyon lehet előre kiszámolni, mi tartozik decode-nál ugyanabba a 4-es utasításcsoportba):

The programmer should keep fuseable arithmetic instructions together with a subsequent conditional jump rather than scheduling other instructions in-between; and there should preferably be at least three other instructions between one fuseable pair and the next fuseable pair in order to take advantage of macro-op fusion.

Az ADD és SUB bevonása a Sandy Bridge-nél a macro-op fusionbe igen hatékony lépés volt az Inteltől.

; Entry sequence: saves the registers and clears a scratch area of 3*ebp dwords' worth of bytes.
; The routine header is not part of this listing.
; NOTE(review): register roles on entry are inferred, confirm against the caller:
;   edi = presumably the matrix pointer, ebx = work-area base, ebp = element count.
mov eax,edi                     ; eax = incoming edi; eax is the running pointer of the scans that follow
pushad                          ; push all 8 general registers (20h bytes)
shl ebp,02h                     ; ebp *= 4
xor ecx,ecx                     ; ecx = 0, the fill value
lea edx,[ebp+ebp*02h]           ; edx = 3 * ebp = number of bytes to clear
lea edi,[ebx+ebp]               ; edi = ebx + ebp; base of the later [edi+...] accesses
neg ebp                         ; ebp = -ebp; counters later run from ebp up to 0
@mark0:
sub edx,04h
mov [ebx+edx],ecx               ; clear one dword; mov leaves the flags of the sub intact
jg @mark0                       ; loop while edx > 0, so offset 0 is the last dword cleared
mov byte ptr [edi+00h],01h
; Row pass: scan -ebp bytes (ebp is negative) starting at eax for the smallest dword,
; then subtract that minimum from the scanned entries.
; ebx (outer) and ecx (inner) count from ebp up to 0 in steps of 4.
@@REDUCE_ROWS:
mov ebx,ebp
@rowmin:
mov esi,02000000h               ; initial minimum; also the value tested after the scan
mov ecx,ebp
xor edx,edx
@findrowmin:
cmp esi,[eax]
cmovz edx,ecx                   ; entry equals the current minimum: remember its counter value in edx
cmova esi,[eax]                 ; unsigned: current minimum above the entry -> entry becomes the minimum
add eax,04h
* add ecx,04h
* jnz @findrowmin
sub ecx,ebp                     ; ecx is 0 here, so ecx = -ebp (bytes scanned)
* cmp esi,02000000h
* jz @specific                    ; minimum still at its initial value -> separate handling
add eax,ebp                     ; rewind eax to where this scan started
@subrow:
xor edx,edx
cmp byte ptr [eax+03h],00h      ; most significant byte of the entry
cmovz edx,esi                   ; subtract the minimum only when that byte is 0, otherwise subtract 0
sub [eax],edx
add eax,04h
* sub ecx,04h
* jnz @subrow
* add ebx,04h
* jnz @rowmin
jmp @columns
; Reached when the scan at @findrowmin left esi at 02000000h.
; edx = counter value of the last entry that compared equal during the scan (0 if none did).
@specific:
cmp byte ptr [edi+edx],00h
mov byte ptr [edi+edx],01h      ; set the flag byte unconditionally; mov does not disturb the cmp flags
jnz @@ABNORMAL_EXIT             ; the flag byte was already non-zero
add ecx,ebx
sub dword ptr [esp+__SYS0],01h  ; decrement the counter in stack slot __SYS0
mov byte ptr [edi+ebx+02h],01h
mov [edi+ecx*02h+__0STAR],edx   ; record edx in the __0STAR table
jz @count_result_STACK          ; tests the sub above (the two movs keep the flags): counter hit zero
* add ebx,04h
* jnz @rowmin
; Column pass, walking ebx downwards in steps of 4 until it drops below ebp.
; Positions whose flag byte [edi+ebx] has bit 0 set are skipped.
@columns:
mov [edi+00h],bl                ; ebx is 0 on both paths into @columns, so this stores 0
@@RECUDE_COLUMNS:
sub ebx,04h
sub eax,04h
* cmp ebx,ebp
* jl @@2ND_STEP                   ; signed: ebx < ebp -> pass finished
test byte ptr [edi+ebx],01h
jnz @@RECUDE_COLUMNS            ; flagged -> skip
mov esi,02000000h               ; initial minimum
mov ecx,ebp
@findcolmin:
cmp esi,[eax]
cmova esi,[eax]                 ; unsigned running minimum
add eax,ebp                     ; ebp is negative: eax steps back by -ebp bytes (presumably one row)
* add ecx,04h
* jnz @findcolmin
cmp esi,02000000h
lea ecx,[ebp-04h]               ; lea keeps the cmp flags; @subcol adds 4 before its first test
jz @@ABNORMAL_EXIT              ; minimum still at its initial value
; Walk forward through the entries visited by @findcolmin (eax advances by -ebp bytes
; per step) and subtract the minimum in esi from each entry whose most significant
; byte is 0. When an entry becomes zero, the flag bytes are checked and, if both were
; clear, ebx is recorded in the __0STAR table.
;
; Marking convention of this listing: a leading "* " flags a pair that can
; theoretically macro-fuse. The "sub [eax],edx / jnz" pair below is NOT marked,
; because a first instruction with a memory destination operand cannot fuse
; with the following conditional jump.
@subcol:
xor edx,edx
* add ecx,04h
* jz @@RECUDE_COLUMNS             ; counter reached 0 -> back to the outer loop
sub eax,ebp                     ; ebp is negative: eax advances by -ebp bytes
cmp [eax+03h],dl                ; dl = 0: test the most significant byte of the entry
cmovz edx,esi                   ; subtract the minimum only when that byte is 0
sub [eax],edx
jnz @subcol                     ; entry still non-zero -> next entry
mov dl,[edi+ecx+02h]            ; previous value of the flag byte at [edi+ecx+2]
mov byte ptr [edi+ecx+02h],01h
or dl,[edi+ebx]                 ; ZF = 1 only if that previous value and [edi+ebx] are both 0
mov edx,ecx                     ; mov keeps the flags of the or
jnz @subcol                     ; one of the two flags was already set -> next entry
mov byte ptr [edi+ebx],01h
sub edx,ebp                     ; edx = ecx - ebp
mov byte ptr [edi+ecx+02h],01h
sub dword ptr [esp+__SYS0],01h  ; decrement the counter in stack slot __SYS0
mov [edi+edx*02h+__0STAR],ebx   ; record ebx in the __0STAR table; mov keeps the flags of the sub
jnz @subcol                     ; counter not yet zero -> continue
jmp @count_result_STACK
; Failure return: eax = 0, edx = 7FFFFFFFh, carry flag set.
; NOTE(review): the 20h bytes dropped here match the size of the pushad frame, so the
; saved registers are discarded rather than restored - confirm the caller expects that.
@@ABNORMAL_EXIT:
add esp,20h
xor eax,eax
mov edx,7FFFFFFFh
stc                             ; set after the xor, which would otherwise clear CF
ret

; @@3RD_STEP updates two flag bytes and the __COLON table, then falls through into
; @@2ND_STEP, which scans the matrix at [esp+__MTX]: ecx is the outer counter and
; eax the inner counter, both running from ebp (negative) up to 0 in steps of 4.
; NOTE(review): the step names come from the labels only; the algorithm itself is
; not stated anywhere in this listing.
@@3RD_STEP:
mov byte ptr [edi+ebx+03h],0FFh
mov byte ptr [edi+edx],00h
mov [edi+eax*02h+__COLON],ecx   ; record ecx in the __COLON table
@@2ND_STEP:
lea ecx,[ebp-04h]               ; pre-biased: @check2col adds 4 before its first test
mov edx,00FFFFFFh               ; initial value compared against the entries at @zeroincol
jmp @c2col
@zeroincol:
cmp edx,[esi]                   ; CF = 1 when edx is below the entry (unsigned)
mov bl,[edi+eax+03h]            ; mov keeps the cmp flags
sbb bl,00h                      ; bl = byte at [edi+eax+3] minus CF
jz @@DECIDE_NEXT_STEP           ; result is zero -> hand this entry to @@DECIDE_NEXT_STEP
@nx2mtx:
sub esi,ebp                     ; ebp is negative: esi advances by -ebp bytes
* add eax,04h
* jnz @zeroincol
@c2col:
mov esi,ecx
add esi,[esp+__MTX]
sub esi,ebp                     ; esi = [esp+__MTX] + ecx - ebp
@check2col:
add esi,04h
* add ecx,04h
* jz @@5TH_STEP                   ; outer counter reached 0 -> scan finished
cmp byte ptr [edi+ecx],00h
mov eax,ebp                     ; restart the inner counter; mov keeps the cmp flags
jnz @check2col                  ; flag byte non-zero -> skip to the next outer position
jmp @zeroincol
; Adjust the matrix at [esp+__MTX] by the value in edx: the outer loop (ebx) selects
; the sign of edx per pass, the inner loop (ecx) subtracts edx from every entry for
; which the flag test yields zero. Afterwards the position saved in [esp+__FREE0]
; is converted back into counter form and control falls through to the next label.
;
; Marking convention of this listing: a leading "* " flags a pair that can
; theoretically macro-fuse. "add ebx,04h / js" is NOT marked: on Sandy Bridge
; ADD/SUB/CMP fuse with zero, carry and signed-compare jumps, but not with
; JS/JNS (only TEST and AND fuse with those).
@@5TH_STEP:
lea ebx,[ebp+03h]               ; outer counter biased by +3 so that js ends the loop
mov esi,[esp+__MTX]
@nx5row:
mov eax,[edi+ebx-03h]
sub ecx,edx                     ; ecx is 0 here (on entry and after each inner loop): ecx = -edx
xor eax,edx
cmovs edx,ecx                   ; sign bits of the dword and of edx differ -> negate edx
mov ecx,ebp
@decrease_row_free:
bt dword ptr [edi+ecx],00h      ; CF = bit 0 of the flag at [edi+ecx]
mov al,[esi+03h]                ; most significant byte of the entry
adc al,[edi+ebx]                ; al += byte at [edi+ebx] + CF; ZF tells whether the sum is 0
mov eax,00000000h               ; mov instead of xor so the flags of the adc survive
cmovz eax,edx                   ; sum was 0 -> subtract edx, otherwise subtract 0
sub [esi],eax
add esi,04h
* add ecx,04h
* jnz @decrease_row_free
add ebx,04h
js @nx5row                      ; loop while the biased counter is still negative
mov eax,[esp+__FREE0]
xor edx,edx                     ; zero-extends the dividend: the offset below is assumed non-negative
mov esi,eax
sub eax,[esp+__MTX]             ; byte offset of the saved pointer from [esp+__MTX]
idiv ebp                        ; divide by the negative ebp: eax = quotient, edx = remainder
neg eax
lea ecx,[ebp+edx]
lea eax,[ebp+eax*04h]
; Branch on the entry at esi: non-zero resumes the scan at @nx2mtx, zero consults
; the __0STAR table to choose between @@3RD_STEP and falling through to the next label.
@@DECIDE_NEXT_STEP:
xor edx,edx
mov [esp+__FREE0],esi           ; remember the current entry pointer
* add edx,[esi]                   ; edx = entry value; add (rather than mov) sets ZF for the jump
* jnz @nx2mtx                     ; entry non-zero -> resume the scan with edx = that value
mov ebx,eax
sub eax,ebp
* add edx,[edi+eax*02h+__0STAR]   ; edx is 0 here, so this loads the __0STAR slot and tests it
* jnz @@3RD_STEP                  ; slot non-zero -> @@3RD_STEP
; Repeatedly search the __0STAR table (index eax stepping down by 4 until it goes
; negative) for a slot equal to ecx. On a hit the slot for ebx is overwritten with
; ecx, ecx is replaced from the __COLON table and the search restarts; when a
; search ends without a hit, the final ecx is stored and control falls through.
;
; Marking convention of this listing: a leading "* " flags a pair that can
; theoretically macro-fuse. "cmp mem,reg / jz" stays marked (the memory operand is
; only read). "sub eax,04h / jns" is NOT marked: on Sandy Bridge ADD/SUB/CMP do
; not fuse with JS/JNS (only TEST and AND do).
@@4TH_STEP:
sub edx,ebp                     ; on the fall-through path edx is 0, so edx = -ebp
jmp @newstar
@0_star:
mov [edi+ebx*02h+__0STAR],ecx
mov ecx,[edi+eax*02h+__COLON]
@newstar:
mov ebx,eax
lea eax,[edx-04h]               ; start the search at the last slot index
@starincol:
* cmp [edi+eax*02h+__0STAR],ecx
* jz @0_star
sub eax,04h
jns @starincol                  ; loop while the index is still >= 0
mov [edi+ebx*02h+__0STAR],ecx
; Decrement the counter in [esp+__SYS0]; when it reaches zero go to the result stage,
; otherwise rebuild the flag bytes from the __0STAR table and restart at @@2ND_STEP.
@@1ST_STEP:
sub dword ptr [esp+__SYS0],01h
mov ebx,edi
mov ecx,ebp
jz @count_result_STACK          ; tests the sub above (the two movs keep the flags)
mov edx,[edi]                   ; save the dword at [edi]; it is written back after the loop
@restructure:
mov esi,[ebx+__0STAR]           ; next __0STAR slot (ebx advances 8 bytes per iteration)
mov byte ptr [edi+ecx+03h],00h  ; clear the flag byte at [edi+ecx+3]
add ebx,08h
mov byte ptr [edi+esi],01h      ; set the flag byte indexed by the slot value
* add ecx,04h
* jnz @restructure
mov [edi],edx                   ; restore the dword saved before the loop
jmp @@2ND_STEP
; Result stage: walk the __0STAR table, accumulate in eax the entries it selects from
; the data at [esp+__SAVE], and write one byte per slot to the buffer at [esp+__MARKS].
; The listing ends after the loop; the return sequence is not shown.
@count_result_STACK:
xor ecx,ecx                     ; slot counter, runs from 0 up to ebp in steps of 4
neg ebp                         ; undo the earlier negation: ebp is positive from here on
xor eax,eax                     ; running sum
mov esi,[esp+__SAVE]
mov ebx,[esp+__MARKS]
add esp,20h                     ; drop 20h bytes of stack (after the two loads above)
@results:
mov edx,[edi+ecx*02h+__0STAR]   ; slot value
add ecx,04h
add edx,ebp                     ; bias the slot value by ebp
add eax,[esi+edx]               ; add the selected entry to the sum
shr edx,02h                     ; byte offset -> index
add esi,ebp                     ; advance esi by ebp bytes
cmp ecx,ebp
mov [ebx],dl                    ; store the index byte; mov keeps the cmp flags
lea ebx,[ebx+01h]               ; lea (not inc/add) so the cmp flags reach the jnz
jnz @results

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Copyright © 2000-2024 PROHARDVER Informatikai Kft.