Hirdetés

2024. április 16., kedd

Gyorskeresés

Hozzászólások

(#101) P.H.


P.H.
senior tag

Továbbra is egy szálas program.

Core2 (2.5 GHz): 60 sec alatt 507000 mátrix
Sandy Bridge (G1620 2.7 GHz): a K10-es ciklusverzióval 59 sec, a Core2-essel 60 sec

Akkor is meg fogja oldani 2500 MHz-en bármely Core egy szálon ezt a feladatot; ha kell, fél évig mászok fel hetente 1000 mátrixot, de meg fogja.

{@04-} { x1 } movsx ecx,byte ptr es:[edx]
{1-} xor eax,eax
{2-} mov esi,ebp
{0} and esi,-8
@init:
{@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
{1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
{2*} add esi,08h
{0*} jnz @init { clears ESI register }
{ } add edx,01h
{ -} mov ebx,ebp
@@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
{@20} cmp ecx,esi { 4 AGU + 9 EX uops on Kaveri }
{1} lea eax,[ebx+ecx*04h] { 3 clk 8 ALU ops on Core 2 }
{2} movsx ecx,[edx]
{0} lea edx,[edx+01h] { db $8D,$52,$00 }
{1} mov [edi+eax*08h+__0STARROW],ebx { __0COUNTER <- EBP }
{2} cmovs eax,esi
{0} mov [edi+ebp*08h+__FIXEDROW],eax
{1*} add ebp,04h
{2*} jnz @@ARGUMENT { clears EBP register }
{ -} mov eax,edi
{ -} mov ebp,ebx
{ -} xor ecx,ecx
{@40} add esp,ebx
{ } lea edx,[ebx-04h]
@@REDUCE_ROWS:
{@45} mov [edi+edx*08h+__ROWMODIFIER],ecx
{1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
{2*} add edx,04h
{0*} jz @@REDUCE_COLUMNS
{1} mov [edi+edx*08h+__0STAR],esi
{2-} xor ecx,ecx
{0} sub eax,ebp
{1**} test esi,esi
{2**} js @@REDUCE_ROWS
{0-} mov ecx,ebp
{@60} @findrowmin: { 2 AGU + 5 EX uops on Kaveri }
{0} mov esi,[eax+ebx] { 2 clk 6 ALU ops on Core 2 }
{1} or esi,[edi+ebx*08h+__0STARROW]
{2} cmp esi,ecx
{0} cmovb ecx,esi
{1*} add ebx,04h
{2*} jnz @findrowmin
{@70-} mov ebx,ebp
{ } neg ecx
{ } jle @@REDUCE_ROWS
@@ABNORMAL_EXIT:
{@76} or edx,0FFFFFFFFh
{1} sub esp,ebp
{@7E} mov esi,[esp+__MARKS]
{0} mov [esi+TRESULT.OPTIMUM],edx
{1} mov ebx,[esi+TRESULT.NEXTIVALUE]
{2} jmp dword ptr [esp+_INVALIDRESULT]
{ x4 } xor eax,eax; xor edx,edx
{@90} @initcol:
{0} mov [edi+__INITCOL],ecx
{1-} mov esi,ebp
{2} neg ebp
{0} push ebp
{1} or ebx,-1
{2} jmp @@1ST_STEP { long jump instruction }
{@A0} { x2 } xor eax,eax
{@A2} @free0col:
{ } lea ecx,[edx-04h]
{@A5} @setcolmod:
{ } mov [edi+edx*08h+__COLMODIFIER],esi
@@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
{0**} cmp edx,ebp
{1**} jz @initcol
{0} sub edx,04h
{@B0-} xor esi,esi
{1**} test [edi+edx*08h+__0STARROW],ebp
{2**} js @setcolmod
{ } lea ebx,[edi+edx]
{ -} mov ecx,ebp
{ -} mov eax,ebp
{ } sub ebx,ebp
{@C0} @findcolmin:
{0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
{1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
{2} or esi,[edi+ecx*08h+__FIXEDROW]
{0} jz @test0row
{1} sub ebx,ebp
{2} cmp esi,eax
{@D0} cmovb eax,esi
{1*} add ecx,04h
{2*} jnz @findcolmin
{ } lea ecx,[ebp-04h]
{ -} mov esi,eax
{ } lea ebx,[edi+edx]
{@E0**} test eax,eax { JS/JNS can only fuse with TEST }
{ **} js @@ABNORMAL_EXIT
{@E4} @seekcol0:
{0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ecx,04h
{2*} jz @free0col
{0} sub ebx,ebp
{1} add eax,[ebx]
{@F1**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
{0**} jnz @seekcol0
@test0row:
{ **} test [edi+ecx*08h+__0STAR],ebp
{ **} js @seekcol0
{ } mov [edi+edx*08h+__0STARROW],ecx
{@FE} mov [edi+ecx*08h+__0STAR],edx
{@02} jns @free0col { forced conditional jump for Sandy Bridge }
{ ----------------------------------------------------------------------------------------------- }
{@04} { x12 } mov eax,00000000h; mov edx,00000000h; xor ecx,ecx
{@10} { x2 } xor ebp,ebp
@@5TH_STEP: { K10:2.5 Core2:2.4 - 2.8 uop/clk - 1900*2+4800
{@12} mov eax,[edi+__INITCOL] { lea ebx,[ebp-04h] }
{1} mov esi,[esp+__SIZE]
{2} add eax,04h
{0} movsx ebx,word ptr [edi+__MINCOLROW]
{@20} @DEC5_free_col: { 3 AGU + 6 EX uops on Kaveri }
{0} mov ebp,[edi+eax*08h+__COLMARK] { 2 clk 5 ALU ops on Core 2 }
{1} sar ebp,1Fh
{2} and ebp,edx
{0} add [edi+eax*08h+__COLMODIFIER],ebp
{1*} add eax,04h
{@30*} jnz @DEC5_free_col { clears EAX register }
{ } mov eax,[esp+__SIZE+esi*04h]
{ } movsx ecx,word ptr [edi+__MINCOLROW+02h]
{ } jmp @INC5_marked_row
{ x4 } xor ebp,ebp; xor esi,esi
{@40} @inc5row:
{0} add [edi+eax*08h+__ROWMODIFIER],edx { 4 AGU + 4 EX uops on Kaveri }
{1-} mov eax,ebp
@INC5_marked_row:
{2} mov ebp,[esp+esi*04h]
{0*} sub esi,01h
{1*} jge @inc5row { sets ESI to 0FFFFFFFFh }
@@3RD_STEP:
{@4E*} and esi,[edi+ebx*08h+__0STAR]
{@52*} jz @4TH_STEP { long jump instruction }
{@58} @re3start:
{ } mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{ } { x1 } mov ecx,es:[edi+__INITCOL] { lea ecx,es:[ebp-04h] }
{@60-} mov edx,ebx
{@62} @mark3row:
{ } mov [esp+__OFFS+eax*04h],ebx
{ -} xor ebx,ebx
{ } mov [edi+esi*08h+__COLMARK],esi { unmark column with negative }
{ } add dword ptr [esp+__SIZE],01h
{@71} @chk2col:
{0*} add ecx,04h
{1*} jz @@5TH_STEP
{2**} test [edi+ecx*08h+__COLMARK],ecx { STORE FORWARDED from @mark3row }
{0**} jns @chk2col
@@2ND_STEP:
{12} push dword ptr [edi+ecx*08h+__COLMODIFIER]
{@80} lea eax,[ecx+edi]
{ } sub ebx,ebp
{ } sal ecx,10h
{ } mov esi,[edi+ebx*08h+__ROWMODIFIER]
{@8C} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
{0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
{@8F} add esi,[eax+ebp]
{C2D} lea eax,[eax+ebp]
{2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
{0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
{1} jz @zero
{K10}// lea eax,[eax+ebp]
{0} cmp esi,edx
{@9F} cmovb edx,esi
{@A2} cmovb cx,bx
@over2flow:
{0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ebx,04h
{2*} jnz @ZERO2col
{@AF} @zero:
{0} pop eax { add esp,04h } { forces ESP handling to AGU/memory pipe on Kaveri/Core }
{@B0-} mov eax,ecx
{2} sar ecx,10h
{0} cmovnc eax,[edi+__MINCOLROW]
{1} mov [edi+__MINCOLROW],eax
{2**} test ebx,ebx
{0**} jz @chk2col
{@C0*} add esi,[edi+ebx*08h+__0STAR] { zero found -> ESI=0 }
{2*} jz @4TH_STEP
{0} mov eax,[esp+__SIZE]
{1**} cmp word ptr [edi+__MINCOLROW],bx { STORE FORWARDED }
{2**} jz @re3start
{@D0} cmp esi,ecx
{1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{2} cmovl ecx,esi
{0*} sub ecx,04h { never clears ECX register }
{1*} jnz @mark3row { forced conditional jump for Sandy Bridge }
{ x2 } xor esi,esi
{@E0} { x4 } lea eax,[ebp+ebp+00h]
@@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
{@E4-} mov ebx,edx
@4TH_STEP:
{@E6} mov edx,[edi+ecx*08h+__0STARROW]
{2} mov [edi+ebx*08h+__0STAR],ecx
{0} mov [edi+ecx*08h+__0STARROW],ebx
{@F0} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
{2**} cmp edx,00h
{0**} jnz @@4TH_STEP
{ } sub esi,ebp
{ } sub edx,ebp
{ } lea ecx,[esi-04h] { mov ecx,[edi+__INITCOL] }
@@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
{@00} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
{1} and ebx,eax { 2 clk 6 ALU ops on Core 2 }
{2} not eax
{0} mov [edi+esi*08h+__COLMARK],eax
{1} mov eax,[edi+esi*08h+__FIXEDROW]
{2} cmovs ecx,esi
{0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
{1*} add esi,04h
{2*} jnz @@1ST_STEP { clears ESI register }
{ } mov [esp+__SIZE],esi
{ -} xor ebx,ebx
{@21*} add ecx,04h { long jump instruction }
{ *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative ECX:initcol (>= EBP) }
{ } mov esi,[esp+ebp+04h+__MARKS]
{ -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
@@results:

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#102) P.H. válasza P.H. (#101) üzenetére


P.H.
senior tag

Eheti 1000-mátrix-hetente rovat :)

Core2 (2.5 GHz): 60 sec alatt 510000 mátrix

{@04-} { x1 } movsx ebx,byte ptr es:[edx]
{1-} xor eax,eax
{2-} mov esi,ebp
{0} and esi,-8
@init:
{@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
{1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
{2*} add esi,08h
{0*} jnz @init { clears ESI register }
{ } add edx,01h
{ -} mov ecx,ebp
@@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
{@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
{1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
{2} movsx ebx,[edx]
{0} lea edx,[edx+01h]
{1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
{2} cmovs eax,esi
{0} mov [edi+ecx*08h+__FIXEDROW],eax
{1*} add ecx,04h
{2*} jnz @@ARGUMENT { clears ECX register }
{ } add esp,ebp
{ -} mov eax,edi
{ -} push ebp
{@40-} lea edx,[ebp-04h]
@@REDUCE_ROWS:
{@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
{1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
{2*} add edx,04h
{0*} jz @@REDUCE_COLUMNS
{@50} mov [edi+edx*08h+__0STAR],esi
{2-} xor ecx,ecx
{0} sub eax,ebp
{1**} test esi,esi { JS/JNS can only fuse with TEST }
{2**} js @@REDUCE_ROWS
{ -} mov ebx,ebp { EBX < 0 for even minimum }
{ } mov ecx,[eax+ebp]
{@61} or ecx,[edi+ebp*08h+__0STARROW]
{ } and ebp,04h
{ } add ebp,ebx
{@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
{0} mov esi,[eax+ebp+00h] { 4 AGU + 8 EX uops on Kaveri }
{1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
{2} add ebp,08h
{@72} cmp esi,ebx
{1} cmovb ebx,esi
{2} mov esi,[eax+ebp-04h]
{0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
{1} cmp esi,ecx
{@81} cmovb ecx,esi
{0**} test ebp,ebp
{1**} jnz @findrowmin
{ } mov ebp,[esp+00h]
{ } cmp ebx,ecx
{ } cmovb ecx,ebx
{@90} neg ecx
{ } jle @@REDUCE_ROWS
@@ABNORMAL_EXIT:
{@94} pop eax
{1} sub esp,ebp
{2} mov edx,0FFFFFFFFh
{0} mov esi,[esp+__MARKS]
{@A0} mov [esi+TRESULT.OPTIMUM],edx
{2} mov ebx,[esi+TRESULT.NEXTIVALUE]
{0} jmp dword ptr [esp+_INVALIDRESULT]
{ } { x6 } test ebp,0FFFFFFFFh
{@90} @initcol:
{0} neg dword ptr [esp+00h]
{1-} mov esi,ebp
{2} neg ebp
{0} mov [edi+__INITCOL],ecx
{1} or ebx,-1
{2} jmp @@1ST_STEP { long jump instruction }
{@A2} @free0col:
{ } lea ecx,[edx-04h]
{@A5} @setcolmod:
{ } mov [edi+edx*08h+__COLMODIFIER],esi
@@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
{ Loop-exit test of the column pass: EDX is stepped down by 04h on every   }
{ iteration (sub edx,04h below); once it has reached EBP the pass is over  }
{ and control goes to @initcol. The compare must be done here because the  }
{ flags on entry are leftovers (ZF=1 from the jz in @@REDUCE_ROWS, or the  }
{ TEST results on the @setcolmod / @free0col paths).                       }
{0**} cmp edx,ebp
{1**} jz @initcol
{0} sub edx,04h
{@B0-} xor esi,esi
{1**} test [edi+edx*08h+__0STARROW],ebp
{2**} js @setcolmod
{ } lea ebx,[edi+edx]
{ -} mov ecx,ebp
{ -} mov eax,ebp
{ } sub ebx,ebp
{@C0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____
{0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
{1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
{2} or esi,[edi+ecx*08h+__FIXEDROW]
{0} jz @test0row
{1} sub ebx,ebp
{2} cmp esi,eax
{@D0} cmovb eax,esi
{1*} add ecx,04h
{2*} jnz @findcolmin
{ } lea ecx,[ebp-04h]
{ -} mov esi,eax
{ } lea ebx,[edi+edx]
{@E0**} test eax,eax { JS/JNS can only fuse with TEST }
{ **} js @@ABNORMAL_EXIT
{@E4} @seekcol0:
{0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ecx,04h
{2*} jz @free0col
{0} sub ebx,ebp
{1} add eax,[ebx]
{@F1**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
{0**} jnz @seekcol0
@test0row:
{ **} test [edi+ecx*08h+__0STAR],ebp
{ **} js @seekcol0
{ } mov [edi+edx*08h+__0STARROW],ecx
{@FE} mov [edi+ecx*08h+__0STAR],edx
{@02} jns @free0col { forced conditional jump for Sandy Bridge }
{ ----------------------------------------------------------------------------------------------- }
{@04} { x12 } mov eax,00000000h; mov edx,00000000h; xor ebp,ebp
{@10} { x5 } mov ecx,00000000h
@@5TH_STEP: { K10:2.6 Core2:2.4 - 2.8 uop/clk - 2000*2+5100
{@15} mov eax,[edi+__INITCOL] { lea eax,[ebp+04h]; neg eax }
{1} mov esi,[esp+__SIZE]
{2} movsx ebx,word ptr [edi+__MINCOLROW]
{@20} @DEC5_free_col: { 3 AGU + 6 EX uops on Kaveri }
{0} add [edi+eax*08h+__COLMODIFIER],ecx { 2 clk 5 ALU ops on Core 2 }
{1} mov ecx,[edi+eax*08h+(04h*08h)+__COLMARK]
{2} sar ecx,1Fh
{0} and ecx,edx
{1*} add eax,04h
{@30*} jnz @DEC5_free_col { clears EAX register [NOT USED] }
{ } mov eax,[esp+__SIZE+esi*04h]
{ } movsx ecx,word ptr [edi+__MINCOLROW+02h]
{ } jmp @INC5_marked_row
{ x4 } xor ebp,ebp; xor esi,esi
{@40} @inc5row:
{0} add [edi+eax*08h+__ROWMODIFIER],edx { 4 AGU + 4 EX uops on Kaveri }
{1-} mov eax,ebp
@INC5_marked_row:
{2} mov ebp,[esp+esi*04h]
{0*} sub esi,01h
{1*} jge @inc5row { sets ESI to 0FFFFFFFFh }
@@3RD_STEP:
{@4E*} and esi,[edi+ebx*08h+__0STAR]
{@52*} jz @4TH_STEP { long jump instruction }
{@58} @re3start:
{ } mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{ } { x1 } mov ecx,es:[edi+__INITCOL] { lea ecx,es:[ebp-04h] }
{@60-} mov edx,ebx
{@62} @mark3row:
{ } mov [esp+__OFFS+eax*04h],ebx
{ -} xor ebx,ebx
{ } mov [edi+esi*08h+__COLMARK],esi { unmark column with negative }
{ } inc eax
{ } mov [esp+__SIZE],eax
{@71} @chk2col:
{0*} add ecx,04h
{1*} jz @@5TH_STEP { clears ECX register }
{2**} test [edi+ecx*08h+__COLMARK],ecx { STORE FORWARDED from @mark3row }
{0**} jns @chk2col
@@2ND_STEP:
{12} push dword ptr [edi+ecx*08h+__COLMODIFIER]
{@80} lea eax,[ecx+edi]
{ } sub ebx,ebp
{ } sal ecx,10h
{ } mov esi,[edi+ebx*08h+__ROWMODIFIER]
{@8C} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
{0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
{@8F} add esi,[eax+ebp]
{C2D} lea eax,[eax+ebp] { Core 2, Kaveri }
{2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
{0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
{1} jz @zero
{K10}// lea eax,[eax+ebp] { K10, Sandy Bridge, Ivy Bridge }
{0} cmp esi,edx
{@9F} cmovb edx,esi
{@A2} cmovb cx,bx
@over2flow:
{0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ebx,04h
{2*} jnz @ZERO2col { clears EBX register }
{@AF} @zero:
{0} pop eax { add esp,04h } { forces ESP handling to AGU/memory pipe on Kaveri/Core }
{@B0-} mov eax,ecx
{2} sar ecx,10h
{0} cmovnc eax,[edi+__MINCOLROW]
{1} mov [edi+__MINCOLROW],eax
{2**} test ebx,ebx
{0**} jz @chk2col
{@C0*} add esi,[edi+ebx*08h+__0STAR] { zero found -> ESI=0 }
{2*} jz @4TH_STEP
{0} cmp ax,bx
{1} { x1 } mov eax,ss:[esp+__SIZE]
{2} jz @re3start
{@D0} cmp esi,ecx
{1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{2} cmovl ecx,esi
{0*} sub ecx,04h { never clears ECX register }
{1*} jnz @mark3row { forced conditional jump for Sandy Bridge }
{ x2 } xor esi,esi
{@E0} { x4 } lea eax,[ebp+ebp+00h]
@@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
{@E4-} mov ebx,edx { 2 clk 2 ALU ops on Core 2 }
@4TH_STEP:
{@E6} mov edx,[edi+ecx*08h+__0STARROW]
{2} mov [edi+ebx*08h+__0STAR],ecx
{0} mov [edi+ecx*08h+__0STARROW],ebx
{@F0} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
{2**} cmp edx,00h
{0**} jnz @@4TH_STEP { clears EDX register }
{ } sub esi,ebp
{ } sub edx,ebp
{ } lea ecx,[esi-04h] { mov ecx,[edi+__INITCOL] }
@@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
{@00} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
{1} and ebx,eax { 3 clk 6 ALU ops on Core 2 }
{2} not eax
{0} mov [edi+esi*08h+__COLMARK],eax
{1} mov eax,[edi+esi*08h+__FIXEDROW]
{2} cmovs ecx,esi
{0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
{1*} add esi,04h
{2*} jnz @@1ST_STEP { clears ESI register }
{ } mov [esp+__SIZE],esi
{ -} xor ebx,ebx
{@21*} add ecx,04h { long jump instruction }
{ *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative ECX:initcol (>= EBP) }
{ } mov esi,[esp+ebp+04h+__MARKS]
{ -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
@@results:
{@30} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
{1} add ebx,ebp
{2} add ecx,[ebx+eax]
{0} add eax,ebp
{1} shr eax,02h
{2} mov [esi],al
{@40} add esi,01h
{1*} add edx,04h
{2*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }
{0} pop eax
{1} add esp,ebp
{2} neg ebp
{0} or eax,-1
{@50} lea ebx,[edi+ebp*04h]
{1} sar ebp,02h
{2} mov [esi+ebp+TRESULT.OPTIMUM],ecx
{0} add esi,ebp
{1-} xor ecx,ecx
{2} jmp @onchain

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#103) P.H. válasza P.H. (#102) üzenetére


P.H.
senior tag

Core2 (2.5 GHz): 59 sec alatt megoldja a feladatot
K10 (2.9 GHz): Core2-nek tetsző ciklusverzióval 46 sec alatt oldja meg a feladatot
Prescott (2.26 GHz): 60 sec alatt 220000 mátrix

{@04-} { x1 } movsx ebx,byte ptr es:[edx]
{1-} xor eax,eax
{2-} mov esi,ebp
{0} and esi,-8
@init:
{@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
{1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
{2*} add esi,08h
{0*} jnz @init { clears ESI register }
{ } add edx,01h
{ -} mov ecx,ebp
@@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
{@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
{1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
{2} movsx ebx,[edx]
{0} lea edx,[edx+01h]
{1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
{2} cmovs eax,esi
{0} mov [edi+ecx*08h+__FIXEDROW],eax
{1*} add ecx,04h
{2*} jnz @@ARGUMENT { clears ECX register }
{ -} { x2 } xor ecx,ecx
{ -} mov eax,edi
{ -} push ebp
{@40-} lea edx,[ebp-04h]
@@REDUCE_ROWS:
{@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
{1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
{2*} add edx,04h
{0*} jz @@REDUCE_COLUMNS
{@50} mov [edi+edx*08h+__0STAR],esi
{2-} xor ecx,ecx
{0} sub eax,ebp
{1**} test esi,esi { JS/JNS can only fuse with TEST }
{2**} js @@REDUCE_ROWS
{ -} mov ebx,ebp { EBX < 0 for even minimum }
{ } mov ecx,[eax+ebp]
{@61} or ecx,[edi+ebp*08h+__0STARROW]
{ } and ebp,04h
{ } add ebp,ebx
{@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
{0} mov esi,[eax+ebp+00h] { 4 AGU + 8 EX uops on Kaveri }
{1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
{2} add ebp,08h
{@72} cmp esi,ebx
{1} cmovb ebx,esi
{2} mov esi,[eax+ebp-04h]
{0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
{1} cmp esi,ecx
{@81} cmovb ecx,esi
{0**} test ebp,ebp
{1**} jnz @findrowmin
{ } mov ebp,[esp+00h]
{ } cmp ebx,ecx
{ } cmovb ecx,ebx
{@90} neg ecx
{ } jle @@REDUCE_ROWS
{ -} nop
@@ABNORMAL_EXIT:
{@95} pop eax
{1} or edx,-1
{2} mov esi,[esp+__MARKS]
{0} mov [esi+TRESULT.OPTIMUM],edx
{@A0} mov ebx,[esi+TRESULT.NEXTIVALUE]
{2} jmp dword ptr [esp+_INVALIDRESULT]
{ } { x6 } test ebp,0FFFFFFFFh
{@AD} @init0col:
{0} mov [edi+__INITCOL],ecx
{@B0-} mov esi,ebp
{2} neg ebp
{0} or ebx,-1
{1*} sub ecx,04h
{2*} jnz @@1ST_STEP { long jump instruction } { forced conditional jump for Sandy Bridge }
{@C0} { x3 } cmp ebp,00h
{@C3} @free0col:
{ -} mov ecx,edx
{@C5} @setcolmod:
{ } mov [edi+edx*08h+__COLMODIFIER],esi
@@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
{0**} cmp edx,ebp
{1**} jz @init0col
{0} sub edx,04h
{@D0-} xor esi,esi
{1**} test [edi+edx*08h+__0STARROW],ebp
{2**} js @setcolmod
{ } lea ebx,[edi+edx]
{ -} mov ecx,ebp
{ -} mov eax,ebp
{ } sub ebx,ebp
{@E0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____
{0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
{1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
{2} or esi,[edi+ecx*08h+__FIXEDROW]
{0} jz @test0row
{1} sub ebx,ebp
{2} cmp esi,eax
{@F0} cmovb eax,esi
{1*} add ecx,04h
{2*} jnz @findcolmin
{ } lea ecx,[ebp-04h]
{ -} mov esi,eax
{ } lea ebx,[edi+edx]
{@00**} test eax,eax { JS/JNS can only fuse with TEST }
{ **} js @@ABNORMAL_EXIT
{@04} @seekcol0:
{0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ecx,04h
{2*} jz @free0col
{0} sub ebx,ebp
{1} add eax,[ebx]
{@11**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
{0**} jnz @seekcol0
@test0row:
{ **} test [edi+ecx*08h+__0STAR],ebp
{ **} js @seekcol0
{ } mov [edi+edx*08h+__0STARROW],ecx
{@1E} mov [edi+ecx*08h+__0STAR],edx
{@22} jns @free0col { forced conditional jump for Sandy Bridge }
{ ----------------------------------------------------------------------------------------------- }
{@24} { x12 } test ebp,0FFFFFFFFh; test edi,0FFFFFFFFh
{@30} { x9 } mov ecx,00000000h; xor esi,esi; xor edi,edi
@@5TH_STEP: { K10:2.6 Core2:_._ - _._ uop/clk - ____*2+____
{@39} mov ecx,[edi+__MINCOLROW]
{ } sub ebx,ebp
{ } neg edx
{@40} @DEC5_free_col: { 5 AGU + 11 EX uops on Kaveri }
{0} mov eax,[edi+ebx*08h+__COLMARK] { 3 clk 8 ALU ops on Core 2 }
{1} sar eax,1Fh
{2} mov [edi+ebx*08h+__COLMARK],eax
{0} and eax,edx
{1} sub [edi+ebx*08h+__COLMODIFIER],eax
{@51} mov eax,[edi+ebx*08h+__0COLON___ROWMARK]
{0} sar eax,1Fh
{1} and eax,edx
{2} sub [edi+ebx*08h+__ROWMODIFIER],eax
{0*} add ebx,04h
{@61*} jnz @DEC5_free_col { clears EBX register [NOT USED] }
{@63} movsx ebx,cx
{1} sar ecx,10h
{2} mov esi,[edi+ebx*08h+__0STAR]
{0**} cmp esi,00h
{@70**} jz @4TH_STEP { long jump instruction }
{2} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{0} mov dword ptr [edi+esi*08h+__COLMARK],0FFFFFFFFh { unmark column with -1 }
{1} mov esi,[edi+__INITCOL]
{@85} @mark3row:
{ -} xor ebx,ebx
{ } lea ecx,[esi-04h]
{ } jmp @chk2col
@pass2col:
{ } mov [edi+ecx*08h+__COLMARK],ecx { re-mark column with column index <> -1 }
{@90} @chk2col:
{0*} add ecx,04h
{1*} jz @@5TH_STEP { clears ECX register }
{2**} cmp [edi+ecx*08h+__COLMARK],ecx
{0**} jbe @chk2col
@@2ND_STEP:
{ } lea eax,[ecx+edi]
{ } sub ebx,ebp
@continue:
{@A0} { x1 } push dword ptr es:[edi+ecx*08h+__COLMODIFIER]
{ } sal ecx,10h
{ } mov esi,[edi+ebx*08h+__ROWMODIFIER]
{@AC} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
{0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
{@AF} add esi,[eax+ebp]
{C2D} lea eax,[eax+ebp]
{2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
{0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
{1} jz @@3RD_STEP
{K10}// lea eax,[eax+ebp]
{0} cmp esi,edx
{@BF} cmovb edx,esi
{@C2} cmovb cx,bx
@over2flow:
{0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ebx,04h
{2*} jnz @ZERO2col { clears EBX register }
@@3RD_STEP:
{@CF} pop esi { add esp,04h } { enforces ESP handling to AGU/load pipe on Kaveri/Core }
{@D0-} mov esi,ecx
{2} sar ecx,10h
{0} cmovnc esi,[edi+__MINCOLROW]
{1} mov [edi+__MINCOLROW],esi
{2**} test ebx,ebx
{0**} jz @pass2col
{@E0} mov esi,[edi+ebx*08h+__0STAR]
{2**} test esi,esi
{0**} jz @4TH_STEP
{1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{2} or dword ptr [edi+esi*08h+__COLMARK],-1 { unmark column with -1 }
{@F1**} cmp word ptr [edi+__MINCOLROW],bx
{1**} jz @re2start
{2**} cmp esi,ecx { jb = jl for 2 negative numbers }
{0**} jb @mark3row
{1*} add ebx,04h
{2*} jnz @continue
{@00} jmp @pass2col
{1} { x2 } xor eax,eax
{@04} @re2start:
{0} mov ecx,[edi+__INITCOL]
{1-} mov ebx,ebp
{2} neg ebx
@initcol:
{0} sar dword ptr [edi+ebx*08h+__COLMARK],1Fh
{@10*} add ebx,04h
{2*} jnz @initcol { clears EBX register }
{ } or edx,-1
{ *} sub ecx,04h
{ *} jnz @chk2col { long jump instruction }
{@20} { x4 } lea eax,[ebp+ebp+00h]
@@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
{@24-} mov ebx,edx { 2 clk 2 ALU ops on Core 2 }
@4TH_STEP:
{@26} mov edx,[edi+ecx*08h+__0STARROW]
{2} mov [edi+ebx*08h+__0STAR],ecx
{0} mov [edi+ecx*08h+__0STARROW],ebx
{@30} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
{2**} cmp edx,00h
{0**} jnz @@4TH_STEP { clears EDX register }
{ } sub esi,ebp
{ } sub edx,ebp
{ } lea ecx,[esi-04h]
@@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
{@40} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
{1} and ebx,eax { 3 clk 6 ALU ops on Core 2 }
{2} not eax
{0} mov [edi+esi*08h+__COLMARK],eax
{1} mov eax,[edi+esi*08h+__FIXEDROW]
{2} cmovs ecx,esi
{0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
{1*} add esi,04h
{2*} jnz @@1ST_STEP { clears ESI register }
{ } { x3 } mov ebx,00000000h
{@60*} add ecx,04h { long jump instruction }
{ *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative = -EBP ECX:initcol (>= EBP) }
{ } { x1 } mov esi,ss:[esp+04h+__MARKS]
{ -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
@@results:
{@70} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
{1} add ebx,ebp
{2} add ecx,[ebx+eax]
{0} add eax,ebp
{1} shr eax,02h
{2} mov [esi],al
{@80} add esi,01h
{1*} add edx,04h
{2*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#104) P.H.


P.H.
senior tag

Több mint 1 Kbyte-os (1124 karakter) Java utasítás: arg egész pozitív számot szöveggé konvertálja dest tömbbe i pozíciótól kezdődően, legfeljebb size karakter méretben; 0-k helyett space-ekkel felvezetve, ha a szám kevesebb karakterből áll, mint size.

/**
 * Renders the magnitude of {@code arg} as ASCII decimal digits into
 * {@code dest}, starting at index {@code i}. Leading zeros are emitted as
 * spaces; the last digit is always emitted as a digit.
 *
 * All ten decimal places are generated, most significant first. Each of the
 * first nine is stored at the current write index, but the index only moves
 * forward for places that fall inside the rightmost {@code size} positions,
 * so earlier places are simply overwritten. The tenth digit is stored at the
 * final index without advancing.
 *
 * Digits are produced without division: the value is split into an upper
 * and a lower group of five digits by a multiply/shift, and each group is
 * converted to a fixed-point number with 15 fractional bits from which one
 * digit is peeled per step (multiply the fraction by 5, use one fractional
 * bit less).
 *
 * @param arg  value to convert; its absolute value is what gets printed
 * @param size field width in characters
 * @param i    index in {@code dest} of the first character of the field
 * @param dest destination buffer, modified in place
 * @return the same {@code dest} array
 */
public static byte[] inttoSPACEstr(int arg, int size, int i, byte[] dest) {
    // Branch-free absolute value.
    final int signMask = arg >> 31;
    final int magnitude = (arg ^ signMask) - signMask;

    // upperGroup = magnitude / 100000 via multiply-high; lowerGroup = remainder.
    final int upperGroup = ((int) ((magnitude * 0x00000000A7C5AC47L) >>> 32)) >>> 16;
    final int lowerGroup = magnitude - upperGroup * 100000;

    int cursor = i;          // current write index into dest
    int nonZeroSeen = 0;     // OR of all digits so far; 0 while only leading zeros
    int scaled = 0;          // fixed-point remainder of the current 5-digit group
    int fractionBits = 0;    // number of fractional bits currently in 'scaled'

    for (int place = 1; place <= 9; place++) {
        // Places 1 and 6 start a fresh 5-digit group.
        if (place == 1 || place == 6) {
            final int group = (place == 1) ? upperGroup : lowerGroup;
            scaled = 1 + (int) ((group * 0x00000000D1B71759L) >>> 30);
            fractionBits = 15;
        }

        // Peel the integer part off as the digit, keep the fraction times 5
        // with one fractional bit less (net effect: fraction times 10).
        final int digit = scaled >>> fractionBits;
        scaled = (scaled & ((1 << fractionBits) - 1)) * 5;
        fractionBits--;

        // While no non-zero digit has been seen the mask term is -16,
        // turning '0' (48) into ' ' (32); afterwards it is 0.
        nonZeroSeen |= digit;
        final byte glyph = (byte) (((nonZeroSeen - 1) & 0xFFFFFFF0) + digit + 48);
        dest[cursor] = glyph;

        // Advance only when the sign bit of (glyph | (10 - place - size)) is set.
        cursor += (glyph | (10 - place - size)) >>> 31;
    }

    // Tenth place: always a digit, never blanked.
    dest[cursor] = (byte) ((scaled >>> fractionBits) + 48);

    return dest;
}

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#105) P.H.


P.H.
senior tag

Nincs többé külön K10-nek tetsző és külön Core2-nek tetsző verzió: egyetlen van, amely mindkettőn jó, kb. 0.5% veszteséggel.

Core2 (2.5 GHz): 58 sec alatt megoldja a feladatot
K10 (2.9 GHz): 44 sec alatt megoldja a feladatot
Prescott (2.26 GHz): 60 sec alatt 220000 mátrix
Northwood (2.4 GHz): 60 sec alatt 248000 mátrix

{@04-} { x1 } movsx ebx,byte ptr es:[edx]
{1-} mov esi,ebp
{2-} xor eax,eax
{0} and esi,-8
@init:
{@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
{1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
{2*} add esi,08h
{0*} jnz @init { clears ESI register }
{ } add edx,01h
{ -} mov ecx,ebp
@@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
{@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
{1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
{2} movsx ebx,byte ptr [edx]
{0} lea edx,[edx+01h] { db $8D,$52,$00 }
{1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
{2} cmovs eax,esi
{0} mov [edi+ecx*08h+__FIXEDROW],eax
{1*} add ecx,04h
{2*} jnz @@ARGUMENT { clears ECX register }
{ -} { x2 } xor ecx,ecx
{ -} mov eax,edi
{ -} push ebp
{@40-} lea edx,[ebp-04h]
@@REDUCE_ROWS:
{@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
{1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
{2*} add edx,04h
{0*} jz @@REDUCE_COLUMNS
{@50} mov [edi+edx*08h+__0STAR],esi
{2-} xor ecx,ecx
{0} sub eax,ebp
{1**} test esi,esi { JS/JNS can only fuse with TEST }
{2**} js @@REDUCE_ROWS
{ -} mov ebx,ebp { EBX < 0 for even minimum }
{ } mov ecx,[eax+ebp]
{@61} or ecx,[edi+ebp*08h+__0STARROW]
{ } and ebp,04h
{ } add ebp,ebx
{@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
{0} mov esi,[eax+ebp] { 4 AGU + 8 EX uops on Kaveri }
{1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
{2} add ebp,08h
{@72} cmp esi,ebx
{1} cmovb ebx,esi
{2} mov esi,[eax+ebp-04h]
{0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
{1} cmp esi,ecx
{@81} cmovb ecx,esi
{0**} test ebp,ebp
{1**} jnz @findrowmin
{ } mov ebp,[esp+00h]
{ } cmp ebx,ecx
{ } cmovb ecx,ebx
{@90} neg ecx
{ } jle @@REDUCE_ROWS
{ -} { x1 } nop
@@ABNORMAL_EXIT:
{@95} pop eax
{1} { x3 } or edx,0FFFFFFFFh
{2} mov esi,[esp+__MARKS]
{@A0} mov [esi+TRESULT.OPTIMUM],edx
{1} mov ebx,[esi+TRESULT.NEXTIVALUE]
{2} jmp dword ptr [esp+_INVALIDRESULT]
{ x1 } nop
{@AB} @init0col:
{0} mov [edi+__INITCOL],ecx
{1-} mov esi,ebp
{@B0} neg ebp
{0} mov ebx,0FFFFFFFFh
{1*} sub ecx,04h
{2*} jnz @@1ST_STEP { long jump instruction } { forced conditional jump for Sandy Bridge }
{@C0} { x2 } xor eax,eax
{@C2} @free0col:
{ } mov [edi+edx*08h+__COLMODIFIER],esi { no need to initialize __COLMODIFIER of fixed column }
{ -} mov ecx,edx
{@C8} @next0col:
{ **} cmp edx,ebp
{ **} jz @init0col
@@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
{0} mov eax,[edi+edx*08h-(04h*08h)+__0STARROW]
{@D0} sub edx,04h
{2*} sub eax,01h
{0*} jnc @next0col
{ } { x1 } lea ebx,es:[edi+edx]
{ -} mov ecx,ebp
{ } sub ebx,ebp
{@E0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____
{0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
{1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
{2} or esi,[edi+ecx*08h+__FIXEDROW]
{0} jz @test0row
{1} sub ebx,ebp
{2} cmp esi,eax
{@F0} cmovb eax,esi
{1*} add ecx,04h
{2*} jnz @findcolmin
{ } lea ecx,[ebp-04h]
{ -} mov esi,eax
{ } lea ebx,[edi+edx]
{@00**} test eax,eax { JS/JNS can only fuse with TEST }
{ **} js @@ABNORMAL_EXIT
{@04} @seekcol0:
{0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ecx,04h
{2*} jz @free0col
{0} sub ebx,ebp
{1} add eax,[ebx]
{@11**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
{0**} jnz @seekcol0
@test0row:
{ **} test [edi+ecx*08h+__0STAR],ebp { JS/JNS can only fuse with TEST }
{ **} js @seekcol0
{ } mov [edi+edx*08h+__0STARROW],ecx
{@1E} mov [edi+ecx*08h+__0STAR],edx
{@22} jns @free0col { forced conditional jump for Sandy Bridge }
{ ----------------------------------------------------------------------------------------------- }
{@24} { x12 } mov eax,00000000h; mov edx,00000000h; xor ecx,ecx
{@30} { x5 } mov esi,00000000h
@@5TH_STEP: { K10:2.2 Core2:2.2 - 2.7 uop/clk - 3050*2+3700 }
{@35} movsx esi,word ptr [edi+__MINCOLROW+00h]
{ } sub ebx,ebp
{ } movsx eax,word ptr [edi+ebx*08h+__SIGN-(04h*08h)+__COLMARK]
{@40} @5th_step: { 5 AGU + 11 EX uops on Kaveri }
{0} movsx ecx,word ptr [edi+ebx*08h+__SIGN+__0COLON___ROWMARK] { 4 clk 6 ALU ops on Core 2 }
{1} mov [edi+ebx*08h-(04h*08h)+__COLMARK],eax
{2} and eax,edx
{0} add [edi+ebx*08h+__COLMODIFIER],eax
{@4F} and ecx,edx
{2} movsx eax,word ptr [edi+ebx*08h+__SIGN-(04h*08h)+(04h*08h)+__COLMARK] { __MINCOLROW col }
{0} add [edi+ebx*08h+__ROWMODIFIER],ecx
{1*} add ebx,04h
{2*} jnz @5th_step { clears EBX register }
{@5F} mov ecx,[edi+__INITCOL]
{1-} mov edx,esi
{2} mov esi,[edi+esi*08h+__0STAR]
{0**} test esi,esi
{1**} jz @@4TH_STEP { long jump instruction }
{@70} mov [edi+edx*08h+__0COLON___ROWMARK],eax { set row mark }
{0} mov dword ptr [edi+esi*08h-(04h*08h)+__COLMARK],-1 { unmark column with -1 }
{1} jmp @test2col
{ x2 } xor esi,esi
{@80} { x2 } xor eax,eax
{@82} @fast2forward:
{ *} add ebx,04h
{ *} jnz @continue
{@87} @pass2col:
{ } mov [edi+ecx*08h-(04h*08h)+__COLMARK],ecx { re-mark column with its index != -1 }
{@8B} @next2col:
{0*} add ecx,04h
{1*} jz @@5TH_STEP { clears ECX register }
@test2col:
{@90**} cmp [edi+ecx*08h-(04h*08h)+__COLMARK],ecx
{0**} jbe @next2col
@@2ND_STEP:
{ } sub ebx,ebp { ordered for Core2 }
{ } lea eax,[ecx+edi] { ordered for Core2 }
{@9B} @continue:
{ } mov esi,[edi+ecx*08h+__COLMODIFIER]
{ } push esi
{@A0} sal ecx,10h
{ } mov esi,[edi+ebx*08h+__ROWMODIFIER]
{@A7} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
{0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
{1} add esi,[eax+ebp]
{2} lea eax,[eax+ebp]
{@B0} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
{1} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
{2} jz @@3RD_STEP
{0} cmp esi,edx
{1} cmovb edx,esi
{2} cmovb cx,bx
{@C1} @over2flow:
{0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ebx,04h
{2*} jnz @ZERO2col { clears EBX register }
@@3RD_STEP:
{@CA} pop esi { add esp,04h } { enforces ESP tracking to AGU/load pipe on Bulldozer/Core }
{1-} mov esi,ecx
{2} sar ecx,10h
{@D0} cmovnc esi,[edi+__MINCOLROW]
{1} mov [edi+__MINCOLROW],esi
{2**} { x1 } cmp ebx,00h
{0**} jz @pass2col
{1} mov esi,[edi+ebx*08h+__0STAR]
{@E0**} test esi,esi
{0**} jz @4TH_STEP
{1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{2} mov dword ptr [edi+esi*08h-(04h*08h)+__COLMARK],-1 { unmark column with -1 }
{@F0**} cmp word ptr [edi+__MINCOLROW],bx { STORE FORWARDED }
{1**} jz @re2start
{2**} cmp esi,ecx { jb = jl for 2 negative numbers }
{0**} jae @fast2forward
{1-} xor ebx,ebx
{2-} mov ecx,esi
{0} jmp @@2ND_STEP
{@00} @re2start:
{0} mov ecx,[edi+__INITCOL]
{1-} mov edx,ebx
{2-} mov ebx,ebp
{0} neg ebx
{@09} @init2col:
{0} movsx eax,word ptr [edi+ebx*08h+__SIGN-(04h*08h)+__COLMARK]
{1} mov [edi+ebx*08h-(04h*08h)+__COLMARK],eax
{2*} add ebx,04h
{0*} jnz @init2col { clears EBX register }
{ } jmp @test2col { long jump instruction }
{ } { x4 } lea eax,[ebp+ebp+00h]
{@20} { x2 } test edi,edi
@@4TH_STEP:
{@22-} mov ecx,eax
@re4order:
{@24-} mov ebx,edx
@4TH_STEP:
{@26} mov edx,[edi+ecx*08h+__0STARROW]
{2} mov [edi+ebx*08h+__0STAR],ecx
{0} mov [edi+ecx*08h+__0STARROW],ebx
{@30} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
{2**} { x1 } cmp edx,00h
{0**} jnz @re4order { clears EDX register }
{ } sub esi,ebp
{ } sub edx,ebp
{ } lea ecx,[esi-04h]
@@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
{@40} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
{1} and ebx,eax { clears EBX at uncomplete calculation } { 3 clk 6 ALU ops on Core 2 }
{2} not eax
{0} mov [edi+esi*08h-(04h*08h)+__COLMARK],eax
{1} mov eax,[edi+esi*08h+__FIXEDROW]
{2} cmovs ecx,esi
{0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
{1*} add esi,04h
{2*} jnz @@1ST_STEP { clears ESI register [NOT USED] }
{ *} add ecx,04h { long jump instruction }
{ *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative = -EBP ECX:initcol (>= EBP) }
{ -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
{ } mov esi,[esp+04h+__MARKS]
@@results:
{@6A} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
{1} add ebx,ebp
{@70} add ecx,[ebx+eax]
{0} add eax,ebp
{1} shr eax,02h
{2} mov [esi],al
{0} add esi,01h
{1*} add edx,04h
{@80*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#106) P.H. válasza P.H. (#92) üzenetére


P.H.
senior tag

Egy Pentium 4 1 nagyságrenddel nagyobb teljesítményre képes, mint egy Java-ban írt program ARM-on (legalábbis mert léteznek pointerek, nincs szigorú típusosság, nincs byte-nál automatikus előjeles kiterjesztés 4 byte-ra, amit le kell küzdeni, stb.); még úgy is, hogy a branch prediction success rate 87% körüli.

A ciklus ASM-ben:

@character:
{@65} { } mov [edi],al
{ } add edi,01h
@@DECODE:
{@6A} { } mov eax,[esp+_aMAXMINBITS]
@read_raw: { BPOS may be 20h since decreased soon }
{ } movzx ebp,byte ptr [esi]
{@70} { -} mov ecx,edx
{ } sub edx,(24+1)
{ } shl ebp,cl
{ } shr edx,1Fh
{ } add esi,edx
{ } lea edx,[ecx+edx*08h]
{ } or ebx,ebp
{@81} { **} cmp dl,al
{ **} jb @read_raw
@createABCcode:
{ } movzx ebp,bl
{ } movzx ecx,bh
{ } shr eax,10h
{ } { x1 } mov ebp,dword ptr es:[REVERSE2hi+ebp*04h]
{ } { x1 } add ebp,dword ptr es:[REVERSE2lo+ecx*04h]
{ -} mov ecx,eax
{@A0} { } mov eax,offset(EXT_AMINMAXCODE)
@seekABC:
{@A5} {0**} cmp [eax+ecx*08h+00h+_MAX],ebp
{1**} jnbe @foundABC
{2} cmp [eax+ecx*08h-08h+_MAX],ebp
{0} lea ecx,[ecx-02h]
{@B1} {1} jbe @seekABC
{ } add ecx,01h
@foundABC:
{@B6} {0} mov eax,[eax+ecx*08h+_MIN]
{1} shr ebp,cl
{2} { x1 } mov eax,es:[eax+ebp*04h]
{@C0} {0} neg ecx
{1} add ecx,10h
{2} sub edx,ecx
{0} shr ebx,cl
{1**} cmp eax,255
{2**} jna @character { SHORT jump instruction offset: -6Bh }
@repeatABC:
{@D0} {0-} mov ecx,eax
{1} movzx ebp,ah
{2} shr eax,10h
{0} jz @@SECTION
{1} sub dl,cl
{@E0} {2} and ebp,ebx
{0} shr ebx,cl
{1} add ebp,eax
{2} { x1 } mov eax,ss:[esp+_dMAXMINBITS]
{0} add edi,ebp
{1} neg ebp
{2} mov [esp+_MOVELEN],ebp
@read__raw: { BPOS may be 20h since decreased soon }
{@F3} {0} movzx ebp,byte ptr [esi]
{1-} mov ecx,edx
{2} sub edx,(24+1)
{0} shl ebp,cl
{1} shr edx,1Fh
{@00} {2} add esi,edx
{0} lea edx,[ecx+edx*08h]
{1} or ebx,ebp
{2**} cmp dl,al
{0**} jb @read__raw
@createDISTcode:
{ } movzx ebp,bl
{ } movzx ecx,bh
{@11} { } shr eax,10h
{ } { x1 } mov ebp,dword ptr es:[REVERSE2hi+ebp*04h]
{ } { x1 } add ebp,dword ptr es:[REVERSE2lo+ecx*04h]
{@24} { -} mov ecx,eax
{ } mov eax,offset(EXT_DMINMAXCODE)
@seekDIST:
{0**} cmp [eax+ecx*08h+00h+_MAX],ebp
{1**} jnbe @foundDIST
{@90} {2} cmp [eax+ecx*08h-08h+_MAX],ebp
{0} lea ecx,[ecx-02h]
{1} jbe @seekDIST
{ } add ecx,01h
@foundDIST:
{0} mov eax,[eax+ecx*08h+_MIN]
{@A0} {1} shr ebp,cl
{2} mov eax,[eax+ebp*04h]
{0} neg ecx
{1} add ecx,10h
{2} sub edx,ecx
{0} shr ebx,cl
{1**} cmp al,00h
{@70} {2**} js @xxx
{0} jz @yyy
@DISTbits: { BPOS may be 20h since decreased soon }
{0} movzx ebp,byte ptr [esi]
{1-} mov ecx,edx
{2} sub edx,(24+1)
{@80} {0} shr edx,1Fh
{1} add esi,edx
{2} lea edx,[ecx+edx*08h]
{0} shl ebp,cl
{1} or ebx,ebp
{2**} cmp edx,16
{0**} jb @DISTbits
@srcposition:
{@91} {1} movzx ecx,ax
{2} sar eax,10h
{0} mov ebp,dword ptr [OFF+ecx*04h]
{1} sub edx,ecx
{@A0} {2} add eax,edi
{0} and ebp,ebx
{1} shr ebx,cl
@copy: ...

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#107) P.H. válasza P.H. (#106) üzenetére


P.H.
senior tag

és befele:

@program:
{0} movzx ecx,byte ptr [esi+00h]
@x:
{0**} cmp edx,ebp
{1**} jae @init
{2} add edx,01h
{0**} cmp cl,[esi+edx]
{1**} jz @x
@init:
{ **} cmp edx,(03+01)
{ **} jae @long_jump
{ -} xor ebp,ebp
{ } mov [esp+_Q],edi
{ } mov ch,cl
@repeat:
{0-} mov edi,eax
{1} and eax,(__WINDOW-1)
{2} sub edi,ebx
{0} mov eax,[esp+_PREV+eax*04h]
{1**} cmp edi,-(__WINDOW)
{2**} jbe @pre_encode { JLE = JBE for 2 negative numbers }
{0} add edi,esi
{1**} cmp ch,[edi+ebp]
{2**} jnz @repeat
{0**} cmp cl,[edi+00h]
{1**} jnz @repeat
{2-} xor edx,edx
@length:
{0**} cmp edx,[esp+...]
{1**} jae @QQ
{2} movzx ecx,byte ptr [esi+edx+01h]
{0} cmp cl,[edi+edx+01h]
{1} lea edx,[edx+01h]
{2} jz @length
{0} movzx ecx,byte ptr [esi+00h]
{1} sub edi,esi
{2} cmp edx,ebp
{0} cmova ebp,edx
{1} mov ch,[esi+ebp]
{2} jbe @repeat
{0} mov [esp+...],edi
{1} jmp @repeat
@QQ:
{0} movzx ecx,byte ptr [esi+00h]
{1} sub edi,esi
{2} mov ebp,00000028
{0} mov [esp+...],edi
{1} mov edi,[esp+_Q]
{2-} xor eax,eax
{0**} cmp edx,???
{1**} jz @QQQ
{2-} mov ebp,edx
@pre_encode:
{0} mov edi,[esp+_Q]
{1-} xor eax,eax
{2*} sub ebp,03h
{0*} jb @encode_alpha
{1} lea edx,[ebp+03h]
@QQQ:
{ } mov [esp+...],ebx
{ -} mov ebx,eax
{ } jz @indexed
{---} bsr ecx,ebp
{0*} sub ecx,02h
{1*} jle @indexed
{2} mov ebx,dword ptr [...+ecx*04h]
{0-} mov eax,ecx
{1} and ebx,ebp
{2} shr ebp,cl
{0} lea ebp,[ebp+ecx*04h+00h]
@indexed:
{0} mov ecx,dword ptr [...+ebp*04h]
{1} mov ebp,[esp+_ZIPPED]
{2} add al,cl
{0} shl ebx,cl
{1} shr ecx,10h
{2} or ebx,ecx
{0-} mov ecx,edi
{1} and ecx,07h
{2} shr edi,03h
{0} shl ebx,cl
{1} or bl,[edi+ebp]
{2} add ecx,eax
{0} mov eax,[esp+...]
{ Byte-flush loop for the length code: stores BL at [edi+ebp], advances EDI by one byte, }
{ shifts the next byte of EBX down, and repeats while ECX-8 is still non-negative. }
{ The label is named @writelength because this snippet defines @write more than once. }
@writelength:
{0} mov [edi+ebp],bl
{1} add edi,01h
{2} shr ebx,08h
{0*} sub ecx,08h
{1*} jge @writelength
{ } xor eax,-1
{ } mov ebp,00000005h
{ } lea edi,[edi*08h+ecx]
{ } jz @distindexed
{---} bsr ecx,eax
{0*} sub ecx,01h
{1*} js @distindexed
{2} mov ebx,dword ptr [...+ecx*04h]
{0} add ebp,ecx
{1} and ebx,eax
{2} shr eax,cl
{0} lea eax,[ecx*02h+eax]
{1} shl ebx,05h
@distindexed:
{0-} mov ecx,edi
{1} or ebx,dword ptr [...+eax*04h]
{2} mov eax,[esp+_ZIPPED]
{0} shr edi,03h
{1} and ecx,07h
{2} shl ebx,cl
{0} add ecx,ebp
{1} or bl,[edi+eax+00h]
{2} mov ebp,[esp+_HASH]
{ Byte-flush loop for the distance code: stores BL at [edi+eax], advances EDI by one byte, }
{ shifts the next byte of EBX down, and repeats while ECX-8 is still non-negative. }
{ The label is named @writedistance because this snippet defines @write more than once. }
@writedistance:
{0} mov [edi+eax],bl
{1} add edi,01h
{2} shr ebx,08h
{0*} sub ecx,08h
{1*} jge @writedistance
{ } add esi,edx
{ } neg edx
{ } mov ebx,[esp+...]
{ -} mov eax,ebp
{ } lea edi,[edi*08h+ecx]
{ } jmp @administration
@encode_alpha:
{0} mov ebp,[esp+_ZIPPED]
{1} movzx edx,cl
{2-} mov ecx,edi
{1} mov edx,dword ptr [...+edx*04h]
{0} shr edi,03h
{2} and ecx,07h
{0-} mov eax,edx
{1} shr edx,10h
{2} shl edx,cl
{0} add cl,al
{1} mov eax,[esp+_HASH]
{2} or dl,[edi+ebp]
{ Byte-flush loop for a literal: stores DL at [edi+ebp], advances EDI by one byte, }
{ shifts the next byte of EDX down, and repeats while ECX-8 is still non-negative. }
{ The label is named @writechar because this snippet defines @write more than once. }
@writechar:
{0} mov [edi+ebp],dl
{1} add edi,01h
{2} shr edx,08h
{0*} sub ecx,08h
{1*} jge @writechar
{ } xor edx,-1
{ } mov ebp,eax
{ } add esi,01h
{ } lea edi,[edi*08h+ecx]
@administration:
{0} shl eax,__BITS
{1} and eax,__LOOKUP-1
{2} xor al,[esi+edx+03h]
{0} mov ecx,[esp+_LAST+ebp*04h]
{1} mov [esp+_LAST+ebp*04h],ebx
{2-} mov ebp,ebx
{0} add ebx,01h
{1} and ebp,(__WINDOW-1)
{2} mov [esp+_PREV+ebp*04h],ecx
{1} add edx,01h
{0-} mov ebp,eax
{2} jnz @administration
{ } mov [esp+_HASH],eax
{ } mov ebp,[esp+_LEN]
{ } mov eax,[esp+_LAST+eax*04h]
{ *} sub ebp,ebx
{ *} jz @finalize
{ **} cmp ebp,???
{ **} jae @program
{ } mov [esp+...],ebp
{ } jmp @program
@long_jump:

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#108) P.H.


P.H.
senior tag

Production code. P4 ready. Core2-n, K10-en, Bulldozeren, P4-en 2.5x gyorsabb, mint a W..R.r.

@FIX_character:
{@A1} {0} mov [edi],al
{1} add edi,01h
@@FIX_DECODE: { bpos >= 9 }
{2} mov eax,[esi]
{0-} mov ecx,edx
{1} mov ebp,((-1 shl ZFIX_aMAXBITS) xor -1)
{@AF} {2} shr edx,03h
{@B2} {0} xor edx,03h
{1} and ebp,ebx
{2} shl eax,cl
{0} add esi,edx
{1} lea edx,[ecx+edx*08h]
{2} or ebx,eax
@FIX_code:
{@C0} {0} mov eax,dword ptr [ZFIXED_RELOC+ebp*08h+_CODE]
{1} mov ecx,dword ptr [ZFIXED_RELOC+ebp*08h+_SIZE]
{2} sub edx,ecx
{@D0} {0} shr ebx,cl
{1**} cmp eax,00000100h
{2**} jb @FIX_character
@FIX_length:
{0-} mov ecx,eax
{1-} movzx ebp,ah
{2} jz @SECTION { short jump instruction }
{@E0} {0} and ebp,ebx
{1} shr eax,10h
{2} add ebp,eax
{0-} mov eax,ebx
{1} add edi,ebp
@FIX_distance:
{2} shr eax,cl
{0} add cl,ZFIX_dBITS
{@F0} {1} neg ebp
{2} sub dl,cl
{0} and eax,((-1 shl ZFIX_dBITS) xor -1)
{1} shr ebx,cl
{2} mov eax,dword ptr [ZFIXED_DIST+eax*04h]
@FIX_load:
{@00} {0} mov [esp+_MOVELEN],ebp
{1} mov ebp,[esi]
{2-} mov ecx,edx
{0} shr edx,03h
{1} xor edx,03h
{2} shl ebp,cl
{@10} {0} add esi,edx
{1} lea edx,[ecx+edx*08h]
{2} or ebx,ebp
{0} mov ebp,[esp+_MOVELEN]
{1**} test al,0FFh
{2**} js @FIX_movechar
{@20} {0} jz @FIX_moveword
@FIX_movedword:
{1} mov ebp,00000001h
{2-} mov ecx,eax
{0} sub dl,al
{1} sar eax,10h
{2} shl ebp,cl
{@30} {0} xor ebp,0FFFFFFFFh
{1} add eax,edi
{2} and ebp,ebx
{0} shr ebx,cl
{1} mov ecx,[esp+_MOVELEN]
{@40} {2} sub eax,ebp
{ Copy loop: each pass moves 8 bytes (two dwords) from [eax+ecx] to [edi+ecx]. }
{ ECX was loaded from [esp+_MOVELEN] just above and is stepped by +8; the loop repeats }
{ while the result is still negative, so the copied size is always a multiple of 8 bytes. }
{ NOTE(review): this can write past the exact length - presumably the buffer has slack; confirm. }
@FIX_move4byte:
{0} mov ebp,dword ptr [ecx+eax+00h]
{1} mov [edi+ecx+00h],ebp
{2} mov ebp,dword ptr [ecx+eax+04h]
{0} mov [edi+ecx+04h],ebp
{@50} {1*} add ecx,08h
{2*} js @FIX_move4byte
{ } jmp @@FIX_DECODE { copy finished: back to decoding the next symbol }
{ x6 } cmp esi,00h; cmp edx,00h
{ Word-wise copy: EAX is reduced to its sign-extended upper 16 bits and rebased on EDI, }
{ then each pass moves 4 bytes (two words) from [eax+ebp] to [edi+ebp]. }
{ EBP is stepped by +4 and the loop repeats while the result is still negative. }
@FIX_moveword:
{@60} {0} sar eax,10h { keep the sign-extended high word of EAX }
{1} add eax,edi { EAX = EDI + that signed value }
@FIX_move2byte:
{@65} {0} movzx ecx,word ptr [eax+ebp+00h]
{1} mov [edi+ebp+00h],cx
{@6D} {2} movzx ecx,word ptr [eax+ebp+02h]
{@72} {0} mov [edi+ebp+02h],cx
{1*} add ebp,04h
{2*} js @FIX_move2byte
{ } jmp @@FIX_DECODE { copy finished: back to decoding the next symbol }
{ Byte fill: reads the single byte located just before [edi+ebp] and replicates it, }
{ two bytes per pass, starting at [edi+ebp]. EBP is stepped by +2 and the loop repeats }
{ while the result is still negative, so an even number of bytes is always written. }
@FIX_movechar:
{@81} {1} movzx eax,byte ptr [ebp+edi-01h] { AL = byte preceding the fill start }
@FIX_repeat:
{@86} {0} mov [edi+ebp+00h],al
{1} mov [edi+ebp+01h],al
{2*} add ebp,02h
{@90} {0*} js @FIX_repeat
{ } jmp @@FIX_DECODE { fill finished: back to decoding the next symbol }

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#109) P.H.


P.H.
senior tag

Production code. P4 ready. Core2-n, K10-en, Bulldozeren, P4-en 12%-kal gyorsabb, mint a W..R.r.

@samechar:
{@80} {0**} cmp edx,ebp
{1**} jae @initrepeat
@compression:
{2} cmp cl,[esi+edx+01h]
{0} lea edx,[edx+01h]
{1} jz @samechar
@initrepeat:
{ } { x1 } and ebp,00h
{@90} { **} cmp edx,(03+01)
{ **} jae @special { long jump instruction }
{ } mov ch,cl
{ } { x1 } mov ss:[esp+_ENCODED],edi
@repeat:
{@A0} {0-} lea edi,[eax+ebx]
{1} and eax,(__WINDOW-1)
{2**} cmp edi,-(__WINDOW)
{0**} jbe @pre_encode
{@B0} {1} add edi,esi
{2} mov eax,[esp+_PREV+eax*04h]
{0**} cmp ch,[edi+ebp]
{1**} jnz @repeat
{2**} cmp cl,[edi+00h]
{0**} jnz @repeat
{@C0} {1-} xor edx,edx
@length:
{@C2} {0**} cmp edx,[esp+_MAXRANGE]
{1**} jae @maximum_repeat
{2} movzx ecx,byte ptr [esi+edx+01h]
{0} cmp cl,[edi+edx+01h]
{@D1} {1} lea edx,[edx+01h]
{2} jz @length
{0} movzx ecx,byte ptr [esi+00h]
{1} mov ch,[esi+ebp]
{2**} cmp edx,ebp
{0**} jbe @repeat
{@E0} {1-} mov ebp,edx
{2} mov ch,[esi+edx]
{0} mov [esp+_MOVEDIST],edi
{2} jmp @repeat
{ x6 } test ebx,00000000h
{@F0} { x4 } xor ebp,ebp; xor esi,esi
@maximum_repeat:
{@F4} {0} mov [esp+_MOVEDIST],edi
{1} mov ebp,00000028
{2-} xor eax,eax
{0-} xor ebx,ebx
{@00} {1} mov edi,[esp+_ENCODED]
{2**} cmp edx,00000258
{0**} jz @indexed
{1} movzx ecx,byte ptr [esi+00h]
{2-} mov ebp,edx
@pre_encode:
{@11} {0} mov edi,[esp+_ENCODED]
{1-} xor ebx,ebx
{2*} sub ebp,03h
{0*} jb @encode_alpha
{@20} {1-} movzx eax,bl
{2} lea edx,[ebp+03h]
{ } jz @indexed
@encode_length:
{---} bsr ecx,ebp
{0*} sub ecx,02h
{1*} jle @indexed
{@30} {2-} mov ebx,ebp
{0} shr ebp,cl
{1-} mov eax,ebp
{2} lea ebp,[ebp+ecx*04h+00h]
{0} shl eax,cl
{1} xor ebx,eax
{2-} mov eax,ecx
{ Looks up the table entry for index EBP (entries start 257 dwords into ZFIXED_ENCODE), }
{ merges it with the bits already held in EBX and aligns the result to the current output }
{ bit position kept in EDI. On exit EDI is a byte index, ECX the pending bit count, and }
{ EAX holds [esp+_MOVEDIST] for the code that follows. }
{ NOTE(review): the entry appears to carry a bit length in its low byte and the code bits in }
{ its upper 16 bits - inferred from how CL and ECX shr 16 are used below; confirm. }
@indexed:
{@40} {0} mov ecx,dword ptr [ZFIXED_ENCODE+257*04h+ebp*04h]
{1} mov ebp,[esp+_ZIPPED]
{2} add al,cl { accumulate the entry's low byte into AL }
{0} shl ebx,cl { make room below the bits already in EBX }
{1} shr ecx,10h { ECX = upper 16 bits of the entry }
{@52} {2} or ebx,ecx
{0-} mov ecx,edi
{1} and ecx,07h { ECX = bit offset inside the current output byte }
{2} shr edi,03h { EDI = output byte index }
{0} shl ebx,cl { align the new bits to that offset }
{1} or bl,[edi+ebp] { merge with the bits already stored in that byte }
{@61} {2} add ecx,eax { ECX = bit offset + AL-accumulated length }
{0} mov eax,[esp+_MOVEDIST]
@writelength:
{0} mov [edi+ebp],bl
{1} add edi,01h
{2} shr ebx,08h
{0*} sub ecx,08h
{@72} {1*} jge @writelength
{ Prepares the distance value for table lookup. EAX becomes ESI - EAX - 1 (SUB then NOT), }
{ and EDI is rebuilt as a bit position from the byte index in EDI and the remainder in ECX. }
{ If the value is zero, or its highest set bit is bit 0, control goes straight to }
{ @distindexed with EBP = 0; otherwise the value is split into a table index (EAX) and the }
{ low bits that were shifted out (EBX, moved up by 5), with EBP = number of those bits. }
@encode_distance:
{ } { x3 } mov ebp,00000000h { EBP = 0 unless replaced by the bit count below }
{ } sub eax,esi
{ } xor eax,-1 { EAX = NOT(EAX - ESI) = ESI - EAX - 1; sets ZF for the JZ below }
{@80} { } lea edi,[edi*08h+ecx+00h] { bit position = byte index * 8 + ECX; LEA keeps the flags }
{ } jz @distindexed
{---} bsr ecx,eax { ECX = index of the highest set bit of EAX }
{0*} sub ecx,01h
{1*} js @distindexed { highest bit was bit 0: nothing to split off }
{2-} mov ebx,eax
{@90} {0} shr eax,cl { EAX = the top bits of the value }
{1-} mov ebp,eax
{2} lea eax,[ecx*02h+eax] { table index = 2*ECX + top bits }
{0} shl ebp,cl
{1} xor ebx,ebp { EBX = the low ECX bits that SHR dropped }
{2-} mov ebp,ecx { EBP = count of those low bits }
{0} shl ebx,05h
@distindexed:
{@A0} {0} or ebx,dword ptr [REVERSE2_5+eax*04h]
{1-} mov ecx,edi
{0} shr edi,03h
{2} mov eax,[esp+_ZIPPED]
{@B0} {1} and ecx,07h
{2} shl ebx,cl
{0} or bl,[edi+eax]
{1} lea ecx,[ecx+ebp+05h]
{2} mov ebp,[esp+_HASH]
@writedistance:
{@C0} {0} mov [edi+eax],bl
{1} add edi,01h
{2} shr ebx,08h
{0*} sub ecx,08h
{1*} jge @writedistance
{ } add edx,esi
{@D0} { } lea edi,[edi*08h+ecx]
{ -} mov eax,ebp
{ } jmp @administration
{ x9 } xor eax,eax; xor ebp,ebp; mov edx,00000000h
{@00} { x4 } xor edx,edx; xor ecx,ecx
@encode_alpha:
{@04} {0} mov ebp,[esp+_ZIPPED]
{1-} movzx edx,cl
{2-} mov ecx,edi
{0} shr edi,03h
{@10} {1} mov edx,dword ptr [ZFIXED_ENCODE+edx*04h]
{2} and ecx,07h
{0} movzx eax,dx
{1} shr edx,10h
{@20} {2} shl edx,cl
{0} add ecx,eax
{1} mov eax,[esp+_HASH]
{2} or dl,[edi+ebp]
@writechar:
{0} mov [edi+ebp],dl
{1} add edi,01h
{@31} {2} shr edx,08h
{0*} sub ecx,08h
{1*} jge @writechar
{ } lea edx,[esi+01h]
{ } lea edi,[edi*08h+ecx+00h]
{ Per-position bookkeeping loop, run for every ESI value up to (not including) EDX. }
{ For the current value in EAX it replaces the _LAST entry with ESI and saves the entry's }
{ previous content into the _PREV slot indexed by ESI and (__WINDOW-1); it then derives the }
{ next EAX by shifting left by __BITS, masking with __LOOKUP-1 and XOR-ing in the byte at [esi+3]. }
@administration:
{@20} {0} mov ecx,[esp+_LAST+eax*04h] { ECX = previous _LAST entry for this EAX }
{1} mov [esp+_LAST+eax*04h],esi { the entry now holds the current ESI }
{2} shl eax,__BITS
{0} and eax,__LOOKUP-1
{1-} mov ebp,esi
{2} xor al,[esi+03h] { fold the byte 3 positions ahead into the low byte }
{0} add esi,01h
{1} and ebp,(__WINDOW-1) { slot index = old ESI masked to the window size }
{2} mov [esp+_PREV+ebp*04h],ecx { chain the previous entry behind the new one }
{0**} cmp esi,edx
{1**} jnz @administration
@nextloop:
{0} mov [esp+_HASH],eax
{@70} {1} mov ebp,[esp+_LEN]
{2} sub ebx,esi
{0} mov eax,[esp+_LAST+eax*04h]
{1-} xor edx,edx
{2} movzx ecx,byte ptr [esi+00h]
{0*} sub ebp,esi
{1*} jz @finalize
{2**} cmp ebp,00000258
{@90} {0**} jae @compression
{ } mov [esp+_MAXRANGE],ebp
{ } jmp @compression
{ x1 } nop
{@80} { x1 } nop
@special:

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#110) P.H. válasza P.H. (#108) üzenetére


P.H.
senior tag

Egy program sosincs befejezve, csak abbahagyva van.

@FIX_character:
{@A2} {0} mov [edi],al
{1-} mov ecx,edx
{2} add edi,01h
@@FIX_symbol:
{@A9} {0} mov eax,(($1 shl ZFIX_aMAXBITS)-1)
{1} shl ebp,cl
{@B0} {2} xor ecx,(03h shl 03h)
{0} and ax,bx
{1} or edx,(03h shl 03h)
{2} shr ecx,03h
{0} or ebx,ebp
{1} add esi,ecx
@@FIX_code:
{@C0} {2} mov ecx,dword ptr [ZFIXED_RELOC+eax*08h+_SIZE]
{0} mov ebp,[esi]
{1} mov eax,dword ptr [ZFIXED_RELOC+eax*08h+_CODE]
{@D0} {2} shr ebx,cl
{0*} sub edx,ecx
{1*} jnc @FIX_character
@FIX_length:
{2} sar ecx,10h
{0} and al,bl
{1} sub dl,cl
{2} shr ebx,cl
{@FF} {0-} movzx ecx,al
{@02} {1} sar eax,10h
{2} jz @NXSECTION
{0} sub eax,ecx
{1-} movzx ecx,bl
@FIX_distance:
{@10} {2} sub edx,ZFIX_dBITS
{0} mov [esp+_MOVELEN],eax
{1} sub edi,eax
{2} mov eax,dword ptr [ZFIXED_DISTx8+ecx*04h]
{@20} {0-} movzx ecx,dl
{1} shr ebx,ZFIX_dBITS
@FIX_load:
{2} or edx,(03h shl 03h)
{0} shl ebp,cl
{1} xor ecx,(03h shl 03h)
{2} shr ecx,03h
{@31} {0} or ebx,ebp
{1} mov ebp,[esp+_MOVELEN]
{2} movzx edx,dl
{0} add esi,ecx
{1-} mov ecx,eax
{2} sar eax,10h
{@41} {0} jns @FIX_movechar
{1} jc @FIX_moveword
{2} mov ebp,00000001h
@FIX_movedword:
{0} sub dl,cl
{1} shl ebp,cl
{2} add eax,edi
{@50} {0} add ebp,0FFFFFFFFh
{1} and ebp,ebx
{2} shr ebx,cl
{0} mov ecx,[esp+_MOVELEN]
{1} sub eax,ebp
@FIX_move4byte:
{@60} {0} mov ebp,[ecx+eax+00h]
{1} mov [edi+ecx+00h],ebp
{2} mov ebp,[ecx+eax+04h]
{0} mov [edi+ecx+04h],ebp
{1*} add ecx,08h
{@71} {2*} js @FIX_move4byte
{0-} mov ecx,edx
{1} mov ebp,[esi]
{2} jmp @@FIX_symbol
{ x4 } lea eax,[edx+edx+01h]
@FIX_movechar:
{@60} {0} movzx eax,byte ptr [ebp+edi-01h]
{0} imul ecx,eax
{1-} mov eax,ebx
@FIX_move1byte:
{0} mov [edi+ebp+00h],ecx
{@70} {1} mov [edi+ebp+04h],ecx
{2*} add ebp,08h
{0*} js @FIX_move1byte
{1} and eax,(($1 shl ZFIX_aMAXBITS)-1)
{2} jmp @@FIX_code
@FIX_moveword:
{@80} {0} add eax,edi
@FIX_move2byte:
{@82} {0} movzx ecx,word ptr [eax+ebp+00h]
{1} mov [edi+ebp+00h],cx
{2} movzx ecx,word ptr [eax+ebp+02h]
{@8F} {0} mov [edi+ebp+02h],cx
{@94} {1*} add ebp,04h
{2*} js @FIX_move2byte
{0-} mov eax,ebx
{1} and eax,(($1 shl ZFIX_aMAXBITS)-1)
{@A0} {2} jmp @@FIX_code

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#111) P.H. válasza P.H. (#105) üzenetére


P.H.
senior tag

Örömmel jelentem, hogy lassabb nem lett

.............................

{ } movsx ebx,byte ptr [edx]
{ } xor eax,eax
{ } mov esi,ebp
{ } mov [edi+__N],ebp
{ } and esi,-8
{ } mov ecx,ebp
@init:
{ } mov [edi+esi*08h+(00h*08h)+__K],eax
{ } mov [edi+esi*08h+(04h*08h)+__K],eax
{ } add esi,08h
{ } jnz @init
{ Setup loop: ECX advances by 4 per pass and the loop ends when it reaches zero. }
{ EBX enters with the sign-extended byte read from [edx]; ESI is 0 here because the }
{ preceding @init loop only exits once ESI has reached zero. }
@argument:
{ } cmp ebx,00h { sets SF from the current byte; LEA, MOVSX and MOV below keep the flags }
{ } lea eax,[ebp+ebx*04h] { EAX = EBP + 4 * byte }
{ } movsx ebx,byte ptr [edx+01h] { fetch the next byte for the following pass }
{ } lea edx,[edx+01h] { advance the input pointer without touching the flags }
{ } mov [edi+eax*08h+__K],ebp { store EBP into the __K slot selected by EAX }
{ } cmovs eax,esi { negative byte: record 0 (ESI) instead of the computed index }
{ } mov [edi+ecx*08h+__F],eax
{ } add ecx,04h
{ } jnz @argument
{ } mov [edi+ebp*08h-(04h*08h)+__K],esi
{ } lea edx,[ebp-04h]
{ } mov ebx,edi
{ } jmp @next0row
@ROWS:
{ } mov [edi+__LEFT1+edx*08h+__W],esi
@next0row:
{ } mov eax,[edi+edx*08h+(04h*08h)+__S]
{ } add edx,04h
{ } jz @@COLUMNS
{ } mov [edi+edx*08h+B],eax
{ } xor esi,esi
{ } mov [edi+edx*08h+_R],eax
{ } sub ebx,ebp
{ **} test eax,eax
{ **} jnz @@ROWS
{ } lea ecx,[ebp+04h]
{ } mov esi,[ebx+ebp]
{ } or esi,[edi+ebp*08h+__K]
{ } and ecx,-8
@findrowmin:
{ } mov eax,[ebx+ecx]
{ } or eax,[edi+ecx*08h+__K]
{ } add ecx,08h
{ } cmp eax,ebp
{ } cmovb ebp,eax
{ } mov eax,[ebx+ecx-04h]
{ } or eax,[edi+ecx*08h-(04h*08h)+__K]
{ } cmp eax,esi
{ } cmovb esi,eax
{ **} cmp ecx,00h
{ **} jnz @findrowmin
{ } cmp ebp,esi
{ } cmovb esi,ebp
{ } mov ebp,[edi+__N]
{ } neg esi
{ } jle @@ROWS
@@XXX:
{ } mov esi,[esp+_A]
{ } mov ecx,[esp+_I]
{ } mov dword ptr [esi+A.OPTIMUM],?
{ } mov ebx,[esi+TRESULT.NEXT]
{ } jmp ecx
@free0col:
{ } add dword ptr [edi+__0COUNTER],-1
{ } mov [edi+__CCOLMIN],esi
{ } mov ecx,0FFFFFFFFh
{ } mov [edi+ebp*08h-(04h*08h)+__K],edx
@0col:
{ } mov [edi+__LEFT1+edx*08h+__COLMOD],esi
@@COLUMNS:
{ -} mov ebx,ebp
{ Walks EDX downward in steps of 4. Each pass stores the current ECX into the __C slot of }
{ EDX, loads the __K entry one slot below, and leaves to @@INIT0COL once (EDX-4)-EBP is }
{ negative. The scan continues while the bitwise-inverted entry is non-negative and }
{ otherwise falls through to the code that follows. }
@next0col:
{ } mov [edi+__LEFT1+edx*08h+__C],ecx
{ } mov ecx,[edi+edx*08h-(04h*08h)+__K] { memory load of the __K entry one slot below EDX }
{ } lea eax,[edx-04h]
{ } sub eax,ebp
{ } js @@INIT0COL
{ } sub edx,04h
{ } xor ecx,-1 { bitwise NOT of the loaded entry; sets SF for the branch below }
{ } jns @next0col
@findcolmin:
{ } mov esi,[eax+edi]
{ } add esi,[edi+__LEFT1+ebx*08h+__U]
{ } or esi,[edi+ebx*08h+__S]
{ } jz @test0row
{ } sub eax,ebp
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } add ebx,04h
{ } jnz @findcolmin
{ } mov eax,edx
{ } lea ebx,[ebp-04h]
{ } mov esi,ecx
{ } cmp ecx,00h
{ } js @@XXX
@seek0col:
{ } mov ecx,[edi+__LEFT1+ebx*08h+(04h*08h)+__W]
{ } sub eax,ebp
{ } add ebx,04h
{ } jz @free0col
{ } add ecx,[eax+edi]
{ **} cmp ecx,esi
{ **} jnz @seek0col
@test0row:
{ } mov ecx,[edi+ebx*08h+B]
{ **} test ecx,ecx
{ **} js @seek0col
{ } mov [edi+ebx*08h+B],edx
{ } mov [edi+edx*08h+__K],ebx
{ } jmp @0col
@@INIT0COL:
{ } lea eax,[edi+ecx]
{ } mov [edi+__L],ecx
{ } neg ebp
{ } sal ecx,10h
{ } jnz @init2col
{ -} xor esi,esi
{ } jmp @@FINISHED
@@5TH_STEP:
{ -} mov edx,esi
{ } movsx esi,si
{ } sar eax,10h
@5TH_STEP:
{ } movsx ecx,byte ptr [edi+__LEFT1+ebx*08h+__SIGN+__C]
{ } and ecx,edx
{ } add [edi+__LEFT1+ebx*08h+__COLMOD],ecx
{ } movsx ecx,byte ptr [edi+ebx*08h+__SIGN+_R]
{ } and ecx,edx
{ } add [edi+__LEFT1+ebx*08h+__W],ecx
{ } add ebx,04h
{ } jnz @5TH_STEP
{ } mov edx,es:[edi+esi*08h+B]
{ } db $8B,$8C,$3B,__L,?,?,?
{ **} test edx,edx
{ **} jz @@4TH_STEP
{ } add dword ptr [edi+__PN],-1
{ } mov [edi+esi*08h+_R],eax
{ } mov [edi+__LEFT1+edx*08h+__C],esi
{ } cmp edx,ecx
{ } cmovb ecx,edx
{ } sub ebx,ebp
{ } mov [edi+__L],ecx
{ } jmp @@9ND_STEP
@fast6forward:
{ } mov esi,[edi+__LEFT1+ebx*08h+(04h*08h)+__W]
{ } add ebx,04h
{ } jz @pass8col
{ } sal ecx,10h
{ } jmp @loop
@pass8col:
{ } mov eax,[edi+__N]
{ } sub ebx,ebp
{ } mov [edi+__LEFT1+ecx*08h+__C],eax
{ Advances ECX by 4 per pass; leaves to @@5TH_STEP when ECX reaches zero, otherwise keeps }
{ scanning while the __C entry for ECX is below or equal to EAX (unsigned compare). }
@next20col:
{ } add ecx,04h
{ } jz @@5TH_STEP { clears ECX register }
{ **} cmp [edi+__LEFT1+ecx*08h+__C],eax
{0**} jbe @next20col
@@8ND_STEP:
{ } mov esi,[edi+__LEFT1+ecx*08h+__COLMOD]
{ } lea eax,[edi+ecx]
{ } mov [edi+__CCOL],esi
{ } imul ecx,00010000h
@init2col:
{ } add eax,ebp
{ } mov esi,[edi+ebx*08h+__W]
@loop:
{ } sub esi,[edi+__COLMIN]
{ } add esi,[eax]
{ } lea eax,[eax+ebp]
{ } jo @over6flow
{ } or esi,[edi+ebx*08h+_R]
{ } jz @@11D_STEP
{ } cmp esi,edx
{ } cmovb edx,esi
{ } cmovb cx,bx
@over6flow:
{ } mov esi,[edi+__LEFT1+ebx*08h+(04h*08h)+__W]
{ } add ebx,04h
{ } jnz @loop
{ -} mov eax,ecx
{ } sar ecx,10h
{ } cmovc esi,eax
{ } mov [edi+__MC],esi
{ } jmp @pass8col
{ Entered when the scan loop finds a zero result for the element at EBX. }
{ ECX carries two 16-bit halves: SAR by 16 recovers the upper half and leaves bit 15 of the }
{ lower half in CF. With CF set the packed value is written to [edi+__MC]; with CF clear the }
{ value read from [edi+__M] is written there instead. The B entry of EBX then decides the path: }
{ zero goes to @4TH_STEP; otherwise the _R slot of EBX and the __C slot of ESI are updated }
{ and one of @re2start, @fast6forward or @@7ND_STEP is taken. }
@@11D_STEP:
{ -} mov esi,ecx
{ } sar ecx,10h { ECX = upper half, CF = bit 15 of the lower half }
{ } cmovnc esi,[edi+__M]
{ } mov [edi+__MC],esi
{ } mov esi,[edi+ebx*08h+B]
{ **} test esi,esi
{ **} jz @4TH_STEP
{ } mov [edi+ebx*08h+_R],ecx
{ } mov dword ptr [edi+__LEFT1+esi*08h+__C],-1 { explicit size: the slot is used as a dword elsewhere }
{ **} cmp word ptr [edi+__MC],bx { compares only the low word of the value stored above }
{ **} jz @re2start
{ **} cmp esi,ecx
{ **} jae @fast6forward
{ } mov ecx,esi
{ } xor ebx,ebx
{ } mov eax,[edi+__L]
{ } sub ebx,ebp { EBX = -EBP }
{ } cmp esi,eax
{ } cmovb eax,esi { keep the unsigned minimum of ESI and [edi+__L] }
{ } mov [edi+__L],eax
{ } jmp @@7ND_STEP
@re2start:
{ } mov ecx,[edi+__L]
{ } xor ebx,ebx
{ } add dword ptr [edi+__PN],-1
{ } mov edx,esi
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } sub ebx,ebp
{ } mov [edi+__L],ecx
{ } jmp @@6ND_STEP
@@4TH_STEP:
{ } mov ecx,eax
{ } mov ebx,esi
@4TH_STEP:
{ } mov [edi+ebx*08h+BB],ecx
{ } mov edx,[edi+ecx*08h+__K]
{ } mov [edi+ecx*08h+__K],ebx
{ } mov ebx,edx
{ } mov ecx,[edi+edx*08h+_R]
{ } sub edx,ebp
{ } jnc @4TH_STEP
{ } xor esi,esi
{ } mov ebx,edx
{ } sub dword ptr [edi+__0COUNTER],-1
{ } jz @@FINISHED
@@1ST_STEP:
{ } mov eax,[edi+esi*08h-(04h*08h)+__K]
{ } xor eax,-1
{ } mov [edi+__LEFT1+esi*08h-(04h*08h)+__C],eax
{ } lea esi,[esi-04h]
{ } mov eax,[edi+esi*08h+__S]
{ } cmovs ecx,esi
{ } mov [edi+esi*08h+_R],eax
{ **} cmp edx,esi
{ **} jnz @@1ST_STEP
{ } nop
{ } mov [edi+__L],ecx
{ } jmp @@5ND_STEP

@@FINISHED:
{ } mov eax,edi
{ } mov ecx,[esp+_KIMENET]
{ } mov ebp,edx
@@results:
{ } mov ebx,[edi+edx*08h+__B]
{ } sub eax,ebp
{ } add esi,[eax+ebx]
{ } sub ebx,ebp
{ } shr ebx,02h
{ } mov [ecx],bl
{ } add ecx,01h
{ } add edx,04h
{ } jnz @@results
(CODE)

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

(#112) P.H.


P.H.
senior tag

Skeleton of the code tuned for Zen (1) and Zen+ (i.e. placeholder code omitted).

It runs at a constant 3.4 IPC (out of a maximum of 4.0, given the 4 available ALUs) for 50x50 matrices; this means 85% utilization.

Since more than 90% of the instructions need an ALU, the key factor is how the instructions are distributed among the ALUs. This is true for all other microarchitectures as well.

{ } movsx ebx,byte ptr [eax+E.FIELD0+00h]
{ } mov esi,ebp
{ } xor edx,edx
{ } and esi,-8
{ } mov [edi+__A],ebp
{ } mov ecx,ebp
@init:
{ } mov [edi+esi*08h+(00h*08h)+__B],edx
{ } mov [edi+esi*08h+(04h*08h)+__B],edx
{ } sub esi,-8
{ } jnz @init
@@a:
{ } cmp ebx,ebp
{ } lea esi,[ebp+ebx*04h]
{ } movsx ebx,byte ptr [eax+E.FIELD0+01h]
{ } lea eax,[eax+01h]
{ } mov [edi+esi*08h+__B],ebp
{ } cmova esi,edx
{ } mov [edi+ecx*08h+__C],esi
{ } add ecx,04h
{ } jnz @@a
{ } mov eax,[edi+ebp*08h+__C]
{ } mov ebx,ebp
{ } mov edx,edi
{ } jmp @b
@@ROWS:
{ } mov eax,[edi+ebx*08h+(04h*08h)+__C]
{ } add ebx,04h
{ } jz @@COLS
@next0b:
{ } mov [edi+ebx*08h+__D],eax
{ } sub edx,ebp
{ } mov [edi+ebx*08h+R],eax
{ } add eax,ebp
{ } jc @@ROWS
{ } lea ecx,[ebp+04h]
{ } mov esi,[edx+ebp]
{ } or esi,[edi+ebp*08h+__B]
{ } and ecx,-8
{ } mov ebp,[edi+ecx*08h+__B]
@findr:
{ } or ebp,[edx+ecx+00h]
{ } cmp ebp,eax
{ } cmovb eax,ebp
{ } mov ebp,[edx+ecx+04h]
{ } or ebp,[edi+ecx*08h+(04h*08h)+__B]
{ } cmp ebp,esi
{ } cmovb esi,ebp
{ } mov ebp,[edi+ecx*08h+(08h*08h)+__B]
{ } add ecx,08h
{ } jnz @findr
{ } cmp eax,esi
{ } mov ebp,[edi+__A]
{ } cmovb esi,eax
{ } neg esi
{ } mov [edi+__L+ebx*08h+__F],esi
{ } jle @ROWS
@@EXIT:
{ } mov esi,[esp+_X]
{ } mov [esi+E.O],7FFFFFFFh
{ } jmp @outside
@free0col:
{ } add [edi+__0],ebp
{ } mov [edi+__Y],esi
{ } mov ecx,0FFFFFFFFh
@@COLS:
{ } mov [edi+ebp*08h-(04h*08h)+__B],ebx
@mark:
{ } mov [edi+__L+ebx*08h+__CC],esi
{ } mov esi,[edi+__L+ebp*08h+__F]
{ } mov edx,ebp
@nextc:
{ } lea eax,[ebx-04h]
{ } mov [edi+__L+ebx*08h+__M],ecx
{ } mov ecx,[edi+ebx*08h-(04h*08h)+__B]
{ } mov ebx,eax
{ } sub eax,ebp
{ } jc @@init0
{ } xor ecx,-1
{ } jns @nextc
@findc:
{ } add esi,[eax+edi]
{ } or esi,[edi+edx*08h+__D]
{ } lea edx,[edx+04h]
{ } jz @testr
{ } sub eax,ebp
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } mov esi,[edi+__L+edx*08h+__F]
{ **} cmp edx,00h
{ **} jnz @findc
{ } mov eax,ebx
{ } mov edx,ebp
{ } mov esi,ecx
{ **} cmp ecx,00h
{ **} js @@EXIT
@seek0:
{ } mov ecx,[edi+__L+edx*08h+__F]
{ } sub eax,ebp
{ } add edx,04h
{ } jg @free0col
{ } add ecx,[eax+edi]
{ **} cmp ecx,esi
{ **} jnz @seek0
@testr:
{ } lea ecx,[edx-04h]
{ **} test [edi+edx*08h-(04h*08h)+__C],ebx
{ **} js @seek0col
{ } mov [edi+ebx*08h+__B],ecx
{ } mov [edi+edx*08h-(04h*08h)+__C],ebx
{ } xor ecx,ecx
{ } jmp @mark
@@init0:
{ } mov [edi+__I],ecx
{ } mov eax,ecx
{ } sal ecx,10h
{ } jnz @scan
{ } mov eax,edi
{ } mov esi,[esp+_X]
{ } jmp outside
@@1ST_STEP:
{ } movsx esi,ax
{ } mov esi,[edi+esi*08h+__C]
{ } neg edx
@1ST_STEP:
{ } movsx ecx,byte ptr [edi+__L+ebx*08h+__S+__M]
{ } and ecx,edx
{ } sub [edi+__L+ebx*08h+__CC],ecx
{ } movsx ecx,byte ptr [edi+ebx*08h+__S+R]
{ } and ecx,edx
{ } sub [edi+__L+ebx*08h+__F],ecx
{ } add ebx,04h
{ } jnz @1ST_STEP
{ } mov ecx,[edi+__I]
{ } movsx ebx,ax
{ } sar eax,10h
{ **} test esi,esi
{ **} jz @@2ND_STEP
{ } add dword ptr [edi+__A],-1
{ } mov [edi+ebx*08h+R],eax
{ } mov [edi+__L+esi*08h+__M],ebx
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } mov ebx,ebp
{ } mov [edi+__I],ecx
{ } jmp @@6TH_STEP
@pass:
{ } mov eax,ecx
{ } sar ecx,10h
{ } cmovnc eax,[edi+__W]
{ } mov [edi+__W],eax
{ } lea ebx,[ebp+00h]
{ } mov [edi+__L+ecx*08h+__M],esi
@nx:
{ } mov esi,[edi+__L+ecx*08h+__M +(04h*08h)]
{ } add ecx,04h
{ } jz @@1ST_STEP
{ **} cmp esi,[edi+__A]
{ **} jbe @nx
@@6TH_STEP:
{ } mov esi,[edi+__L+ecx*08h+__CC]
{ } lea ebx,[ebp-04h]
{ } { x1 } nop
{ } mov eax,ecx
{ } mov [edi+__Y],esi
@ffd:
{ } sal ecx,10h
{ } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
@z:
{ **} cmp ebx,-4
{ **} jz @pass
@scan:
{ } add ebx,04h
{ } sub eax,ebp
{ } sub esi,[edi+__Y]
{ } add esi,[edi+eax]
{ } or esi,[edi+ebx*08h+R]
{ } jz @@5TH_STEP
{ } cmp esi,edx
{ } cmovb cx,bx
{ } cmovb edx,esi
{ } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
{ **} cmp ebx,-4
{ **} jz @pass
{ } add ebx,04h
{ } sub eax,ebp
{ } sub esi,[edi+__Y]
{ } add esi,[edi+eax]
{ } or esi,[edi+ebx*08h+R]
{ } jz @@5TH_STEP
{ } cmp esi,edx
{ } cmovb cx,bx
{ } cmovb edx,esi
{ } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
{ } db $66,$66,$66; nop
{ } jmp @zZ
{ Entered when the @scan loop finds a zero result for the element at EBX. }
{ ECX carries two 16-bit halves: SAR by 16 recovers the upper half and leaves bit 15 of the }
{ lower half in CF. With CF set the packed value is written to [edi+__W]; with CF clear the }
{ existing [edi+__W] is written back unchanged. The __C entry of EBX then decides the path: }
{ zero goes to @2ND_STEP; otherwise the R slot of EBX and the __M slot of ESI are updated }
{ and one of @re, @ffd or @@6TH_STEP is taken. }
@@5TH_STEP:
{ } mov esi,ecx
{ } sar ecx,10h { ECX = upper half, CF = bit 15 of the lower half }
{ } cmovnc esi,[edi+__W]
{ } mov [edi+__W],esi
{ } mov esi,[edi+ebx*08h+__C]
{ **} test esi,esi
{ **} jz @2ND_STEP
{ } mov [edi+ebx*08h+R],ecx
{ } mov dword ptr [edi+__L+esi*08h+__M],0FFFFFFFFh { explicit size: the slot is used as a dword elsewhere }
{ **} cmp word ptr [edi+__W],bx { compares only the low word of the value stored above }
{ **} jz @re
{ **} cmp esi,ecx
{ **} jae @ffd
{ } mov ecx,esi
{ } mov eax,[edi+__I]
{ } cmp esi,eax
{ } cmovb eax,esi { keep the unsigned minimum of ESI and [edi+__I] }
{ } mov [edi+__I],eax
{ } jmp @@6TH_STEP
@re:
{ } mov ecx,[edi+__I]
{ } add dword ptr [edi+__A],-1
{ } mov edx,esi
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } mov [edi+__I],ecx
{ } jmp @@6TH_STEP
@@2ND_STEP:
{ } mov ecx,eax
@2ND_STEP:
{ } mov [edi+ebx*08h+__C],ecx
{ } mov edx,[edi+ecx*08h+__B]
{ } mov [edi+ecx*08h+__B],ebx
{ } mov ecx,[edi+edx*08h+R]
{ } mov ebx,edx
{ } add edx,ebp
{ } jc @2ND_STEP
{ } mov ecx,esi
{ } sub [edi+__0],ebp
{ } jz @@outside
@@9ST_STEP:
{ } mov eax,[edi+esi*08h-(04h*08h)+__B]
{ } xor eax,-1
{ } mov [edi+__L+esi*08h-(04h*08h)+__M],eax
{ } lea esi,[esi-04h]
{ } cmovs ecx,esi
{ } mov ebx,[edi+esi*08h+__D]
{ } mov eax,[edi+esi*08h-(04h*08h)+__B]
{ } mov [edi+esi*08h+R],ebx
{ **} cmp ebp,esi
{ **} jz @i9
{ } xor eax,-1
{ } mov [edi+__L+esi*08h-(04h*08h)+__M],eax
{ } lea esi,[esi-04h]
{ } cmovs ecx,esi
{ } mov eax,[edi+esi*08h+__D]
{ } mov [edi+esi*08h+R],eax
{ **} cmp ebp,esi
{ **} jnz @@9ST_STEP
@i9:
{ } mov [edi+__I],ecx
{- } jmp @@6TH_STEP

[ Szerkesztve ]

Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Copyright © 2000-2024 PROHARDVER Informatikai Kft.