; x64masm.asm
; [code-viewer residue] Font size:
movdqa xmm4, xmm1
paddd xmm4, xmm2
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 32-13
pxor xmm3, xmm4
pxor xmm3, xmm5
movdqa xmm4, xmm2
paddd xmm4, xmm3
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 32-18
pxor xmm0, xmm4
pxor xmm0, xmm5
pshufd xmm1, xmm1, 2*64+1*16+0*4+3
pshufd xmm2, xmm2, 1*64+0*16+3*4+2
pshufd xmm3, xmm3, 0*64+3*16+2*4+1
movdqa xmm4, xmm1
paddd xmm4, xmm0
movdqa xmm5, xmm4
pslld xmm4, 7
psrld xmm5, 32-7
pxor xmm3, xmm4
pxor xmm3, xmm5
movdqa xmm4, xmm0
paddd xmm4, xmm3
movdqa xmm5, xmm4
pslld xmm4, 9
psrld xmm5, 32-9
pxor xmm2, xmm4
pxor xmm2, xmm5
movdqa xmm4, xmm3
paddd xmm4, xmm2
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 32-13
pxor xmm1, xmm4
pxor xmm1, xmm5
movdqa xmm4, xmm2
paddd xmm4, xmm1
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 32-18
pxor xmm0, xmm4
pxor xmm0, xmm5
pshufd xmm1, xmm1, 0*64+3*16+2*4+1
pshufd xmm2, xmm2, 1*64+0*16+3*4+2
pshufd xmm3, xmm3, 2*64+1*16+0*4+3
sub eax, 2
jnz label0
paddd xmm0, [r10 + 0*16]
paddd xmm1, [r10 + 1*16]
paddd xmm2, [r10 + 2*16]
paddd xmm3, [r10 + 3*16]
add dword ptr [r10 + 8*4], 1
adc dword ptr [r10 + 5*4], 0
pcmpeqb xmm6, xmm6
psrlq xmm6, 32
pshufd xmm7, xmm6, 0*64+1*16+2*4+3
movdqa xmm4, xmm0
movdqa xmm5, xmm3
pand xmm0, xmm7
pand xmm4, xmm6
pand xmm3, xmm6
pand xmm5, xmm7
por xmm4, xmm5
movdqa xmm5, xmm1
pand xmm1, xmm7
pand xmm5, xmm6
por xmm0, xmm5
pand xmm6, xmm2
pand xmm2, xmm7
por xmm1, xmm6
por xmm2, xmm3
movdqa xmm5, xmm4
movdqa xmm6, xmm0
shufpd xmm4, xmm1, 2
shufpd xmm0, xmm2, 2
shufpd xmm1, xmm5, 2
shufpd xmm2, xmm6, 2
test rdx, rdx
jz labelSSE2_Salsa_Output_B3
test rdx, 15
jnz labelSSE2_Salsa_Output_B7
pxor xmm4, [rdx+0*16]
pxor xmm0, [rdx+1*16]
pxor xmm1, [rdx+2*16]
pxor xmm2, [rdx+3*16]
add rdx, 4*16
jmp labelSSE2_Salsa_Output_B3
labelSSE2_Salsa_Output_B7:
movdqu xmm3, [rdx+0*16]
pxor xmm4, xmm3
movdqu xmm3, [rdx+1*16]
pxor xmm0, xmm3
movdqu xmm3, [rdx+2*16]
pxor xmm1, xmm3
movdqu xmm3, [rdx+3*16]
pxor xmm2, xmm3
add rdx, 4*16
labelSSE2_Salsa_Output_B3:
test rcx, 15
jnz labelSSE2_Salsa_Output_B8
movdqa [rcx+0*16], xmm4
movdqa [rcx+1*16], xmm0
movdqa [rcx+2*16], xmm1
movdqa [rcx+3*16], xmm2
jmp labelSSE2_Salsa_Output_B9
labelSSE2_Salsa_Output_B8:
movdqu [rcx+0*16], xmm4
movdqu [rcx+1*16], xmm0
movdqu [rcx+2*16], xmm1
movdqu [rcx+3*16], xmm2
labelSSE2_Salsa_Output_B9:
add rcx, 4*16
jmp label5
label4:
movdqa xmm6, [rsp + 0200h]
movdqa xmm7, [rsp + 0210h]
movdqa xmm8, [rsp + 0220h]
movdqa xmm9, [rsp + 0230h]
movdqa xmm10, [rsp + 0240h]
movdqa xmm11, [rsp + 0250h]
movdqa xmm12, [rsp + 0260h]
movdqa xmm13, [rsp + 0270h]
movdqa xmm14, [rsp + 0280h]
movdqa xmm15, [rsp + 0290h]
add rsp, 10*16 + 32*16 + 8
ret
Salsa20_OperateKeystream ENDP
ALIGN 8
;-----------------------------------------------------------------------
; Rijndael_Enc_ProcessAndXorBlock
;
; Table-driven encryption of one 16-byte block: the input is XORed with
; the first four key words, run through a sequence of table-lookup
; rounds, finished with a byte-lookup round, optionally XORed with a
; second block, and stored to the output block.
;
; Arguments (register and stack offsets are consistent with the
; Microsoft x64 calling convention; TODO confirm against the C++
; prototype):
;   rcx        base of the lookup tables; accessed as four regions at
;              byte offsets 0, 1024, 2048 and 3072, each indexed by
;              4*byte (copied to r11)
;   edx        stride in bytes used while pre-reading the first 1024
;              bytes of the table (presumably the cache-line size -
;              confirm with caller); must be > 0
;   r8         pointer to the 32-bit key words; words 0..3 are XORed
;              with the input, later words are consumed 8 per step
;   r9         pointer to the last four key words; also the address at
;              which the main loop stops
;   [5th arg]  inBlock  - 16 bytes read as four dwords
;   [6th arg]  xorBlock - may be null; if not, 16 bytes XORed into the
;              result
;   [7th arg]  outBlock - 16 bytes written as four dwords
;
; Registers: rbx, rsi, rdi, r12-r15 are pushed in the prologue and
; popped before ret. rax, rcx, rdx, r8, r11 and flags are overwritten.
; No return value is set up on purpose (eax simply holds output word 0
; at ret).
;
; State layout between rounds: three words live in r13d, r14d, r15d and
; the fourth in ecx or edx (alternating from round to round).
;-----------------------------------------------------------------------
Rijndael_Enc_ProcessAndXorBlock PROC FRAME
; Prologue: save the non-volatile registers. rex_push_reg / push_reg are
; macros defined outside this chunk (presumably the ksamd64.inc helpers
; that push and emit unwind data - confirm).
; NOTE(review): r12 is saved and restored but never referenced in the
; body; possibly kept so that 7 pushes + return address total 64 bytes.
rex_push_reg rbx
push_reg rsi
push_reg rdi
push_reg r12
push_reg r13
push_reg r14
push_reg r15
.endprolog
; r11 = table base for the whole routine (rcx is reused as scratch).
mov r11, rcx
; Stack arguments: 7*8 skips the seven pushes above; 5*8 skips the
; return address plus four 8-byte slots, landing on the fifth argument.
mov rdi, [rsp + 5*8 + 7*8] ; inBlock
; Initial key addition: w[i] = key[i] XOR in[i] for i = 0..3.
;   r13d = w0, r14d = w1, r15d = w2, ecx = w3
; ebx accumulates w0 AND w1 AND w2 AND w3; the value is thrown away by
; "and ebx, 0" below, but ebx remains data-dependent on all four words.
mov eax, [r8+0*4]
xor eax, [rdi+0*4]
mov r13d, eax
mov ebx, [r8+1*4]
xor ebx, [rdi+1*4]
mov r14d, ebx
and ebx, eax
mov eax, [r8+2*4]
xor eax, [rdi+2*4]
mov r15d, eax
and ebx, eax
mov ecx, [r8+3*4]
xor ecx, [rdi+3*4]
and ebx, ecx
; Force ebx to 0 without breaking the dependency chain (and, not xor),
; then start the preload offset edi at 0 (32-bit write zero-extends rdi).
and ebx, 0
mov edi, ebx
; Table preload: read one dword every edx bytes over table bytes
; 0..1023, four reads per iteration. ebx is 0 and AND keeps it 0, so the
; loads have no arithmetic effect. Presumably this pulls the first table
; into cache before any data-dependent lookup (cache-timing
; countermeasure) - confirm against the caller.
; NOTE(review): with edx == 0 edi never advances and the loop does not
; terminate; the compare is signed (jl).
label2:
and ebx, [r11+rdi]
add edi, edx
and ebx, [r11+rdi]
add edi, edx
and ebx, [r11+rdi]
add edi, edx
and ebx, [r11+rdi]
add edi, edx
cmp edi, 1024
jl label2
; Touch the final dword of the 1024-byte region as well.
and ebx, [r11+1020]
; ebx is still 0, so these XORs leave the state unchanged; they make
; every state word depend on the loads above (presumably so the lookups
; below cannot be issued before the preload completes).
xor r13d, ebx
xor r14d, ebx
xor r15d, ebx
xor ecx, ebx
; ---- First table round -------------------------------------------
; Accumulators edi, eax, ebx, edx start as key words 4..7; r8 then
; advances past the eight key words consumed so far. Loading edx here
; overwrites the stride argument, which is no longer needed.
mov edi, [r8+4*4]
mov eax, [r8+5*4]
mov ebx, [r8+6*4]
mov edx, [r8+7*4]
add r8, 8*4
; For each state word, byte k (least significant first) indexes the
; table region at offset k*1024 and the 32-bit entry is XORed into one
; accumulator; the target accumulator rotates from word to word.
; State word in ecx (w3):
movzx esi, cl
xor edx, [r11+0*1024+4*rsi]
movzx esi, ch
xor ebx, [r11+1*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor eax, [r11+2*1024+4*rsi]
movzx esi, ch
xor edi, [r11+3*1024+4*rsi]
; State word from r15d (w2):
mov ecx, r15d
movzx esi, cl
xor ebx, [r11+0*1024+4*rsi]
movzx esi, ch
xor eax, [r11+1*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor edi, [r11+2*1024+4*rsi]
movzx esi, ch
xor edx, [r11+3*1024+4*rsi]
; State word from r14d (w1):
mov ecx, r14d
movzx esi, cl
xor eax, [r11+0*1024+4*rsi]
movzx esi, ch
xor edi, [r11+1*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor edx, [r11+2*1024+4*rsi]
movzx esi, ch
xor ebx, [r11+3*1024+4*rsi]
; State word from r13d (w0):
mov ecx, r13d
movzx esi, cl
xor edi, [r11+0*1024+4*rsi]
movzx esi, ch
xor edx, [r11+1*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor ebx, [r11+2*1024+4*rsi]
movzx esi, ch
xor eax, [r11+3*1024+4*rsi]
; New state: r15d = ebx, r14d = eax, r13d = edi, fourth word stays in edx.
mov r15d, ebx
mov r14d, eax
mov r13d, edi
; ---- Main loop: two table rounds per iteration ---------------------
; In these rounds byte k (least significant first) indexes the table
; region at offset (3-k)*1024, i.e. the reverse of the first round.
label0:
; Round A: accumulators edi, eax, ebx, ecx = key words 0..3 at r8;
; consumes the state in edx, r15d, r14d, r13d.
mov edi, [r8+0*4]
mov eax, [r8+1*4]
mov ebx, [r8+2*4]
mov ecx, [r8+3*4]
movzx esi, dl
xor edi, [r11+3*1024+4*rsi]
movzx esi, dh
xor eax, [r11+2*1024+4*rsi]
shr edx, 16
movzx esi, dl
xor ebx, [r11+1*1024+4*rsi]
movzx esi, dh
xor ecx, [r11+0*1024+4*rsi]
mov edx, r15d
movzx esi, dl
xor ecx, [r11+3*1024+4*rsi]
movzx esi, dh
xor edi, [r11+2*1024+4*rsi]
shr edx, 16
movzx esi, dl
xor eax, [r11+1*1024+4*rsi]
movzx esi, dh
xor ebx, [r11+0*1024+4*rsi]
mov edx, r14d
movzx esi, dl
xor ebx, [r11+3*1024+4*rsi]
movzx esi, dh
xor ecx, [r11+2*1024+4*rsi]
shr edx, 16
movzx esi, dl
xor edi, [r11+1*1024+4*rsi]
movzx esi, dh
xor eax, [r11+0*1024+4*rsi]
mov edx, r13d
movzx esi, dl
xor eax, [r11+3*1024+4*rsi]
movzx esi, dh
xor ebx, [r11+2*1024+4*rsi]
shr edx, 16
movzx esi, dl
xor ecx, [r11+1*1024+4*rsi]
movzx esi, dh
xor edi, [r11+0*1024+4*rsi]
; State after round A: r15d = ebx, r14d = eax, r13d = edi, fourth in ecx.
mov r15d, ebx
mov r14d, eax
mov r13d, edi
; Round B: accumulators edi, eax, ebx, edx = key words 4..7 at r8;
; consumes the state in ecx, r15d, r14d, r13d.
mov edi, [r8+4*4]
mov eax, [r8+5*4]
mov ebx, [r8+6*4]
mov edx, [r8+7*4]
movzx esi, cl
xor edi, [r11+3*1024+4*rsi]
movzx esi, ch
xor eax, [r11+2*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor ebx, [r11+1*1024+4*rsi]
movzx esi, ch
xor edx, [r11+0*1024+4*rsi]
mov ecx, r15d
movzx esi, cl
xor edx, [r11+3*1024+4*rsi]
movzx esi, ch
xor edi, [r11+2*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor eax, [r11+1*1024+4*rsi]
movzx esi, ch
xor ebx, [r11+0*1024+4*rsi]
mov ecx, r14d
movzx esi, cl
xor ebx, [r11+3*1024+4*rsi]
movzx esi, ch
xor edx, [r11+2*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor edi, [r11+1*1024+4*rsi]
movzx esi, ch
xor eax, [r11+0*1024+4*rsi]
mov ecx, r13d
movzx esi, cl
xor eax, [r11+3*1024+4*rsi]
movzx esi, ch
xor ebx, [r11+2*1024+4*rsi]
shr ecx, 16
movzx esi, cl
xor edx, [r11+1*1024+4*rsi]
movzx esi, ch
xor edi, [r11+0*1024+4*rsi]
; State after round B: r15d = ebx, r14d = eax, r13d = edi, fourth in edx.
mov r15d, ebx
mov r14d, eax
mov r13d, edi
; Advance past the eight key words just used. The exit test is an
; equality compare, so r9 must be reachable from r8 in whole 32-byte
; steps or the loop will not stop.
add r8, 8*4
cmp r9, r8
jne label0
; ---- Final round ---------------------------------------------------
; Output words eax, ecx, esi, edi start as the four key words at r9.
mov eax, [r9+0*4]
mov ecx, [r9+1*4]
mov esi, [r9+2*4]
mov edi, [r9+3*4]
; Each state byte now selects a single byte at offset 1 inside the
; 4-byte table entry [r11 + 4*index] (presumably the plain S-box value
; embedded in the table - confirm with the table generator). Within each
; state word, byte 0 is shifted to bits 31..24, byte 1 to bits 23..16,
; byte 2 to bits 15..8, and byte 3 is left in bits 7..0; the four
; results go to four different output words.
; State word in edx -> eax, ecx, esi, edi:
movzx ebx, dl
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 3*8
xor eax, ebx
movzx ebx, dh
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 2*8
xor ecx, ebx
shr edx, 16
movzx ebx, dl
; After this shift edx holds only the top state byte, so rdx can be used
; directly as the index for the fourth lookup.
shr edx, 8
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 1*8
xor esi, ebx
movzx ebx, BYTE PTR [r11+1+4*rdx]
xor edi, ebx
; State word from r15d -> edi, eax, ecx, esi:
mov edx, r15d
movzx ebx, dl
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 3*8
xor edi, ebx
movzx ebx, dh
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 2*8
xor eax, ebx
shr edx, 16
movzx ebx, dl
shr edx, 8
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 1*8
xor ecx, ebx
movzx ebx, BYTE PTR [r11+1+4*rdx]
xor esi, ebx
; State word from r14d -> esi, edi, eax, ecx:
mov edx, r14d
movzx ebx, dl
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 3*8
xor esi, ebx
movzx ebx, dh
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 2*8
xor edi, ebx
shr edx, 16
movzx ebx, dl
shr edx, 8
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 1*8
xor eax, ebx
movzx ebx, BYTE PTR [r11+1+4*rdx]
xor ecx, ebx
; State word from r13d -> ecx, esi, edi, eax:
mov edx, r13d
movzx ebx, dl
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 3*8
xor ecx, ebx
movzx ebx, dh
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 2*8
xor esi, ebx
shr edx, 16
movzx ebx, dl
shr edx, 8
movzx ebx, BYTE PTR [r11+1+4*rbx]
shl ebx, 1*8
xor edi, ebx
movzx ebx, BYTE PTR [r11+1+4*rdx]
xor eax, ebx
; ---- Optional XOR and store ------------------------------------------
; Sixth argument: if xorBlock is non-null, XOR its four dwords into the
; result; a null pointer skips the XOR.
mov rbx, [rsp + 6*8 + 7*8] ; xorBlock
test rbx, rbx
jz label1
xor eax, [rbx+0*4]
xor ecx, [rbx+1*4]
xor esi, [rbx+2*4]
xor edi, [rbx+3*4]
label1:
; Seventh argument: write the four result dwords to outBlock.
mov rbx, [rsp + 7*8 + 7*8] ; outBlock
mov [rbx+0*4], eax
mov [rbx+1*4], ecx
mov [rbx+2*4], esi
mov [rbx+3*4], edi
; Epilogue: restore the saved registers in reverse push order.
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbx
ret
Rijndael_Enc_ProcessAndXorBlock ENDP
ALIGN 8
Sosemanuk_OperateKeystream PROC FRAME
rex_push_reg rsi
push_reg rdi
alloc_stack(80*4*2+12*4+8*8 + 2*16+8)
save_xmm128 xmm6, 02f0h
save_xmm128 xmm7, 0300h
.endprolog
mov rdi, r8
mov rax, r9
mov QWORD PTR [rsp+1*8], rdi
mov QWORD PTR [rsp+2*8], rdx
mov QWORD PTR [rsp+6*8], rax
lea rcx, [4*rcx+rcx]
lea rsi, [4*rcx]
mov QWORD PTR [rsp+3*8], rsi
movdqa xmm0, [rax+0*16]
movdqa [rsp + 8*8+0*16], xmm0
movdqa xmm0, [rax+1*16]
movdqa [rsp + 8*8+1*16], xmm0
movq xmm0, QWORD PTR [rax+2*16]
movq QWORD PTR [rsp + 8*8+2*16], xmm0
psrlq xmm0, 32
movd r10d, xmm0
mov ecx, [rax+10*4]
mov edx, [rax+11*4]
pcmpeqb xmm7, xmm7
label2:
lea rdi, [rsp + 8*8 + 12*4]
mov rax, 80
cmp rsi, 80
cmovg rsi, rax
mov QWORD PTR [rsp+7*8], rsi
lea rsi, [rdi+rsi]
mov QWORD PTR [rsp+4*8], rsi
lea rsi, s_sosemanukMulTables
label0:
mov eax, [rsp + 8*8 + ((0+0)-((0+0)/(10))*(10))*4]
mov [rdi + (((0)-((0)/(4))*(4))*20 + (0/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + edx]
xor r11d, ecx
mov [rdi + (((0)-((0)/(4))*(4))*20 + (0/4)) * 4], r11d
mov r11d, 1
and r11d, edx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((0+3)-((0+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((0+2)-((0+2)/(10))*(10))*4]
add ecx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul edx, 54655307h
rol edx, 7
mov [rsp + 8*8 + ((0+0)-((0+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((1+0)-((1+0)/(10))*(10))*4]
mov [rdi + (((1)-((1)/(4))*(4))*20 + (1/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + ecx]
xor r11d, edx
mov [rdi + (((1)-((1)/(4))*(4))*20 + (1/4)) * 4], r11d
mov r11d, 1
and r11d, ecx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((1+3)-((1+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((1+2)-((1+2)/(10))*(10))*4]
add edx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul ecx, 54655307h
rol ecx, 7
mov [rsp + 8*8 + ((1+0)-((1+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((2+0)-((2+0)/(10))*(10))*4]
mov [rdi + (((2)-((2)/(4))*(4))*20 + (2/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + edx]
xor r11d, ecx
mov [rdi + (((2)-((2)/(4))*(4))*20 + (2/4)) * 4], r11d
mov r11d, 1
and r11d, edx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((2+3)-((2+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((2+2)-((2+2)/(10))*(10))*4]
add ecx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul edx, 54655307h
rol edx, 7
mov [rsp + 8*8 + ((2+0)-((2+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((3+0)-((3+0)/(10))*(10))*4]
mov [rdi + (((3)-((3)/(4))*(4))*20 + (3/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + ecx]
xor r11d, edx
mov [rdi + (((3)-((3)/(4))*(4))*20 + (3/4)) * 4], r11d
mov r11d, 1
and r11d, ecx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((3+3)-((3+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((3+2)-((3+2)/(10))*(10))*4]
add edx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul ecx, 54655307h
rol ecx, 7
mov [rsp + 8*8 + ((3+0)-((3+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((4+0)-((4+0)/(10))*(10))*4]
mov [rdi + (((4)-((4)/(4))*(4))*20 + (4/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + edx]
xor r11d, ecx
mov [rdi + (((4)-((4)/(4))*(4))*20 + (4/4)) * 4], r11d
mov r11d, 1
and r11d, edx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((4+3)-((4+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((4+2)-((4+2)/(10))*(10))*4]
add ecx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul edx, 54655307h
rol edx, 7
mov [rsp + 8*8 + ((4+0)-((4+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((5+0)-((5+0)/(10))*(10))*4]
mov [rdi + (((5)-((5)/(4))*(4))*20 + (5/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + ecx]
xor r11d, edx
mov [rdi + (((5)-((5)/(4))*(4))*20 + (5/4)) * 4], r11d
mov r11d, 1
and r11d, ecx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((5+3)-((5+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((5+2)-((5+2)/(10))*(10))*4]
add edx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul ecx, 54655307h
rol ecx, 7
mov [rsp + 8*8 + ((5+0)-((5+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((6+0)-((6+0)/(10))*(10))*4]
mov [rdi + (((6)-((6)/(4))*(4))*20 + (6/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + edx]
xor r11d, ecx
mov [rdi + (((6)-((6)/(4))*(4))*20 + (6/4)) * 4], r11d
mov r11d, 1
and r11d, edx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((6+3)-((6+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((6+2)-((6+2)/(10))*(10))*4]
add ecx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul edx, 54655307h
rol edx, 7
mov [rsp + 8*8 + ((6+0)-((6+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((7+0)-((7+0)/(10))*(10))*4]
mov [rdi + (((7)-((7)/(4))*(4))*20 + (7/4)) * 4 + 80*4], eax
rol eax, 8
lea r11d, [r10d + ecx]
xor r11d, edx
mov [rdi + (((7)-((7)/(4))*(4))*20 + (7/4)) * 4], r11d
mov r11d, 1
and r11d, ecx
neg r11d
and r11d, r10d
xor r10d, eax
movzx eax, al
xor r10d, [rsi+rax*4]
mov eax, [rsp + 8*8 + ((7+3)-((7+3)/(10))*(10))*4]
xor r11d, [rsp + 8*8 + ((7+2)-((7+2)/(10))*(10))*4]
add edx, r11d
movzx r11d, al
shr eax, 8
xor r10d, [rsi+1024+r11*4]
xor r10d, eax
imul ecx, 54655307h
rol ecx, 7
mov [rsp + 8*8 + ((7+0)-((7+0)/(10))*(10))*4], r10d
mov eax, [rsp + 8*8 + ((8+0)-((8+0)/(10))*(10))*4]
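; [code-viewer residue] Keyboard shortcuts
;   Copy code:           Ctrl + C
;   Search code:         Ctrl + F
;   Full-screen mode:    F11
;   Toggle theme:        Ctrl + Shift + D
;   Show shortcuts:      ?
;   Increase font size:  Ctrl + =
;   Decrease font size:  Ctrl + -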