; File: interpolate8x8_mmx.asm
; (code-viewer page residue: "Font size:")
lea ecx, [ecx+2*edx] AVG2_MMX_RND0 lea ecx, [ecx+2*edx] AVG2_MMX_RND0 lea ecx, [ecx+2*edx] AVG2_MMX_RND0 pop ebx ret.rounding1 mov eax, [esp + 4 + 24] ; height -> eax sub eax, 8 test eax, eax mov ecx, [esp + 4 + 4] ; dst -> edi mov eax, [esp + 4 + 8] ; src1 -> esi mov ebx, [esp + 4 + 12] ; src2 -> eax mov edx, [esp + 4 + 16] ; stride -> edx movq mm7, [mmx_one] jz near .start1 AVG2_MMX_RND1 lea ecx, [ecx+2*edx].start1 AVG2_MMX_RND1 lea ecx, [ecx+2*edx] AVG2_MMX_RND1 lea ecx, [ecx+2*edx] AVG2_MMX_RND1 lea ecx, [ecx+2*edx] AVG2_MMX_RND1 pop ebx ret;-----------------------------------------------------------------------------;; void interpolate8x8_avg4_mmx(uint8_t const *dst,; const uint8_t * const src1,; const uint8_t * const src2,; const uint8_t * const src3,; const uint8_t * const src4,; const uint32_t stride,; const uint32_t rounding);;;-----------------------------------------------------------------------------%macro AVG4_MMX_RND0 0 movq mm0, [eax] ; src1 -> mm0 movq mm1, [ebx] ; src2 -> mm1 movq mm2, mm0 movq mm3, mm1 pand mm2, [mmx_three] pand mm3, [mmx_three] pand mm0, [mmx_mask2] pand mm1, [mmx_mask2] psrlq mm0, 2 psrlq mm1, 2 lea eax, [eax+edx] lea ebx, [ebx+edx] paddb mm0, mm1 paddb mm2, mm3 movq mm4, [esi] ; src3 -> mm0 movq mm5, [edi] ; src4 -> mm1 movq mm1, mm4 movq mm3, mm5 pand mm1, [mmx_three] pand mm3, [mmx_three] pand mm4, [mmx_mask2] pand mm5, [mmx_mask2] psrlq mm4, 2 psrlq mm5, 2 paddb mm4, mm5 paddb mm0, mm4 paddb mm1, mm3 paddb mm2, mm1 paddb mm2, [mmx_two] pand mm2, [mmx_mask2] psrlq mm2, 2 paddb mm0, mm2 lea esi, [esi+edx] lea edi, [edi+edx] movq [ecx], mm0 ; (src1 + src2 + src3 + src4 + 2) / 4 -> dst%endmacro%macro AVG4_MMX_RND1 0 movq mm0, [eax] ; src1 -> mm0 movq mm1, [ebx] ; src2 -> mm1 movq mm2, mm0 movq mm3, mm1 pand mm2, [mmx_three] pand mm3, [mmx_three] pand mm0, [mmx_mask2] pand mm1, [mmx_mask2] psrlq mm0, 2 psrlq mm1, 2 lea eax,[eax+edx] lea ebx,[ebx+edx] paddb mm0, mm1 paddb mm2, mm3 movq mm4, [esi] ; src3 -> mm0 movq mm5, [edi] ; 
src4 -> mm1 movq mm1, mm4 movq mm3, mm5 pand mm1, [mmx_three] pand mm3, [mmx_three] pand mm4, [mmx_mask2] pand mm5, [mmx_mask2] psrlq mm4, 2 psrlq mm5, 2 paddb mm4, mm5 paddb mm0, mm4 paddb mm1, mm3 paddb mm2, mm1 paddb mm2, [mmx_one] pand mm2, [mmx_mask2] psrlq mm2, 2 paddb mm0, mm2 lea esi,[esi+edx] lea edi,[edi+edx] movq [ecx], mm0 ; (src1 + src2 + src3 + src4 + 2) / 4 -> dst%endmacroALIGN 16interpolate8x8_avg4_mmx: push ebx push edi push esi mov eax, [esp + 12 + 28] ; rounding test eax, eax mov ecx, [esp + 12 + 4] ; dst -> edi mov eax, [esp + 12 + 8] ; src1 -> esi mov ebx, [esp + 12 + 12] ; src2 -> eax mov esi, [esp + 12 + 16] ; src3 -> esi mov edi, [esp + 12 + 20] ; src4 -> edi mov edx, [esp + 12 + 24] ; stride -> edx movq mm7, [mmx_one] jnz near .rounding1 AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 lea ecx, [ecx+edx] AVG4_MMX_RND0 pop esi pop edi pop ebx ret.rounding1 AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 lea ecx, [ecx+edx] AVG4_MMX_RND1 pop esi pop edi pop ebx ret;-----------------------------------------------------------------------------;; void interpolate8x8_6tap_lowpass_h_mmx(uint8_t const *dst,; const uint8_t * const src,; const uint32_t stride,; const uint32_t rounding);;;-----------------------------------------------------------------------------%macro LOWPASS_6TAP_H_MMX 0 movq mm0, [eax] movq mm2, [eax+1] movq mm1, mm0 movq mm3, mm2 punpcklbw mm0, mm7 punpcklbw mm2, mm7 punpckhbw mm1, mm7 punpckhbw mm3, mm7 paddw mm0, mm2 paddw mm1, mm3 psllw mm0, 2 psllw mm1, 2 movq mm2, [eax-1] movq mm4, [eax+2] movq mm3, mm2 movq mm5, mm4 punpcklbw mm2, mm7 punpcklbw mm4, mm7 punpckhbw mm3, mm7 punpckhbw mm5, mm7 paddw mm2, 
mm4 paddw mm3, mm5 psubsw mm0, mm2 psubsw mm1, mm3 pmullw mm0, [mmx_five] pmullw mm1, [mmx_five] movq mm2, [eax-2] movq mm4, [eax+3] movq mm3, mm2 movq mm5, mm4 punpcklbw mm2, mm7 punpcklbw mm4, mm7 punpckhbw mm3, mm7 punpckhbw mm5, mm7 paddw mm2, mm4 paddw mm3, mm5 paddsw mm0, mm2 paddsw mm1, mm3 paddsw mm0, mm6 paddsw mm1, mm6 psraw mm0, 5 psraw mm1, 5 lea eax, [eax+edx] packuswb mm0, mm1 movq [ecx], mm0%endmacroALIGN 16interpolate8x8_6tap_lowpass_h_mmx: mov eax, [esp + 16] ; rounding movq mm6, [rounding_lowpass_mmx + eax * 8] mov ecx, [esp + 4] ; dst -> edi mov eax, [esp + 8] ; src -> esi mov edx, [esp + 12] ; stride -> edx pxor mm7, mm7 LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_H_MMX ret;-----------------------------------------------------------------------------;; void interpolate8x8_6tap_lowpass_v_mmx(uint8_t const *dst,; const uint8_t * const src,; const uint32_t stride,; const uint32_t rounding);;;-----------------------------------------------------------------------------%macro LOWPASS_6TAP_V_MMX 0 movq mm0, [eax] movq mm2, [eax+edx] movq mm1, mm0 movq mm3, mm2 punpcklbw mm0, mm7 punpcklbw mm2, mm7 punpckhbw mm1, mm7 punpckhbw mm3, mm7 paddw mm0, mm2 paddw mm1, mm3 psllw mm0, 2 psllw mm1, 2 movq mm4, [eax+2*edx] sub eax, ebx movq mm2, [eax+2*edx] movq mm3, mm2 movq mm5, mm4 punpcklbw mm2, mm7 punpcklbw mm4, mm7 punpckhbw mm3, mm7 punpckhbw mm5, mm7 paddw mm2, mm4 paddw mm3, mm5 psubsw mm0, mm2 psubsw mm1, mm3 pmullw mm0, [mmx_five] pmullw mm1, [mmx_five] movq mm2, [eax+edx] movq mm4, [eax+2*ebx] movq mm3, mm2 movq mm5, mm4 punpcklbw mm2, mm7 punpcklbw mm4, mm7 punpckhbw mm3, mm7 punpckhbw mm5, mm7 paddw mm2, mm4 paddw mm3, mm5 paddsw mm0, mm2 paddsw mm1, mm3 paddsw mm0, mm6 paddsw mm1, mm6 psraw mm0, 5 psraw mm1, 5 lea 
eax, [eax+4*edx] packuswb mm0, mm1 movq [ecx], mm0%endmacroALIGN 16interpolate8x8_6tap_lowpass_v_mmx: push ebx mov eax, [esp + 4 + 16] ; rounding movq mm6, [rounding_lowpass_mmx + eax * 8] mov ecx, [esp + 4 + 4] ; dst -> edi mov eax, [esp + 4 + 8] ; src -> esi mov edx, [esp + 4 + 12] ; stride -> edx mov ebx, edx shl ebx, 1 add ebx, edx pxor mm7, mm7 LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX lea ecx, [ecx+edx] LOWPASS_6TAP_V_MMX pop ebx ret
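; ---------------------------------------------------------------------------
; Code-viewer page residue (not part of the assembly source):
;
; Keyboard shortcuts
;   Copy code            Ctrl + C
;   Search code          Ctrl + F
;   Full-screen mode     F11
;   Switch theme         Ctrl + Shift + D
;   Show shortcuts       ?
;   Increase font size   Ctrl + =
;   Decrease font size   Ctrl + -
; ---------------------------------------------------------------------------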