predict-a.asm
;*****************************************************************************
;* predict-a.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2005 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
;*****************************************************************************

BITS 64

;=============================================================================
; Macros and other preprocessor constants
;=============================================================================

%include "amd64inc.asm"

%macro STORE8x8 2
    movq        [parm1q + 0*FDEC_STRIDE], %1
    movq        [parm1q + 1*FDEC_STRIDE], %1
    movq        [parm1q + 2*FDEC_STRIDE], %1
    movq        [parm1q + 3*FDEC_STRIDE], %1
    movq        [parm1q + 4*FDEC_STRIDE], %2
    movq        [parm1q + 5*FDEC_STRIDE], %2
    movq        [parm1q + 6*FDEC_STRIDE], %2
    movq        [parm1q + 7*FDEC_STRIDE], %2
%endmacro

%macro STORE16x16 2
    mov         eax, 4
ALIGN 4
.loop:
    movq        [parm1q + 1*FDEC_STRIDE], %1
    movq        [parm1q + 2*FDEC_STRIDE], %1
    movq        [parm1q + 3*FDEC_STRIDE], %1
    movq        [parm1q + 4*FDEC_STRIDE], %1
    movq        [parm1q + 1*FDEC_STRIDE + 8], %2
    movq        [parm1q + 2*FDEC_STRIDE + 8], %2
    movq        [parm1q + 3*FDEC_STRIDE + 8], %2
    movq        [parm1q + 4*FDEC_STRIDE + 8], %2
    dec         eax
    lea         parm1q, [parm1q + 4*FDEC_STRIDE]
    jnz         .loop
    nop
%endmacro

SECTION .rodata align=16

ALIGN 16
pw_2: times 4 dw 2
pw_4: times 4 dw 4
pw_8: times 4 dw 8
pw_3210:
    dw 0
    dw 1
    dw 2
    dw 3
ALIGN 16
pb_1: times 16 db 1
pb_00s_ff:
    times 8 db 0
pb_0s_ff:
    times 7 db 0
    db 0xff

;=============================================================================
; Code
;=============================================================================

SECTION .text

cglobal predict_4x4_ddl_mmxext
cglobal predict_4x4_vl_mmxext
cglobal predict_8x8_v_mmxext
cglobal predict_8x8_dc_mmxext
cglobal predict_8x8_dc_top_mmxext
cglobal predict_8x8_dc_left_mmxext
cglobal predict_8x8_ddl_mmxext
cglobal predict_8x8_ddl_sse2
cglobal predict_8x8_ddr_sse2
cglobal predict_8x8_vl_sse2
cglobal predict_8x8_vr_core_mmxext
cglobal predict_8x8c_v_mmx
cglobal predict_8x8c_dc_core_mmxext
cglobal predict_8x8c_p_core_mmxext
cglobal predict_16x16_p_core_mmxext
cglobal predict_16x16_v_mmx
cglobal predict_16x16_dc_core_mmxext
cglobal predict_16x16_dc_top_mmxext

; dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
%macro PRED8x8_LOWPASS0 6
    mov%6       %5, %2
    pavgb       %2, %3
    pxor        %3, %5
    mov%6       %1, %4
    pand        %3, [pb_1 GLOBAL]
    psubusb     %2, %3
    pavgb       %1, %2
%endmacro
%macro PRED8x8_LOWPASS 5
    PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, q
%endmacro
%macro PRED8x8_LOWPASS_XMM 5
    PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, dqa
%endmacro

;-----------------------------------------------------------------------------
; void predict_4x4_ddl_mmxext( uint8_t *src )
;-----------------------------------------------------------------------------
ALIGN 16
predict_4x4_ddl_mmxext:
    sub         parm1q, FDEC_STRIDE
    movq        mm3, [parm1q]
    movq        mm1, [parm1q-1]
    movq        mm2, mm3
    movq        mm4, [pb_0s_ff GLOBAL]
    psrlq       mm2, 8
    pand        mm4, mm3
    por         mm2, mm4

    PRED8x8_LOWPASS mm0, mm1, mm2, mm3, mm5

%assign Y 1
%rep 4
    psrlq       mm0, 8
    movd        [parm1q+Y*FDEC_STRIDE], mm0
%assign Y (Y+1)
%endrep

    ret

;-----------------------------------------------------------------------------
; void predict_4x4_vl_mmxext( uint8_t *src )
;-----------------------------------------------------------------------------
ALIGN 16
predict_4x4_vl_mmxext:
    movq        mm1, [parm1q-FDEC_STRIDE]
    movq        mm3, mm1
    movq        mm2, mm1
    psrlq       mm3, 8
    psrlq       mm2, 16
    movq        mm4, mm3
    pavgb       mm4, mm1

    PRED8x8_LOWPASS mm0, mm1, mm2, mm3, mm5

    movd        [parm1q+0*FDEC_STRIDE], mm4
    movd        [parm1q+1*FDEC_STRIDE], mm0
    psrlq       mm4, 8
    psrlq       mm0, 8
    movd        [parm1q+2*FDEC_STRIDE], mm4
    movd        [parm1q+3*FDEC_STRIDE], mm0

    ret

;-----------------------------------------------------------------------------
; void predict_8x8_v_mmxext( uint8_t *src, uint8_t *edge )
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_v_mmxext:
    movq        mm0, [parm2q+16]
    STORE8x8    mm0, mm0
    ret

;-----------------------------------------------------------------------------
; void predict_8x8_dc_mmxext( uint8_t *src, uint8_t *edge );
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_dc_mmxext:
    pxor        mm0, mm0
    pxor        mm1, mm1
    psadbw      mm0, [parm2q+7]
    psadbw      mm1, [parm2q+16]
    paddw       mm0, [pw_8 GLOBAL]
    paddw       mm0, mm1
    psrlw       mm0, 4
    pshufw      mm0, mm0, 0
    packuswb    mm0, mm0
    STORE8x8    mm0, mm0
    ret

;-----------------------------------------------------------------------------
; void predict_8x8_dc_top_mmxext( uint8_t *src, uint8_t *edge );
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_dc_top_mmxext:
    pxor        mm0, mm0
    psadbw      mm0, [parm2q+16]
    paddw       mm0, [pw_4 GLOBAL]
    psrlw       mm0, 3
    pshufw      mm0, mm0, 0
    packuswb    mm0, mm0
    STORE8x8    mm0, mm0
    ret

;-----------------------------------------------------------------------------
; void predict_8x8_dc_left_mmxext( uint8_t *src, uint8_t *edge );
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_dc_left_mmxext:
    pxor        mm0, mm0
    psadbw      mm0, [parm2q+7]
    paddw       mm0, [pw_4 GLOBAL]
    psrlw       mm0, 3
    pshufw      mm0, mm0, 0
    packuswb    mm0, mm0
    STORE8x8    mm0, mm0
    ret

;-----------------------------------------------------------------------------
; void predict_8x8_ddl_mmxext( uint8_t *src, uint8_t *edge )
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_ddl_mmxext:
    movq        mm5, [parm2q+16]
    movq        mm2, [parm2q+17]
    movq        mm3, [parm2q+23]
    movq        mm4, [parm2q+25]
    movq        mm1, mm5
    psllq       mm1, 8
    PRED8x8_LOWPASS mm0, mm1, mm2, mm5, mm7
    PRED8x8_LOWPASS mm1, mm3, mm4, [parm2q+24], mm6

%assign Y 7
%rep 6
    movq        [parm1q+Y*FDEC_STRIDE], mm1
    movq        mm2, mm0
    psllq       mm1, 8
    psrlq       mm2, 56
    psllq       mm0, 8
    por         mm1, mm2
%assign Y (Y-1)
%endrep
    movq        [parm1q+Y*FDEC_STRIDE], mm1
    psllq       mm1, 8
    psrlq       mm0, 56
    por         mm1, mm0
%assign Y (Y-1)
    movq        [parm1q+Y*FDEC_STRIDE], mm1

    ret

;-----------------------------------------------------------------------------
; void predict_8x8_ddl_sse2( uint8_t *src, uint8_t *edge )
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_ddl_sse2:
    movdqa      xmm3, [parm2q+16]
    movdqu      xmm2, [parm2q+17]
    movdqa      xmm1, xmm3
    pslldq      xmm1, 1
    PRED8x8_LOWPASS_XMM xmm0, xmm1, xmm2, xmm3, xmm4
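
A detail worth spelling out is the PRED8x8_LOWPASS macro that the ddl/vl predictors above build on. Its documented output is (t[n-1] + 2*t[n] + t[n+1] + 2) >> 2, computed byte-wise without widening to 16 bits: pavgb returns (a + b + 1) >> 1, so the macro first averages the left and right neighbours, subtracts the stray rounding bit ((left ^ right) & 1) to turn that into a truncating average, and then averages the result with the centre sample. Below is a minimal scalar C sketch of the same arithmetic; the names lowpass_ref and lowpass_pavgb are hypothetical and serve only as a reference model, they are not part of x264.

#include <stdint.h>
#include <assert.h>

/* straightforward definition: (t[n-1] + 2*t[n] + t[n+1] + 2) >> 2 */
static uint8_t lowpass_ref( uint8_t l, uint8_t c, uint8_t r )
{
    return (uint8_t)( ( l + 2*c + r + 2 ) >> 2 );
}

/* the pavgb-based formulation used by PRED8x8_LOWPASS0 */
static uint8_t lowpass_pavgb( uint8_t l, uint8_t c, uint8_t r )
{
    uint8_t avg_lr = (uint8_t)( ( l + r + 1 ) >> 1 ); /* pavgb %2, %3      */
    avg_lr -= ( l ^ r ) & 1;                          /* pand + psubusb    */
    return (uint8_t)( ( c + avg_lr + 1 ) >> 1 );      /* pavgb %1, %2      */
}

int main( void )
{
    /* exhaustive check that both forms agree for every byte triple */
    for( int l = 0; l < 256; l++ )
        for( int c = 0; c < 256; c++ )
            for( int r = 0; r < 256; r++ )
                assert( lowpass_ref( l, c, r ) == lowpass_pavgb( l, c, r ) );
    return 0;
}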