* File: idct_8x8.asm
* [code-viewer UI label "Font size:" captured with the listing; not part of the source]
* All levels of looping are collapsed into single loops which are *
* pipelined. The outer loop focuses on 8-pt IDCTs, whereas the *
* inner loop controls the column-pointer to handle jumps between *
* IDCT blocks. (The column-pointer adjustment is handled by a *
* four-phase rotating "fixup" constant which takes the place of *
* the original inner-loop.) *
* *
* For performance, portions of the outer-loop code have been *
* inter-scheduled with the prologs and epilogs of both loops. *
* Finally, cosine term registers are reused between the horizontal *
* and vertical loops to save the need for reinitialization. *
* *
* *
* ASSUMPTIONS *
* This is a LITTLE ENDIAN implementation. *
* *
* The input array must be aligned on a double-word boundary. *
* *
* *
* NOTES *
* This function is fully interruptible. *
* *
* CYCLES *
* num_idcts * 72 + 63 *
* *
* For num_idcts = 6, cycles = 495 *
* *
* CODESIZE *
* 736 bytes *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
; ---------------------------------------------------------------------------
; Entry point _idct_8x8 and symbolic names for the registers it receives.
; The section directive, the export and the entry label are three separate
; statements: the label starts in column 1, directives are indented.
; ---------------------------------------------------------------------------
        .text
        .global _idct_8x8
_idct_8x8:
        .asg    A4, A_data      ; base address of the data; loads and stores both start from it
        .asg    B4, B_count     ; block count; the loop trip count is derived from B_count * 4
        .asg    B3, B_ret       ; branch target used for the early exit (return address)
* ========================================================================= *
* Horizontal loop
* ========================================================================= *
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Symbolic register names for the horizontal pass.  Names prefixed A_/B_ live
; in the A/B register file respectively.  Several names are bound to the same
; physical register (for example B_F10 and B_F17 are both B22, A_F76 and
; A_F04 are both A23, B_Q1S1 and B_g2h2 are both B27) -- presumably because
; their live ranges in the schedule do not overlap; re-check before
; reordering any instruction in the loop.
; Names that the loop joins with ':' are odd:even register pairs
; (e.g. B_F76:B_F54 = B25:B24, B_f32s:B_f10s = B31:B30).
;
; Loop count (also used as the predicate of the early-exit branch).
        .asg    B0, B_c
; Input and output pointers, one of each per register file.
        .asg    A8, A_i_ptr
        .asg    B9, B_i_ptr
        .asg    A9, A_o_ptr
        .asg    B16, B_o_ptr
; Packed cosine constants (built with MVKL/MVKLH/PACK2 from the cst_* values).
        .asg    A17, A_C71x
        .asg    B18, B_C35x
        .asg    A18, A_C44x
        .asg    B19, B_C62x
        .asg    A19, A_C00nx
; Destinations of the LDDW loads (register pairs F76:F54 and F32:F10).
        .asg    B25, B_F76
        .asg    B24, B_F54
        .asg    B23, B_F32
        .asg    B22, B_F10
        .asg    A23, A_F76
        .asg    A22, A_F54
        .asg    A21, A_F32
        .asg    A20, A_F10
; Regrouped halfword pairs produced by PACK2 / PACKH2.
        .asg    B22, B_F17
        .asg    B21, B_F53
        .asg    B7, B_F26
        .asg    B8, B_F04
        .asg    A20, A_F17
        .asg    A25, A_F53
        .asg    A7, A_F26
        .asg    A23, A_F04
; CMPYR1 results for the F17 and F53 pairs.
        .asg    B27, B_Q1S1
        .asg    B26, B_Q0S0
        .asg    A27, A_Q1S1
        .asg    A26, A_Q0S0
; CMPYR1 results for F04 / F26 (p1p0, r1r0) and the ADDSUB2 result pair
; g2h2:q0s0.
        .asg    B23, B_p1p0
        .asg    B17, B_r1r0
        .asg    B27, B_g2h2
        .asg    B26, B_q0s0
        .asg    A21, A_p1p0
        .asg    A16, A_r1r0
        .asg    A27, A_g2h2
        .asg    A26, A_q0s0
; ADDSUB2 result pair g1g0:h1h0 and the CMPYR1 result h3g3.
        .asg    B29, B_g1g0
        .asg    B28, B_h1h0
        .asg    B24, B_h3g3
        .asg    A29, A_g1g0
        .asg    A28, A_h1h0
        .asg    A22, A_h3g3
; PACKHL2 / PACKLH2 results (h3h2, g3g2) and the ADD2 / SUB2 results
; (f10s, f23, f67, f54s).
        .asg    B24, B_h3h2
        .asg    B20, B_g3g2
        .asg    B30, B_f10s
        .asg    B31, B_f23
        .asg    B21, B_f67
        .asg    B20, B_f54s
        .asg    A24, A_h3h2
        .asg    A25, A_g3g2
        .asg    A30, A_f10s
        .asg    A31, A_f23
        .asg    A25, A_f67
        .asg    A24, A_f54s
; Halfword-swapped results (ROTL by 16 / SWAP2) that are stored with STDW.
        .asg    B21, B_f76s
        .asg    B31, B_f32s
        .asg    A25, A_f76s
        .asg    A31, A_f32s
* ========================================================================= *
; 16-bit cosine constants, bound to substitution symbols with .asg.  The loop
; setup loads them pairwise into registers with MVKL (low half) and MVKLH
; (high half).  cst_c0nx is the 16-bit two's-complement negation of cst_c0x
; (0x5A82 + 0xA57E = 0x10000).
        .asg    0xA57E, cst_c0nx        ;cos term -c0 (scaled by sqrt(2))
        .asg    0x5A82, cst_c0x         ;cos term c0 (scaled by sqrt(2))
        .asg    0x58C5, cst_c1x         ;cos term c1 (scaled by sqrt(2))
        .asg    0x539F, cst_c2x         ;cos term c2 (scaled by sqrt(2))
        .asg    0x4B42, cst_c3x         ;cos term c3 (scaled by sqrt(2))
        .asg    0x4000, cst_c4x         ;cos term c4 (scaled by sqrt(2))
        .asg    0x3249, cst_c5x         ;cos term c5 (scaled by sqrt(2))
        .asg    0x22A3, cst_c6x         ;cos term c6 (scaled by sqrt(2))
        .asg    0x11A8, cst_c7x         ;cos term c7 (scaled by sqrt(2))
; One-time setup ahead of the horizontal software-pipelined loop.
;   B_c     = B_count * 4
;   A_i_ptr = A_data, B_i_ptr = A_data (advanced by 16 bytes two packets later)
;   A_C44x  = cst_c4x in the low half (replicated to both halves by PACK2 below)
        SHL     B_count, 2, B_c
||      MV      A_data, A_i_ptr
||      MV      A_data, B_i_ptr
||      MVKL    .S1     cst_c4x, A_C44x
; The branch predicate is read in the same execute packet as the SUB, so it
; tests B_c as written by the SHL: the branch to B_ret is taken when
; B_count * 4 is zero.
        SUB     B_c, 1, B_c
||[!B_c] B      B_ret
; SPLOOPD opens the loop-buffer block that ends at SPKERNEL; its operand is
; the iteration interval (6 cycles).  ILC receives B_count * 4 - 1.
; B_i_ptr becomes A_data + 16, so the two register files start 16 bytes apart.
        SPLOOPD 6
||      MVC     B_c, ILC
||      ADD     B_i_ptr, 16, B_i_ptr
||      PACK2   A_C44x, A_C44x, A_C44x
*- Stage 0 -----------------------------------------------------------------*
; Body of the horizontal loop (between SPLOOPD and SPKERNEL).  Do not reorder:
; the placement of every instruction, including the NOPs, is part of the
; schedule.  The ;[c,i] tags appear to be the scheduled cycle and iteration
; number of each instruction.
;
; Loads: each side reads two double words per iteration, index [1] and index
; [0] relative to its input pointer, and then advances the pointer by 4
; double words (32 bytes).  A_i_ptr starts at A_data and B_i_ptr at
; A_data + 16.
;
; Instructions marked ^ in an SPMASK packet are the remaining one-time setup,
; interleaved with the first pass through the body (C64x+ SPLOOP semantics:
; they execute but are not kept in the loop buffer -- see the CPU reference
; guide):
;   A_o_ptr = A_data, B_o_ptr = B_i_ptr (A_data + 16 at this point), so the
;             results are stored back over the input data;
;   A_C00nx = cst_c0x : cst_c0nx      B_C35x = cst_c3x : cst_c5x
;   A_C71x  = cst_c7x : cst_c1x       B_C62x = cst_c6x : cst_c2x
;             (high half : low half, via MVKLH / MVKL).
        SPMASK
||      LDDW    .D2T2   *+B_i_ptr[1], B_F76:B_F54       ;[ 1,1]
||^     MVD     .M1     A_data, A_o_ptr
||^     MVD     .M2     B_i_ptr, B_o_ptr
        SPMASK
||      LDDW    .D1T1   *+A_i_ptr[1], A_F76:A_F54       ;[ 2,1]
||      LDDW    .D2T2   *B_i_ptr++[4], B_F32:B_F10      ;[ 2,1]
||^     MVKL    .S2     cst_c5x, B_C35x
||^     MVKL    .S1     cst_c0nx, A_C00nx
        SPMASK
||^     MVKLH   .S1     cst_c0x, A_C00nx
||^     MVKLH   .S2     cst_c3x, B_C35x
        LDDW    .D1T1   *A_i_ptr++[4], A_F32:A_F10      ;[ 4,1]
        SPMASK
||^     MVKL    .S1     cst_c1x, A_C71x
||^     MVKL    .S2     cst_c2x, B_C62x
        SPMASK
||^     MVKLH   .S1     cst_c7x, A_C71x
||^     MVKLH   .S2     cst_c6x, B_C62x
*- Stage 1 -----------------------------------------------------------------*
; PACK2 / PACKH2 regroup the loaded halfwords into the pairs named by their
; destinations (F26, F53, F04, F17).  Each pair is then multiplied by one of
; the packed cosine registers with CMPYR1; the .M1X / .M2X forms read the
; cosine register from the opposite register file.
        PACK2   .S2     B_F32, B_F76, B_F26             ;[ 7,1]
||      PACKH2  .L2     B_F54, B_F32, B_F53             ;[ 7,1]
        PACK2   .S2     B_F10, B_F54, B_F04             ;[ 8,1]
||      CMPYR1  .M2     B_F53, B_C35x, B_Q0S0           ;[ 8,1]
        PACK2   .S1     A_F32, A_F76, A_F26             ;[ 9,1]
||      PACKH2  .L1     A_F54, A_F32, A_F53             ;[ 9,1]
||      CMPYR1  .M2     B_F26, B_C62x, B_r1r0           ;[ 9,1]
        PACK2   .S1     A_F10, A_F54, A_F04             ;[10,1]
||      PACKH2  .L1     A_F10, A_F76, A_F17             ;[10,1]
||      PACKH2  .S2     B_F10, B_F76, B_F17             ;[10,1]
||      CMPYR1  .M2X    B_F04, A_C44x, B_p1p0           ;[10,1]
||      CMPYR1  .M1X    A_F53, B_C35x, A_Q0S0           ;[10,1]
        CMPYR1  .M1X    A_F26, B_C62x, A_r1r0           ;[11,1]
        CMPYR1  .M1     A_F04, A_C44x, A_p1p0           ;[12,1]
||      CMPYR1  .M2X    B_F17, A_C71x, B_Q1S1           ;[12,1]
*- Stage 2 -----------------------------------------------------------------*
; ADDSUB2 combines two products into a register pair (g2h2:q0s0 and
; g1g0:h1h0); q0s0 is multiplied once more, by A_C00nx, to give h3g3.
        NOP     1
        CMPYR1  .M1     A_F17, A_C71x, A_Q1S1           ;[14,1]
        NOP     1
        ADDSUB2 .L2     B_Q1S1, B_Q0S0, B_g2h2:B_q0s0   ;[16,1]
        CMPYR1  .M2X    B_q0s0, A_C00nx, B_h3g3         ;[17,1]
        ADDSUB2 .L2     B_p1p0, B_r1r0, B_g1g0:B_h1h0   ;[18,1]
||      ADDSUB2 .L1     A_Q1S1, A_Q0S0, A_g2h2:A_q0s0   ;[18,1]
*- Stage 3 -----------------------------------------------------------------*
; PACKLH2 / PACKHL2 rearrange h3g3 and g2h2 into g3g2 and h3h2; ADD2 / SUB2
; on g1g0, h1h0, g3g2 and h3h2 then produce f10s, f23, f67 and f54s.
        CMPYR1  .M1     A_q0s0, A_C00nx, A_h3g3         ;[19,1]
        ADDSUB2 .L1     A_p1p0, A_r1r0, A_g1g0:A_h1h0   ;[20,1]
        PACKLH2 .L2     B_h3g3, B_g2h2, B_g3g2          ;[21,1]
||      PACKHL2 .S2     B_h3g3, B_g2h2, B_h3h2          ;[21,1]
        SUB2    .D2     B_g1g0, B_h3h2, B_f67           ;[22,1]
        PACKLH2 .L1     A_h3g3, A_g2h2, A_g3g2          ;[23,1]
||      PACKHL2 .S1     A_h3g3, A_g2h2, A_h3h2          ;[23,1]
||      ADD2    .S2     B_h1h0, B_g3g2, B_f23           ;[23,1]
||      ADD2    .L2     B_g1g0, B_h3h2, B_f10s          ;[23,1]
        ADD2    .S1     A_g1g0, A_h3h2, A_f10s          ;[24,1]
||      SUB2    .D2     B_h1h0, B_g3g2, B_f54s          ;[24,1]
*- Stage 4 -----------------------------------------------------------------*
; ROTL by 16 and SWAP2 exchange the two halfwords of f67 and f23.  The four
; result double words are stored at the same indexes the inputs were loaded
; from ([1], then [0] with a post-increment of 4 double words).
        SUB2    .S1     A_h1h0, A_g3g2, A_f54s          ;[25,1]
||      SUB2    .L1     A_g1g0, A_h3h2, A_f67           ;[25,1]
||      ADD2    .D1     A_h1h0, A_g3g2, A_f23           ;[25,1]
||      ROTL    .M2     B_f67, 16, B_f76s               ;[25,1]
        SWAP2   .S1     A_f23, A_f32s                   ;[26,1]
||      SWAP2   .L2     B_f23, B_f32s                   ;[26,1]
        STDW    .D2T2   B_f76s:B_f54s, *+B_o_ptr[1]     ;[27,1]
||      ROTL    .M1     A_f67, 16, A_f76s               ;[27,1]
        NOP     1
        STDW    .D1T1   A_f76s:A_f54s, *+A_o_ptr[1]     ;[29,1]
||      STDW    .D2T2   B_f32s:B_f10s, *B_o_ptr++[4]    ;[29,1]
; SPKERNEL closes the loop body opened by SPLOOPD.
        SPKERNEL 3, 5
||      STDW    .D1T1   A_f32s:A_f10s, *A_o_ptr++[4]    ;[30,1]
* ========================================================================= *
* Vertical loop
* ========================================================================= *
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Symbolic register names for the vertical pass.
; NOTE(review): this list appears truncated -- nothing of the vertical loop
; follows B_F53x in this copy of the file, so the roles noted below cannot be
; checked against the code that uses them.
;
; B_i_ptr and A_o_ptr are rebound here to different registers than in the
; horizontal pass (B21 and A21 instead of B9 and A9).
        .asg    B21, B_i_ptr
        .asg    A21, A_o_ptr
; Presumably the rotating column-pointer "fixup" constant described in the
; file header -- confirm against the vertical loop code.
        .asg    B20, B_k_fix
        .asg    A22, A_k_fix
; Cosine-constant registers: the same bindings as in the horizontal pass (the
; file header states that these registers are reused between the two loops).
        .asg    A17, A_C71x
        .asg    B18, B_C35x
        .asg    A18, A_C44x
        .asg    B19, B_C62x
        .asg    A19, A_C00nx
; NOTE(review): roles of A_rnd3, B_fx1 and A_fx2 are not visible in this
; copy; confirm before relying on the names.
        .asg    A23, A_rnd3
        .asg    B22, B_fx1
        .asg    A9, A_fx2
; Working registers.  Some names share a physical register: B_F77 and B_F00
; are both B16; B_F55, B_F11 and B_F53x are all B25.
        .asg    B16, B_F77
        .asg    B8, B_F66
        .asg    B25, B_F55
        .asg    B23, B_F44
        .asg    B17, B_F33
        .asg    B9, B_F22
        .asg    B25, B_F11
        .asg    B16, B_F00
        .asg    B29, B_F17x
        .asg    B28, B_F17
        .asg    B25, B_F53x
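* ------------------------------------------------------------------------- *
* The lines below are code-viewer UI text captured with the listing
* (keyboard-shortcut help). They are not part of the program.
*   Keyboard shortcuts
*   Copy code ............... Ctrl + C
*   Search code ............. Ctrl + F
*   Full-screen mode ........ F11
*   Switch theme ............ Ctrl + Shift + D
*   Show shortcuts .......... ?
*   Increase font size ...... Ctrl + =
*   Decrease font size ...... Ctrl + -
* ------------------------------------------------------------------------- *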