; inverse discrete cosine transform.txt
; Font size:
.asg A5, A_X5c5 ; X5 * c5
.asg A0, A_X5c7 ; X5 * c7
.asg A6, A_X7c1 ; X7 * c1
.asg A7, A_X7c3 ; X7 * c3
.asg A4, A_X7c5 ; X7 * c5
.asg A6, A_X7c7 ; X7 * c7
.asg A3, A_h2a ; X5 * c5 + X7 * c7
.asg B3, B_h2b ; X1 * c1 + X3 * c3
.asg B6, B_h2 ; Node h2 in signal flow graph
.asg A4, A_h3a ; X5 * c1 + X7 * c5
.asg B1, B_h3b ; X1 * c3 + X3 * c7
.asg A3, A_h3 ; Node h3 in signal flow graph
.asg A9, A_g3a ; X5 * c7 + X7 * c3
.asg B1, B_g3b ; X1 * c5 + X3 * c1
.asg B7, B_g3 ; Node g3 in signal flow graph
.asg A9, A_g2a ; X5 * c3 + X7 * c1
.asg B1, B_g2b ; X1 * c7 + X3 * c5
.asg A0, A_g2 ; Node g2 in signal flow graph
.asg B8, B_x0 ; Output x0, pre-saturate/truncate
.asg A1, A_x1 ; Output x1, pre-saturate/truncate
.asg B7, B_x2 ; Output x2, pre-saturate/truncate
.asg A4, A_x3 ; Output x3, pre-saturate/truncate
.asg A0, A_x4 ; Output x4, pre-saturate/truncate
.asg B4, B_x5 ; Output x5, pre-saturate/truncate
.asg A5, A_x6 ; Output x6, pre-saturate/truncate
.asg B6, B_x7 ; Output x7, pre-saturate/truncate
.asg B5, B_x0s ; Output x0, saturated to 9 bits
.asg A10, A_x1s ; Output x1, saturated to 9 bits
.asg B3, B_x2s ; Output x2, saturated to 9 bits
.asg A6, A_x3s ; Output x3, saturated to 9 bits
.asg A7, A_x4s ; Output x4, saturated to 9 bits
.asg B4, B_x5s ; Output x5, saturated to 9 bits
.asg A3, A_x6s ; Output x6, saturated to 9 bits
.asg B6, B_x7s ; Output x7, saturated to 9 bits
.asg B8, B_x0t ; Output x0, truncated to 9 bits
.asg A0, A_x1t ; Output x1, truncated to 9 bits
.asg B0, B_x2t ; Output x2, truncated to 9 bits
.asg A6, A_x3t ; Output x3, truncated to 9 bits
.asg A7, A_x4t ; Output x4, truncated to 9 bits
.asg B4, B_x5t ; Output x5, truncated to 9 bits
.asg A5, A_x6t ; Output x6, truncated to 9 bits
.asg B3, B_x7t ; Output x7, truncated to 9 bits
; ============================================================================
; ============================ PIPE LOOP PROLOG ==============================
v_prolog:
LDW .D2T1 *B0[1], A_o ; Unspill loop trip count
|| ADDK .S2 -128, B_o_ptr ; Fixup for vert loop
;-
LDW .D1T1 *+A_i_ptr[1], A_X7X6 ;[ 1,1]
|| LDW .D2T2 *-B_i_ptr[1], B_X1X0 ;[ 1,1]
ADDK .S1 -128, A_o_ptr ; Fixup for vert loop
; Set up modified constants for second loop
; Note: A_c7c5, B_c7c5 are in same regs both loops.
; Also, B_c2c1 reuses h_loop's B_c3c1.
LDW .D2T2 * B_i_ptr++[4], B_X3X2 ;[ 3,1]
|| LDW .D1T1 * A_i_ptr++[4], A_X5X4 ;[ 3,1]
MVKLH .S2 cst_c2, B_c2c1 ; c2 (B_c2c1 == B_c3c1)
|| MVKLH .S1 cst_c6, A_c6c3 ; c6
MVK .S2 8, B_i ; Inner loop counter.
;-
MPYHL .M1 A_X7X6, A_c6c3, A_X7c3 ;[ 6,1]
MPYH .M1 A_X7X6, A_c7c5, A_X7c7 ;[ 7,1]
|| MPYHL .M2 B_X1X0, B_c2c1, B_X1c1 ;[ 7,1]
MVK .S1 cst_c4, A_c1c4 ;[ 8,1]
|| MPYH .M1 A_X5X4, A_c7c5, A_X5c7 ;[ 8,1]
|| MPYHL .M2 B_X1X0, B_c7c5, B_X1c5 ;[ 8,1]
MPY .M1 A_X5X4, A_c1c4, A_P1 ;[ 9,1]
|| MPYHL .M2 B_X3X2, B_c2c1, B_X3c1 ;[ 9,1]
;-
ADD .D1 A_X5c7, A_X7c3, A_g3a ;[10,1]
|| MPYHL .M1 A_X5X4, A_c6c3, A_X5c3 ;[10,1]
|| MPYHL .M2X B_X3X2, A_c6c3, B_X3c3 ;[10,1]
SUB .L2 B_X1c5, B_X3c1, B_g3b ;[11,1]
|| MPYHL .M1 A_X5X4, A_c7c5, A_X5c5 ;[11,1]
|| MPY .M2X B_X1X0, A_c1c4, B_P0_t ;[11,1]
|| MVK .S2 -32768, B_rnd ;[ 6,1]
|| B .S1 v_loop_0 + 8 ; skip 2
;-
ADD .L2X B_g3b, A_g3a, B_g3 ;[12,1]
|| MPYHL .M1X A_X7X6, B_c2c1, A_X7c1 ;[12,1]
|| MPYH .M2 B_X3X2, B_c7c5, B_X3c7 ;[12,1]
|| LDW .D1T1 *+A_i_ptr[1], A_X7X6 ;[ 1,2]
|| LDW .D2T2 *-B_i_ptr[1], B_X1X0 ;[ 1,2]
|| B .S2 v_loop_1 + 8 ; skip 2
;-
SUB .D2 B_P0_t, B_rnd, B_P0 ;[13,1]
|| ADD .L2 B_X1c1, B_X3c3, B_h2b ;[13,1]
|| ADD .L1 A_X5c5, A_X7c7, A_h2a ;[13,1]
|| MPYLH .M1X A_X7X6, B_c2c1, A_X6c2 ;[13,1]
|| MPYLH .M2X B_X3X2, A_c6c3, B_X2c6 ;[13,1]
|| B .S2 v_loop_2 + 12 ; skip 3
|| MVKL .S1 cst_c1, A_c1c4 ;
;-
SUB .L1 A_X5c3, A_X7c1, A_g2a ;[14,1]
|| MPYHL .M1 A_X5X4, A_c1c4, A_X5c1 ;[14,1]
|| MPYHL .M2X B_X1X0, A_c6c3, B_X1c3 ;[14,1]
|| LDW .D2T2 * B_i_ptr++[4], B_X3X2 ;[ 3,2]
|| LDW .D1T1 * A_i_ptr++[4], A_X5X4 ;[ 3,2]
|| B .S2 v_loop_3 + 4 ; skip 1
|| ADD .S1X B_P0, A_P1, A_p0 ;[16,1]
;-
ADD .L2X B_h2b, A_h2a, B_h2 ;[15,1]
|| SUB .L1X B_P0, A_P1, A_p1 ;[15,1]
|| MPYHL .M1 A_X7X6, A_c7c5, A_X7c5 ;[15,1]
|| MPYLH .M2 B_X3X2, B_c2c1, B_X2c2 ;[15,1]
|| B .S2 v_loop_4 + 4 ; skip 1
SUB .L2X B_X2c6, A_X6c2, B_r1 ;[16,1]
|| MPYLH .M1 A_X7X6, A_c6c3, A_X6c6 ;[16,1]
|| MPYH .M2 B_X1X0, B_c7c5, B_X1c7 ;[16,1]
;-
; ===== Branch Occurs =====
; ============================ PIPE LOOP KERNEL ==============================
v_loop:
v_loop_0:
STH .D1T2 B_x7t, *+A_o_ptr[24] ;[28,1]
|| SHR .S1 A_x4s, trunc2, A_x4t ;[28,1]
|| ADD .L1 A_X5c1, A_X7c5, A_h3a ;[17,2]
|| SUB .D2 B_X1c3, B_X3c7, B_h3b ;[17,2]
|| SUB .L2X A_p1, B_r1, B_h1 ;[17,2]
|| MPYHL .M2 B_X3X2, B_c7c5, B_X3c5 ;[17,2]
|| MVK .S2 -32768, B_rnd ;[ 6,3]
|| MPYHL .M1 A_X7X6, A_c6c3, A_X7c3 ;[ 6,3]
v_loop_1:
STH .D1T1 A_x4t, * A_o_ptr++[1] ;[29,1]
|| SHR .S1 A_x1s, trunc2, A_x1t ;[29,1]
|| ADD .S2 B_h1, B_g3, B_x2 ;[18,2]
|| SUB .D2 B_h1, B_g3, B_x5 ;[18,2]
|| ADD .L1X A_p1, B_r1, A_g1 ;[18,2]
|| ADD .L2X B_X2c2, A_X6c6, B_r0 ;[18,2]
|| MPYH .M1 A_X7X6, A_c7c5, A_X7c7 ;[ 7,3]
|| MPYHL .M2 B_X1X0, B_c2c1, B_X1c1 ;[ 7,3]
v_loop_2:
[!B_i]ADDAW .D1 A_o_ptr, 28, A_o_ptr ;[30,1]
|| STH .D2T1 A_x3t, * B_o_ptr++[1] ;[30,1]
|| SHR .S2 B_x0s, trunc2, B_x0t ;[30,1]
|| SUB .L2 B_X1c7, B_X3c5, B_g2b ;[19,2]
|| SUB .L1X B_h3b, A_h3a, A_h3 ;[19,2]
|| MVK .S1 cst_c4, A_c1c4 ;[ 8,3]
|| MPYH .M1 A_X5X4, A_c7c5, A_X5c7 ;[ 8,3]
|| MPYHL .M2 B_X1X0, B_c7c5, B_X1c5 ;[ 8,3]
v_loop_3:
STH .D2T1 A_x1t, *-B_o_ptr[17] ;[31,1]
|| ADD .L2X A_p0, B_r0, B_g0 ;[20,2]
|| SSHL .S2 B_x5, satl, B_x5s ;[20,2]
|| SUB .S1X A_p0, B_r0, A_h0 ;[20,2]
|| SUB .L1 A_g1, A_h3, A_x6 ;[20,2]
|| ADD .D1 A_g1, A_h3, A_x1 ;[20,2]
|| MPY .M1 A_X5X4, A_c1c4, A_P1 ;[ 9,3]
|| MPYHL .M2 B_X3X2, B_c2c1, B_X3c1 ;[ 9,3]
v_loop_4:
STH .D2T2 B_x0t, *-B_o_ptr[25] ;[32,1]
|| SUB .S2 B_g0, B_h2, B_x7 ;[21,2]
|| ADD .L2 B_g0, B_h2, B_x0 ;[21,2]
|| ADD .L1X B_g2b, A_g2a, A_g2 ;[21,2]
|| SSHL .S1 A_x1, satl, A_x1s ;[21,2]
|| ADD .D1 A_X5c7, A_X7c3, A_g3a ;[10,3]
|| MPYHL .M1 A_X5X4, A_c6c3, A_X5c3 ;[10,3]
|| MPYHL .M2X B_X3X2, A_c6c3, B_X3c3 ;[10,3]
v_loop_5:
[ A_o]B .S1 v_loop ;[33,1]
||[!B_i]ADDAW .D2 B_o_ptr, 28, B_o_ptr ;[33,1]
|| SSHL .S2 B_x2, satl, B_x2s ;[22,2]
|| ADD .D1 A_h0, A_g2, A_x3 ;[22,2]
||[ A_o]SUB .L1 A_o, 1, A_o ;[22,2]
|| SUB .L2 B_X1c5, B_X3c1, B_g3b ;[11,3]
|| MPYHL .M1 A_X5X4, A_c7c5, A_X5c5 ;[11,3]
|| MPY .M2X B_X1X0, A_c1c4, B_P0_t ;[11,3]
v_loop_6:
SHR .S2 B_x5s, trunc2, B_x5t ;[23,2]
|| SUB .L1 A_h0, A_g2, A_x4 ;[23,2]
|| SSHL .S1 A_x6, satl, A_x6s ;[23,2]
|| ADD .L2X B_g3b, A_g3a, B_g3 ;[12,3]
|| MPYHL .M1X A_X7X6, B_c2c1, A_X7c1 ;[12,3]
|| MPYH .M2 B_X3X2, B_c7c5, B_X3c7 ;[12,3]
|| LDW .D1T1 *
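; Keyboard shortcut help
; Copy code
; Ctrl + C
; Search code
; Ctrl + F
; Full-screen mode
; F11
; Switch theme
; Ctrl + Shift + D
; Show shortcuts
; ?
; Increase font size
; Ctrl + =
; Decrease font size
; Ctrl + -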