?? fft32x32.asm
字號:
* NOTE(review): this fragment starts partway through earlier code.  The SPLOOP
* whose stores are issued here (cycle tags [33,1]..[35,1]) and the LOOP_WHILE
* label are outside this view, so the notes below describe only what these
* lines do on their own.
*
* Packet 1: issue the conditional branch back to LOOP_WHILE (taken when B_wh
* is non-zero).  B_wh is read here, before the CMPGTU in the next packet
* rewrites it, so the value tested was produced earlier.  The ^-flagged
* instructions are the ones the SPMASK in this packet applies to: two
* doubleword stores and A_y_old_2 -= A_2h2.
SPMASK
||[B_wh] B .S2 LOOP_WHILE
||^ STDW .D2T1 A_xl1_3o:A_xl1_2o, *++B_y_old[B_h2_old_2];[33,1]
||^ STDW .D1T2 B_xl2_1o:B_xl2_0o, *A_y_old_2[A_h2_old_2];[33,1]
||^ SUB .S1 A_y_old_2, A_2h2, A_y_old_2 ;[33,1]
* Branch delay slot 1: B_wh = (B_stride > B_radix), unsigned compare, done
* before B_stride is shifted below -- presumably the continue flag for the
* next outer-loop pass.  Also A_tw_offset += B_fft_jmp_temp, plus one more
* ^-flagged store.
SPMASK
|| CMPGTU .L2 B_stride, B_radix, B_wh
|| ADD .L1X A_tw_offset, B_fft_jmp_temp,A_tw_offset
||^ STDW .D2T1 A_xl2_3o:A_xl2_2o, *B_y_old[B_h2_old_2] ;[34,1]
* Branch delay slot 2: when B_ifj is zero, A_y_old_2 += B_fft_jmp_old_2.
SPMASK
||^[!B_ifj] ADD .L1X A_y_old_2, B_fft_jmp_old_2, A_y_old_2 ;[35,1]
* Branch delay slot 3.
NOP
* ============ STAGE I,0 (prolog) + Outer Loop =======================*
* Branch delay slot 4: B_stride >>= 2 (logical shift, i.e. divide by 4).
SHRU .S2 B_stride, 2, B_stride
* Branch delay slot 5; the branch issued above takes effect after this cycle.
NOP
; branch occurs here
* ================= SYMBOLIC REGISTER NAMES (loop below) ================== *
* Each .asg binds a symbolic name to one physical register.  The table is
* grouped by the role each name plays in the SETUP code and SPLOOP body that
* follow.  Five registers deliberately carry two names; that is only valid
* while the two values are never live at the same time:
*     B9  : B_h0    / B_xl1_1c        B22 : B_h1    / B_y0
*     B30 : B_xl1_1 / B_y4            B31 : B_xh1_1 / B_y5
*     A28 : A_x0    / A_temp
* ------------------------------------------------------------------------- *

* --- loop control, sizes and stack ---------------------------------------- *
        .asg    A1,     A_r2            ; predicate for the [!A_r2] instructions
        .asg    A18,    A_radix
        .asg    A21,    A_n
        .asg    B0,     B_n
        .asg    A19,    A_i             ; value moved into ILC
        .asg    B15,    B_SP

* --- data pointers --------------------------------------------------------- *
        .asg    A20,    A_ptr_x
        .asg    B1,     B_ptr_y
        .asg    A5,     A_p_x0          ; A-side load pointer
        .asg    B6,     B_p_x0          ; B-side load pointer
        .asg    B18,    B_p_y0          ; store bases used with index B_h4
        .asg    B19,    B_p_y1
        .asg    B20,    B_p_y2
        .asg    B21,    B_p_y3

* --- store-index generation ------------------------------------------------ *
        .asg    B7,     B_j             ; running counter
        .asg    B17,    B_j0            ; counter step
        .asg    B16,    B_l1            ; final right-shift amount
        .asg    B9,     B_h0            ; DEAL result
        .asg    B22,    B_h1            ; BITR result
        .asg    B4,     B_h2            ; ROTL result
        .asg    B5,     B_h3            ; SHFL result
        .asg    B8,     B_h4            ; index used by the STDWs

* --- loaded inputs (register pairs odd:even) ------------------------------- *
        .asg    A28,    A_x0
        .asg    A29,    A_x1
        .asg    B26,    B_x2
        .asg    B27,    B_x3
        .asg    A30,    A_x4
        .asg    A31,    A_x5
        .asg    B28,    B_x6
        .asg    B29,    B_x7

* --- butterfly intermediates (xh = sum, xl = difference) ------------------- *
        .asg    A16,    A_xl0_0
        .asg    A17,    A_xh0_0
        .asg    A24,    A_xl1_0
        .asg    A25,    A_xh1_0
        .asg    B24,    B_xl0_1
        .asg    B25,    B_xh0_1
        .asg    B30,    B_xl1_1
        .asg    B31,    B_xh1_1
        .asg    B9,     B_xl1_1c        ; copy of B_xl1_1 (B30 is reused as B_y4)

* --- outputs (stored as pairs y1:y0, y3:y2, y5:y4, y7:y6) ------------------ *
        .asg    B22,    B_y0
        .asg    B23,    B_y1
        .asg    A26,    A_y2
        .asg    A27,    A_y3
        .asg    B30,    B_y4
        .asg    B31,    B_y5
        .asg    A6,     A_y6
        .asg    A7,     A_y7
        .asg    A28,    A_temp          ; scratch for the y3/y7 exchange
* =========================== SETUP ============================== *
* Prepares the software-pipelined loop that follows: reloads n and the x / y
* pointers from the stack frame, derives the radix flag A_r2 and the value
* written to ILC, then opens the SPLOOP.  LDW offsets in [] are in words.
*
* Packet 1: A_radix = B_radix; B_SP = A_SP so both register files address the
* same frame; start loading A_n from byte offset 4 of the frame.
MV .S1X B_radix, A_radix
|| MV .S2X A_SP, B_SP ; Twin Stack Pointer
|| LDW .D1T1 *+A_SP[1], A_n ; Restore n
* Packet 2: start loading A_ptr_x (frame offset 8) and B_ptr_y (frame offset
* 12); A_radix >>= 2.
* NOTE(review): the original "Restore" wording suggests B_radix holds the
* radix scaled by 4 at this point -- confirm against the earlier code.
LDW .D1T1 *+A_SP[2], A_ptr_x ; Restore A_ptr_x
|| LDW .D2T2 *+B_SP[3], B_ptr_y ; Restore B_ptr_y
|| SHRU .S1 A_radix, 2, A_radix ; Restore A_radix
* Packet 2 plus these three cycles cover the four LDW delay slots of the A_n
* load, so A_n is valid in the next packet.
NOP 3
* A_i = A_n >> 2.  A_r2 = A_radix - 2: it is zero only when the shifted radix
* equals 2, which is what enables every [!A_r2] instruction in the loop.
SHRU .S1 A_n, 2, A_i
|| SUB .L1 A_radix, 2, A_r2
* A_i -= 1 (the value handed to ILC below); B_n = A_n for B-side arithmetic.
SUB .L1 A_i, 1, A_i
|| MV .S2 A_n, B_n
* Open the SPLOOP with an iteration interval of 6 cycles.  In the same
* packet: ILC = A_i, A-side load pointer = A_ptr_x, counter B_j = 0.
SPLOOPD 6
|| MVC .S2 A_i, ILC
|| MV .D1 A_ptr_x, A_p_x0
|| ZERO .L2 B_j
;* =========================== STAGE 0 ============================== *
;* SPLOOP body, iteration interval 6; the ;[c,1] tags give the cycle of each
;* instruction within one iteration.
;*
;* Per iteration the loop
;*   - loads four doublewords: A side gets x1:x0 and x5:x4, B side gets x3:x2
;*     and x7:x6 (the B pointer starts 8 bytes after the A pointer and both
;*     step by two doublewords, so the two sides interleave);
;*   - turns the counter B_j into the store index B_h4 through
;*     DEAL -> BITR -> ROTL 16 -> SHFL -> SHRU by B_l1
;*     (presumably a digit-reversed index -- confirm against the C model);
;*   - forms y0..y7 from x0..x7 and stores them as four doublewords at
;*     B_p_y0..B_p_y3, each indexed by B_h4.
;*
;* Arithmetic when A_r2 != 0:
;*     xh0_0 = x0+x4   xl0_0 = x0-x4   xh1_0 = x1+x5   xl1_0 = x1-x5
;*     xh0_1 = x2+x6   xl0_1 = x2-x6   xh1_1 = x3+x7   xl1_1 = x3-x7
;*     y0 = xh0_0+xh0_1   y1 = xh1_0+xh1_1   y4 = xh0_0-xh0_1   y5 = xh1_0-xh1_1
;*     y2 = xl0_0+xl1_1   y3 = xl1_0-xl0_1   y6 = xl0_0-xl1_1   y7 = xl1_0+xl0_1
;* When A_r2 == 0 the eight intermediates are replaced by the raw inputs
;*     xh0_0 = x0  xh1_0 = x1  xh0_1 = x2  xh1_1 = x3
;*     xl0_0 = x4  xl1_0 = x5  xl1_1 = x6  xl0_1 = x7
;* and y3 / y7 are exchanged before the stores.
;*
;* The overrides use "ROTL reg, 0, dst" on an .M unit as a move whose result
;* lands one cycle later than a single-cycle ADDSUB result, so the override
;* wins even when it is issued in the same packet as the ADDSUB.
;*
;* FIX: the source had no override for A_xh0_0, so with A_r2 == 0 the outputs
;* y0 and y4 were built from x0+x4 instead of x0.  The missing
;* "[!A_r2] ROTL .M1 A_x0, 0, A_xh0_0" is added to the cycle-14 packet, where
;* .M1 is free; it mirrors the B-side ROTL of B_x3 in that same packet.
;*
;* SPMASK / ^ : ^-flagged instructions are the ones the SPMASK in their packet
;* applies to.  Here they are one-time initialisation issued while the first
;* iteration is being loaded, and they are kept out of the loop buffer.

        DEAL    .M2     B_j, B_h0                           ;[ 1,1]

; One-time: default counter step B_j0 = 4.
        SPMASK
||^     MVK     .L2     4, B_j0

; One-time: B_p_y0 = B_ptr_y; step becomes 8 when A_r2 == 0.
        SPMASK
||      BITR    .M2     B_h0, B_h1                          ;[ 3,1]
||^     MV      .L2     B_ptr_y, B_p_y0
||^[!A_r2] MVK  .S2     8, B_j0

; Advance the counter for the next iteration.
; One-time: B-side load pointer = A_ptr_x + 8 bytes.
        SPMASK
||      ADD     .L2     B_j, B_j0, B_j                      ;[ 4,1]
||^     ADD     .D2     A_ptr_x, 8, B_p_x0

; One-time: B_p_y1 = B_p_y0 + 2*B_n bytes; B_l1 = NORM(B_n).
        SPMASK
||^     ADDAH   .D2     B_p_y0, B_n, B_p_y1
||^     NORM    .L2     B_n, B_l1

        ROTL    .M2     B_h1, 16, B_h2                      ;[ 6,1]
||      LDDW    .D1T1   *A_p_x0++[2], A_x1:A_x0             ;[ 6,1]
||      LDDW    .D2T2   *B_p_x0++[2], B_x3:B_x2             ;[ 6,1]

;* =========================== STAGE 1 ============================== *
        LDDW    .D2T2   *B_p_x0++[2], B_x7:B_x6             ;[ 7,1]
||      LDDW    .D1T1   *A_p_x0++[2], A_x5:A_x4             ;[ 7,1]

; One-time: B_p_y2 = B_p_y0 + 4*B_n bytes; B_l1 = NORM(B_n) + 2.
        SPMASK
||^     ADDAW   .D2     B_p_y0, B_n, B_p_y2
||^     ADD     .L2     B_l1, 2, B_l1

; One-time, A_r2 == 0 only: restart B_l1 from NORM(B_n) (+1 added below).
        SPMASK
||^[!A_r2] NORM .L2     B_n, B_l1

; One-time: B_p_y3 = B_p_y2 + 2*B_n bytes.
        SPMASK
||^     ADDAH   .D2     B_p_y2, B_n, B_p_y3

; One-time, A_r2 == 0 only: B_p_y3 = B_p_y2 + B_n bytes; B_l1 = NORM(B_n) + 1.
        SPMASK
||      SHFL    .M2     B_h2, B_h3                          ;[11,1]
||^[!A_r2] ADD  .D2     B_p_y2, B_n, B_p_y3
||^[!A_r2] ADD  .L2     B_l1, 1, B_l1

; One-time, A_r2 == 0 only: B_p_y1 = B_p_y0 + B_n bytes.
        SPMASK
||^[!A_r2] ADD  .L2     B_p_y0, B_n, B_p_y1

;* =========================== STAGE 2 ============================== *
; ADDSUB writes the sum to the odd register and the difference to the even
; register of the destination pair (xh = sum, xl = difference).
        ADDSUB  .L1     A_x1, A_x5, A_xh1_0:A_xl1_0         ;[13,1]

 [!A_r2] ROTL   .M2     B_x3, 0, B_xh1_1                    ;[14,1]
||      ADDSUB  .L1     A_x0, A_x4, A_xh0_0:A_xl0_0         ;[14,1]
||      ADDSUB  .L2     B_x3, B_x7, B_xh1_1:B_xl1_1         ;[14,1]
||[!A_r2] ROTL  .M1     A_x0, 0, A_xh0_0                    ;[14,1] FIX: xh0_0 = x0

; B_xl1_1 (B30) is reused as B_y4 at cycle 19, so keep a copy in B_xl1_1c.
        MV      .S2     B_xl1_1, B_xl1_1c                   ;[15,1]
||[!A_r2] ROTL  .M1     A_x1, 0, A_xh1_0                    ;[15,1]
||      ADDSUB  .L2     B_x2, B_x6, B_xh0_1:B_xl0_1         ;[15,1]

 [!A_r2] ROTL   .M1     A_x5, 0, A_xl1_0                    ;[16,1]
||[!A_r2] MV    .S2     B_x7, B_xl0_1                       ;[16,1]
||[!A_r2] ROTL  .M2     B_x2, 0, B_xh0_1                    ;[16,1]

; B_h4 = B_h3 >> B_l1 is the doubleword index used by all four stores.
 [!A_r2] ROTL   .M1     A_x4, 0, A_xl0_0                    ;[17,1]
||[!A_r2] MV    .L2     B_x6, B_xl1_1c                      ;[17,1]
||      SHRU    .S2     B_h3, B_l1, B_h4                    ;[17,1]

        SUB     .S1X    A_xl1_0, B_xl0_1, A_y3              ;[18,1]
||      ADD     .L1X    A_xl1_0, B_xl0_1, A_y7              ;[18,1]
||      ADD     .L2X    A_xh1_0, B_xh1_1, B_y1              ;[18,1]
||      SUB     .S2X    A_xh1_0, B_xh1_1, B_y5              ;[18,1]

;* =========================== STAGE 3 ============================== *
        SUB     .L2X    A_xh0_0, B_xh0_1, B_y4              ;[19,1]
||      ADD     .S2X    A_xh0_0, B_xh0_1, B_y0              ;[19,1]
||      SUB     .S1X    A_xl0_0, B_xl1_1c, A_y6             ;[19,1]

; A_temp keeps y3 for the exchange in the next packet.
        STDW    .D2T2   B_y5:B_y4, *B_p_y2[B_h4]            ;[20,1]
||      MV      .D1     A_y3, A_temp                        ;[20,1]
||      ADD     .S1X    A_xl0_0, B_xl1_1c, A_y2             ;[20,1]

; A_r2 == 0 only: exchange y3 and y7.  Both moves read their sources in the
; same cycle, so A_y3 receives the old A_y7 and A_y7 receives the old A_y3.
        STDW    .D2T2   B_y1:B_y0, *B_p_y0[B_h4]            ;[21,1]
||[!A_r2] MV    .S1     A_y7, A_y3                          ;[21,1]
||[!A_r2] MV    .D1     A_temp, A_y7                        ;[21,1]

        STDW    .D2T1   A_y3:A_y2, *B_p_y1[B_h4]            ;[22,1]

; SPKERNEL closes the loop body; its operands (stage 1, cycle 0) set how far
; the code after the loop is overlapped with the epilog.
        SPKERNEL 1, 0
||      STDW    .D2T1   A_y7:A_y6, *B_p_y3[B_h4]            ;[23,1]
;* =========================== END STAGE 3 ============================== *
* =========================== STAGE 3,2 + End Loop Code ============================== *
* Runs while the SPLOOP epilog is still draining (see the closing comment
* below): reloads saved registers from the stack frame, releases the frame
* and returns.  Offsets in [] are scaled by the access size: words for LDW,
* doublewords for LDDW.  B_ret and A_SP are named outside this view.
*
* B_ret from frame byte offset 16; A13:A12 from byte offset 56.
LDW .D2T2 *+B_SP[4], B_ret ; Get return address
|| LDDW .D1T1 *+A_SP[7], A13:A12
* B2 = 64, the amount added to B_SP at the end.
MVK .S2 64, B2
* NOTE(review): no instruction here consumes a result from this NOP cycle;
* it is presumably placed to avoid a unit conflict with the draining loop
* -- confirm before removing.
NOP
* B13:B12 from byte offset 48, B11:B10 from byte offset 32.
LDDW .D1T2 *+A_SP[6], B13:B12 ; Restore B13, B12
LDDW .D1T2 *+A_SP[4], B11:B10 ; Restore B11, B10
* A15:A14 from byte offset 24; B14 from byte offset 64.
LDDW .D1T1 *+A_SP[3], A15:A14 ; Restore A15, A14
|| LDW .D2T2 *+B_SP[16], B14 ; Restore B14
* =========================== STAGE 3 + End Loop Code ============================== *
* A11:A10 from byte offset 40.  This is the last load; its four delay slots
* elapse during the NOP cycles of the RETNOP that follows.
LDDW .D2T1 *+B_SP[5], A11:A10 ; Restore A11, A10
* Release the frame (B_SP += 64) and branch to B_ret.
* NOTE(review): RETNOP pads only 4 of the branch's 5 delay slots; confirm
* what is meant to occupy the last slot, since nothing follows but .end.
ADD .L2 B2, B_SP, B_SP
|| RETNOP .S2 B_ret, 4 ; Return to caller
; end overlap with EPILOG of SPLOOP
.end
* ======================================================================== *
* End of file: fft32x32.asm *
* ------------------------------------------------------------------------ *
* Copyright (C) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================== *
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -