?? fft16x32.asm
字號:
SPMASK
||^ STDW .D1T1 A_xl2_3o:A_xl2_2o, *++A_x__[A_h2_old] ;[0]
|| CMPGTU .L2X B_stride, A_radix, B_wh
|| SHRU .S2 B_stride, 2, B_stride ;
[B_wh] B .S2 LOOP_WHILE
NOP 4
CMPGTU .L2X B_stride, A_radix, B_wh
|| SHRU .S2 B_stride, 2, B_stride ;
; branch occurs here
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
; Register names for the final pass, grouped by the way the loop that
; follows uses them.  Several physical registers carry two names; each such
; pair is pointed out where the second name is introduced.

; -- Values reloaded from the stack frame, and loop control ------------------
        .asg    B11,    B_n             ; loaded from *+B_SP[0]
        .asg    A22,    A_ptr_x         ; loaded from *+B_SP[1]
        .asg    B10,    B_ptr_y         ; loaded from *+B_SP[2]
        .asg    A24,    A_SP            ; A-side copy of B_SP
        .asg    A23,    A_i             ; (B_n >> 2) - 1, moved into ILC
        .asg    A1,     A_r2            ; A_radix - 2; predicate of [!A_r2]

; -- Input pointers (post-incremented by the LDDWs) ---------------------------
        .asg    A21,    A_p_x0          ; starts at A_ptr_x
        .asg    B17,    B_p_x0          ; starts at A_ptr_x + 8

; -- Output base pointers (indexed by B_h4 in the STDWs) ----------------------
        .asg    B7,     B_p_y0
        .asg    B8,     B_p_y1
        .asg    B16,    B_p_y2
        .asg    B4,     B_p_y3

; -- Store-index chain: B_j -> DEAL -> BITR -> ROTL 16 -> SHFL -> SHRU --------
        .asg    B18,    B_j             ; running counter, stepped by B_j0
        .asg    B9,     B_j0            ; step: 4, or 8 when A_r2 is zero
        .asg    B19,    B_h0
        .asg    B25,    B_h1
        .asg    B20,    B_h2
        .asg    B5,     B_h3
        .asg    B22,    B_h4            ; final index used by the stores
        .asg    B28,    B_l1            ; shift count derived from NORM(B_n)

; -- Input words fetched per iteration ----------------------------------------
        .asg    A16,    A_x0
        .asg    A17,    A_x1
        .asg    B26,    B_x2
        .asg    B27,    B_x3
        .asg    A8,     A_x4
        .asg    A9,     A_x5
        .asg    B30,    B_x6
        .asg    B31,    B_x7

; -- First-level sums ("xh") --------------------------------------------------
        .asg    A19,    A_xh0_0
        .asg    A6,     A_xh1_0
        .asg    B23,    B_xh0_1
        .asg    B19,    B_xh1_1         ; same register as B_h0

; -- First-level differences ("xl") -------------------------------------------
        .asg    A18,    A_xl0_0
        .asg    A20,    A_xl1_0
        .asg    B21,    B_xl0_1
        .asg    B6,     B_xl1_1

; -- Output words --------------------------------------------------------------
        .asg    B20,    B_y0            ; same register as B_h2
        .asg    B21,    B_y1            ; same register as B_xl0_1
        .asg    A6,     A_y2            ; same register as A_xh1_0
        .asg    A7,     A_y3
        .asg    B24,    B_y4
        .asg    B25,    B_y5            ; same register as B_h1
        .asg    A4,     A_y6
        .asg    A5,     A_y7

; -- Scratch -------------------------------------------------------------------
        .asg    A4,     A_temp          ; same register as A_y6
* ========================================================================= *
; ---------------------------------------------------------------------------
; Final pass.  A software-pipelined loop (SPLOOPD, iteration interval 6, four
; stages, cycle numbers in the ";[cycle,iteration]" tags) that, per iteration:
;   * loads eight words x0..x7 with four LDDWs (A side: x1:x0, x5:x4;
;     B side: x3:x2, x7:x6),
;   * combines them with adds and subtracts only (no multiplies appear here),
;   * stores four double words y1:y0, y3:y2, y5:y4, y7:y6 through
;     B_p_y0, B_p_y1, B_p_y2, B_p_y3, all at the same index B_h4.
; A_r2 = A_radix - 2.  Every [!A_r2] instruction executes only when A_r2 is
; zero; it replaces a sum/difference with a plain copy and swaps y3 with y7.
; Results with A_r2 non-zero:
;   y0 = (x0+x4)+(x2+x6)   y1 = (x1+x5)+(x3+x7)
;   y2 = (x0-x4)+(x3-x7)   y3 = (x1-x5)-(x2-x6)
;   y4 = (x0+x4)-(x2+x6)   y5 = (x1+x5)-(x3+x7)
;   y6 = (x0-x4)-(x3-x7)   y7 = (x1-x5)+(x2-x6)
; Results with A_r2 zero:
;   y0 = x0+x2  y1 = x1+x3  y2 = x4+x6  y3 = x5+x7
;   y4 = x0-x2  y5 = x1-x3  y6 = x4-x6  y7 = x5-x7
; SPMASK packets: the instructions flagged with '^' are the one-time setup
; (pointer, step and shift-count initialisation).  Presumably the SPLOOP
; mechanism keeps them out of the loop buffer so they run once rather than
; per iteration -- confirm against the C64x+ CPU reference guide.
; NOTE(review): "SHRU .S1 B_n", "MVC .S2 A_i" and "ADD .S2 A_ptr_x" read a
; register from the opposite side without an X cross-path suffix, unlike the
; .L2X/.S1X/.S2X forms used elsewhere in this loop -- confirm the assembler
; accepts this, or that the X was lost when the text was extracted.
; ---------------------------------------------------------------------------
LDW .D2T2 *+B_SP[0], B_n ; Restore B_n
; Wait for the load to complete before B_n is read.
NOP 4
; Loop count: A_i = (B_n >> 2) - 1, written to ILC in the SPLOOPD packet.
SHRU .S1 B_n, 2, A_i
SUB .L1 A_i, 1, A_i
; Open the loop with iteration interval 6; in the same packet clear the
; index counter B_j, shift A_radix right by 2 and load ILC from A_i.
SPLOOPD 6
|| ZERO .D2 B_j
|| SHRU .S1 A_radix, 2, A_radix ; restore A_radix
|| MVC .S2 A_i, ILC
* =========================== STAGE 0 ================================= *
; Cycle 1 -- loop: start the index chain, B_h0 = DEAL(B_j).
;            setup: reload A_ptr_x from stack word 1.
SPMASK
|| DEAL .M2 B_j, B_h0 ;[ 1,1]
||^ LDW .D2T1 *+B_SP[1], A_ptr_x ; Restore A_ptr_x
; Cycle 2 -- setup: default counter step B_j0 = 4, A_r2 = A_radix - 2,
;            reload B_ptr_y from stack word 2.
SPMASK
||^ MVK .L2 4, B_j0
||^ SUB .L1 A_radix, 2, A_r2
||^ LDW .D2T2 *+B_SP[2], B_ptr_y ; Restore B_ptr_y
; Cycle 3 -- loop: B_h1 = BITR(B_h0).
;            setup: the counter step becomes 8 when A_r2 is zero.
SPMASK
|| BITR .M2 B_h0, B_h1 ;[ 3,1]
||^[!A_r2] MVK .D2 8, B_j0
; Cycle 4 -- loop: advance the counter for the next iteration, B_j += B_j0.
ADD .S2 B_j, B_j0, B_j ;[ 4,1]
; Cycle 5 -- setup: A_SP = B_SP, so the A-side data path can address the
;            same stack frame.
SPMASK
||^ MV .S1X B_SP, A_SP ; Twin Stack Pointer
; Cycle 6 -- setup: two read pointers into the input buffer, 8 bytes apart
;            (A_p_x0 = A_ptr_x, B_p_x0 = A_ptr_x + 8).
SPMASK
||^ ADD .S2 A_ptr_x, 8, B_p_x0
||^ MV .L1 A_ptr_x, A_p_x0
* =========================== STAGE 1 ================================= *
; Cycle 7 -- loop: load x1:x0 (A side) and x3:x2 (B side); each pointer then
;            advances by two double words.
LDDW .D1T1 *A_p_x0++[2], A_x1:A_x0 ;[ 7,1]
|| LDDW .D2T2 *B_p_x0++[2], B_x3:B_x2 ;[ 7,1]
; Cycle 8 -- loop: load x5:x4 and x7:x6 the same way; B_h2 = B_h1 rotated
;            left by 16.
LDDW .D1T1 *A_p_x0++[2], A_x5:A_x4 ;[ 8,1]
|| ROTL .M2 B_h1, 16, B_h2 ;[ 8,1]
|| LDDW .D2T2 *B_p_x0++[2], B_x7:B_x6 ;[ 8,1]
; Cycle 9 -- setup: first output base pointer, B_p_y0 = B_ptr_y.
SPMASK
||^ MV .L2 B_ptr_y, B_p_y0
; Cycle 10 -- setup: B_p_y2 = B_p_y0 + B_n words.
SPMASK
||^ ADDAW .D2 B_p_y0, B_n, B_p_y2
; Cycle 11 -- setup: B_l1 = NORM(B_n); B_p_y3 = B_p_y2 + B_n halfwords.
SPMASK
||^ NORM .L2 B_n, B_l1
||^ ADDAH .D2 B_p_y2, B_n, B_p_y3
; Cycle 12 -- loop: B_h3 = SHFL(B_h2).
;             setup: B_l1 += 2; B_p_y1 = B_p_y0 + B_n halfwords.
SPMASK
|| SHFL .M2 B_h2, B_h3 ;[12,1]
||^ ADD .S2 B_l1, 2, B_l1
||^ ADDAH .D2 B_p_y0, B_n, B_p_y1
* =========================== STAGE 2 ================================= *
; Cycle 13 -- setup, only when A_r2 is zero: B_p_y1 = B_p_y0 + B_n bytes,
;             replacing the value computed in cycle 12.
SPMASK
||^[!A_r2] ADD .S2 B_p_y0, B_n, B_p_y1
; Cycle 14 -- setup, only when A_r2 is zero: recompute B_l1 = NORM(B_n)
;             (1 is added in cycle 18) and B_p_y3 = B_p_y2 + B_n bytes.
SPMASK
||^[!A_r2] NORM .L2 B_n, B_l1
||^[!A_r2] ADD .S2 B_p_y2, B_n, B_p_y3
; Cycle 15 -- sums: xh0_0 = x4 + x0, xh1_1 = x7 + x3, xh0_1 = x6 + x2.
;   The predicated ROTL-by-0 is a copy (x0) into the same register as the
;   ADD.  Presumably the .M-unit result is written one cycle later than the
;   ADD result, so the copy is what remains when A_r2 is zero -- confirm
;   against the instruction latencies.  The same pattern recurs in cycles
;   16 to 18.
[!A_r2] ROTL .M1 A_x0, 0, A_xh0_0 ;[15,1]
|| ADD .S1 A_x4, A_x0, A_xh0_0 ;[15,1]
|| ADD .L2 B_x7, B_x3, B_xh1_1 ;[15,1]
|| ADD .S2 B_x6, B_x2, B_xh0_1 ;[15,1]
; Cycle 16 -- differences: xl1_0 = x1 - x5, xl0_0 = x0 - x4, xl0_1 = x2 - x6.
;   When A_r2 is zero: xl1_0 <- x5 and xh0_1 <- x2 (copies).
[!A_r2] ROTL .M1 A_x5, 0, A_xl1_0 ;[16,1]
|| SUB .L1 A_x1, A_x5, A_xl1_0 ;[16,1]
|| SUB .D1 A_x0, A_x4, A_xl0_0 ;[16,1]
||[!A_r2] ROTL .M2 B_x2, 0, B_xh0_1 ;[16,1]
|| SUB .L2 B_x2, B_x6, B_xl0_1 ;[16,1]
; Cycle 17 -- xh1_0 = x5 + x1, xl1_1 = x3 - x7.
;   When A_r2 is zero: xh1_0 <- x1, xl0_1 <- x7, xh1_1 <- x3 (copies).
[!A_r2] ROTL .M1 A_x1, 0, A_xh1_0 ;[17,1]
|| ADD .D1 A_x5, A_x1, A_xh1_0 ;[17,1]
||[!A_r2] MV .L2 B_x7, B_xl0_1 ;[17,1]
||[!A_r2] ROTL .M2 B_x3, 0, B_xh1_1 ;[17,1]
|| SUB .S2 B_x3, B_x7, B_xl1_1 ;[17,1]
; Cycle 18 -- y4 = xh0_0 - xh0_1.
;   When A_r2 is zero: xl0_0 <- x4, xl1_1 <- x6 (copies), and the one-time
;   setup adds 1 to the B_l1 recomputed in cycle 14.
SPMASK
||[!A_r2] ROTL .M1 A_x4, 0, A_xl0_0 ;[18,1]
||[!A_r2] MV .L2 B_x6, B_xl1_1 ;[18,1]
|| SUB .S2X A_xh0_0, B_xh0_1, B_y4 ;[18,1]
||^[!A_r2] ADD .D2 B_l1, 1, B_l1
* =========================== STAGE 3 ================================= *
; Cycle 19 -- store index B_h4 = B_h3 >> B_l1 (end of the DEAL / BITR /
;   ROTL / SHFL chain; presumably a bit- or digit-reversed index -- confirm).
;   y0 = xh0_0 + xh0_1, y7 = xl1_0 + xl0_1, y3 = xl1_0 - xl0_1.
SHRU .S2 B_h3, B_l1, B_h4 ;[19,1]
|| ADD .L2X A_xh0_0, B_xh0_1, B_y0 ;[19,1]
|| ADD .S1X A_xl1_0, B_xl0_1, A_y7 ;[19,1]
|| SUB .L1X A_xl1_0, B_xl0_1, A_y3 ;[19,1]
; Cycle 20 -- y1 = xh1_0 + xh1_1, y5 = xh1_0 - xh1_1.  A_temp keeps the old
;   y3; when A_r2 is zero y3 takes y7's value (first half of the y3/y7 swap).
[!A_r2] MV .S1 A_y7, A_y3 ;[20,1]
|| MV .L1 A_y3, A_temp ;[20,1]
|| ADD .S2X A_xh1_0, B_xh1_1, B_y1 ;[20,1]
|| SUB .L2X A_xh1_0, B_xh1_1, B_y5 ;[20,1]
; Cycle 21 -- store y1:y0 at B_p_y0[B_h4]; y6 = xl0_0 - xl1_1; when A_r2 is
;   zero y7 takes the saved y3 (second half of the swap).  A_temp and A_y6
;   name the same register (A4): this packet reads A_temp while writing y6.
STDW .D2T2 B_y1:B_y0, *B_p_y0[B_h4] ;[21,1]
|| SUB .D1X A_xl0_0, B_xl1_1, A_y6 ;[21,1]
||[!A_r2] MV .L1 A_temp, A_y7 ;[21,1]
; Cycle 22 -- y2 = xl0_0 + xl1_1; store y5:y4 at B_p_y2[B_h4].
ADD .S1X A_xl0_0, B_xl1_1, A_y2 ;[22,1]
|| STDW .D2T2 B_y5:B_y4, *B_p_y2[B_h4] ;[22,1]
; Cycle 23 -- store y3:y2 at B_p_y1[B_h4].
STDW .D2T1 A_y3:A_y2, *B_p_y1[B_h4] ;[23,1]
; Cycle 24 -- last packet of the loop body (SPKERNEL); store y7:y6 at
;   B_p_y3[B_h4].
SPKERNEL 1, 0
|| STDW .D2T1 A_y7:A_y6, *B_p_y3[B_h4] ;[24,1]
* =========================== END STAGE 3 ============================ *
; ---------------------------------------------------------------------------
; Function exit: reload A10-A15, B10-B14 and the return address from the
; stack frame, release the frame, and branch to B_ret.  Per the closing
; comment these packets overlap the SPLOOP epilog, so the number and order
; of execute packets here is part of the schedule.
; Both A_SP and B_SP are used as the base; A_SP was set to a copy of B_SP
; during loop setup.
; The "; cycle k: units" notes appear to give the loop cycle (1..6, wrapping)
; that the next packet overlaps and the units/data paths it occupies --
; TODO confirm.
; ---------------------------------------------------------------------------
; cycle 1: A11:A10 from double-word slot 4, return address from word slot 3.
LDDW .D1T1 *+A_SP[4], A11:A10 ; Restore A11,A10
|| LDW .D2T2 *+B_SP[3], B_ret ; Get return address
; cycle 2: D2,T2
; NOTE(review): this packet also uses D1,T1, which the note above omits.
LDDW .D2T2 *+B_SP[3], B11:B10 ; Restore B11,B10
|| LDDW .D1T1 *+A_SP[2], A15:A14 ; Restore A15,A14
; cycle 3
NOP
; cycle 4
NOP
; cycle 5
NOP
; cycle 6: D1,T2
LDDW .D1T2 *+A_SP[5], B13:B12 ; Restore B13,B12
; cycle 1: D1,T1,D2,T2
; NOTE(review): B13:B12 was already loaded from *+A_SP[5] in the previous
; packet.  With A_SP equal to B_SP both loads read double-word slot 5, so one
; of them looks redundant -- confirm before removing either, since the
; packet count matters here.
LDDW .D1T1 *+A_SP[6], A13:A12 ; Restore A13,A12
|| LDDW .D2T2 *+B_SP[5], B13:B12 ; Restore B13,B12
; cycle 2: D2,T2
; Pre-increment B_SP by 16 words (releasing the frame), then load B14 from
; the new top of stack.
LDW .D2T2 *++B_SP[16], B14 ; Restore B14
NOP
; cycle 4: S2
; NOTE(review): a C6000 branch has five delay slots and this BNOP pads four
; of them -- confirm what is intended to occupy the fifth.
BNOP .S2 B_ret, 4 ; Return to caller
; end overlap with SPLOOP epilog
.end
* ======================================================================== *
* End of file: fft16x32.asm *
* ------------------------------------------------------------------------ *
* Copyright (C) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================== *
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -