* maxidx.asm
* ======================================================================== *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* maxidx -- maxidx *
* *
* *
* REVISION DATE *
* 22-Apr-2005 *
* *
* USAGE *
* *
* This routine is C callable, and has the following C prototype: *
* *
* int maxidx *
* ( *
* const short *Input, // Input array // *
* short nInputs // Length of input // *
* ) *
* *
* This routine returns the index of the maximum value of Input. *
* *
* *
* DESCRIPTION *
* *
* The "maxidx" routine finds the largest element in an array, *
* returning the index to that element within the array. *
* *
* The input array is treated as 8 separate "columns" that are *
* interleaved throughout the array. If values in different columns *
* are equal to the maximum value, then the element in the leftmost *
* column is returned. If two values within a column are equal to *
* the maximum, then the one with the lower index is returned. *
* Column takes precedence over index within the column. *
* *
* The function returns the index of the maximum value. *
* *
* int maxidx *
* ( *
* const short *Input, // Input array // *
* short nInputs // Length of input // *
* ) *
* *
* Input: pointer to input values *
* nInputs: number of inputs *
* *
* The above C code is a general implementation without *
* restrictions. The assembly code may have some restrictions, as *
* noted below. *
* *
* *
* TECHNIQUES *
* *
* The code is unrolled 8 times so that the full memory bandwidth and *
* the MAX2 instruction can both be fully utilized. This splits the *
* search into 8 subspaces. The global max is then found from the list *
* of sub-maxes. Then, using the offset of the winning subrange, the *
* global max is searched for using a simple match, which also yields *
* the global argument (index). *
* For common maximums in multiple ranges, the argmax will be *
* different to the above c code, as described earlier. *
* More specific optimised c implementation would look like this: *
* *
* *
* ASSUMPTIONS *
* *
* The input length is a multiple of 8 and >= 40. *
* *
* The input data and coefficients are stored on double word *
* aligned boundaries. *
* *
* *
* MEMORY NOTE *
* *
* There are no bank conflicts in this code. *
* *
* The code is ENDIAN NEUTRAL. *
* *
* *
* CODESIZE *
* *
* 192 bytes *
* *
* *
* CYCLES *
* *
* cycles = nInputs/4 + 20 *
* For count = 256, cycles = 84 *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
* ======================================================================== *
* ======================================================================== *
************************ SYMBOLIC REGISTER ASSIGNMENTS ***********************
* Symbolic names for the physical registers used by the routine below.
* Directives are indented because a statement that starts in column 1 is
* read as a label by the TI assembler.
*
* Several symbols intentionally alias the same physical register (for
* example B2 = B_Input / B_b5b4 / B_argmax, B4 = B_nInputs / B_d4d5 / B_arg,
* A2 = A_m1m0 / A_d0d1).  The routine relies on the earlier value being dead
* before the aliased name is written, so any re-scheduling must re-check
* these lifetimes.
* ---------------------------------------------------------------------------
        .asg            A4,         A_Input     ; input pointer; A-side load pointer in pass 1
        .asg            B2,         B_Input     ; B-side load pointer in pass 1 (set to A_Input + 8)
        .asg            B4,         B_nInputs   ; element count, reduced to the loop trip count
        .asg            A6,         A_MaxBuf    ; not referenced by the visible code
        .asg            A4,         A_MaxIdx    ; result register written just before return
        .asg            B8,         B_MaxIdx    ; index latched in pass 2 when a match is seen
        .asg            B5,         B_Input_    ; copy of the input pointer, walked in pass 2
        .asg            A2,         A_m1m0      ; packed running maxima updated from A_d1d0
        .asg            A3,         A_m3m2      ; packed running maxima updated from A_d3d2
        .asg            B1,         B_m7m6      ; packed running maxima updated from B_d7d6
        .asg            B0,         B_m5m4      ; packed running maxima updated from B_d5d4
        .asg            A1,         A_d3d2      ; high word of the A-side LDDW in pass 1
        .asg            A0,         A_d1d0      ; low word of the A-side LDDW; later reduction temp
        .asg            B7,         B_d7d6      ; high word of the B-side LDDW in pass 1
        .asg            B6,         B_d5d4      ; low word of the B-side LDDW; later reduction temp
        .asg            A2,         A_d0d1      ; PACKLH2 result built from A_d1d0
        .asg            B4,         B_d4d5      ; PACKLH2 result built from B_d5d4
        .asg            A5,         A_p0p0      ; MAX2 of A_d1d0 and A_d0d1
        .asg            B6,         B_p0p0      ; MAX2 of B_d5d4 and B_d4d5
        .asg            B2,         B_b5b4      ; CMPEQ2 flags: B_p0p0 vs B_m5m4 (used as predicate)
        .asg            A1,         A_b3b2      ; CMPEQ2 flags: A_p0p0 vs A_m3m2 (used as predicate)
        .asg            B1,         B_b7b6      ; CMPEQ2 flags: B_p0p0 vs B_m7m6 (used as predicate)
        .asg            A5,         A_max       ; A_p0p0 shifted right by 16 (unsigned)
        .asg            B2,         B_argmax    ; not referenced by the visible code
        .asg            B4,         B_arg       ; candidate index, stepped by 8 in pass 2
        .asg            A6,         A_d0        ; halfword loaded by LDHU in pass 2
        .asg            A0,         A_c0        ; A_max - A_d0; zero selects the MV into B_MaxIdx
* ========================================================================= *
* ========================================================================= *
*   _maxidx -- entry point for
*       int maxidx(const short *Input, short nInputs)
*
*   Register usage visible in this routine:
*     in : A_Input  (A4) = Input pointer, B_nInputs (B4) = element count
*     out: A_MaxIdx (A4) = value copied from B_MaxIdx before returning
*     ret: branch through B3
*
*   Structure:
*     pass 1 - a 1-cycle SPLOOP loads two double words (8 shorts) per
*              iteration and folds them into four packed running maxima
*              with MAX2.
*     reduce - the four packed maxima are merged into one value and the
*              starting offset B_arg (0..7) is derived from which packed
*              maxima contain that value.
*     pass 2 - a second 1-cycle SPLOOP reads every 8th halfword starting
*              from that offset and latches B_arg into B_MaxIdx whenever the
*              loaded value equals the maximum.
*
*   The header's ASSUMPTIONS apply (nInputs multiple of 8 and >= 40, input
*   double-word aligned); nothing here checks them.
*
*   NOTE: the section directive, the .global export and the entry label are
*   three separate statements; the label must start in column 1 and every
*   mnemonic/directive must not.
* ========================================================================= *
        .text
        .global _maxidx
_maxidx:
* ----------------------------- pass 1 set-up ------------------------------ *
        SHR     .S2     B_nInputs,  3,          B_nInputs       ;N/8

        LDDW    .D1T1   *A_Input[0],            A_m3m2:A_m1m0   ; seed maxima with Input[0..3]
||      SUB     .L2     B_nInputs,  5,          B_nInputs       ;N/8-5
                                                                ; NOTE(review): the 5 presumably covers the
                                                                ; seeding group plus the SPLOOPD start-up
                                                                ; iterations - confirm in the CPU guide

        SPLOOPD 1                                               ; 1-cycle loop, ILC loaded in parallel
||      ADD     .L2X    A_Input,    8,          B_Input         ; B pointer = second double word
||      MV      .D2X    A_Input,    B_Input_                    ; keep original pointer for pass 2
||      MVC     .S2     B_nInputs,  ILC                         ; trip count for pass 1
||      LDDW    .D1T2   *A_Input[1],            B_m7m6:B_m5m4   ; seed maxima with Input[4..7]
* =========================== PIPE LOOP KERNEL ============================ *
        LDDW    .D2T2   *++B_Input[2],          B_d7d6:B_d5d4   ; next B-side double word (stride 2 dwords)
||      LDDW    .D1T1   *++A_Input[2],          A_d3d2:A_d1d0   ; next A-side double word (stride 2 dwords)

        NOP             4                                       ; wait for the loads

        SPKERNEL 5,0
||      MAX2    .L2     B_d7d6,     B_m7m6,     B_m7m6          ; per-halfword running max
||      MAX2    .S2     B_d5d4,     B_m5m4,     B_m5m4          ;
||      MAX2    .L1     A_d3d2,     A_m3m2,     A_m3m2
||      MAX2    .S1     A_d1d0,     A_m1m0,     A_m1m0          ;
* =========================== PIPE LOOP EPILOG ============================ *
* Reduce the four packed maxima to a single value.
        MAX2    .L1     A_m1m0,     A_m3m2,     A_d1d0          ; merge the two A-side registers
||      MAX2    .L2     B_m7m6,     B_m5m4,     B_d5d4          ; merge the two B-side registers
||      ADD     .D2     B_nInputs,  1,          B_nInputs       ; N/8-4: trip count for pass 2

        MAX2    .L1X    A_d1d0,     B_d5d4,     A_d1d0          ; cross-side merge: both sides now hold
||      MAX2    .L2X    B_d5d4,     A_d1d0,     B_d5d4          ; the same packed pair

        PACKLH2.L1      A_d1d0,     A_d1d0,     A_d0d1          ; same pair with halves exchanged
||      PACKLH2.L2      B_d5d4,     B_d5d4,     B_d4d5          ; (overwrites B4; MVC reads it this cycle)
||      MVC     .S2     B_nInputs,  ILC                         ; trip count for pass 2

        MAX2    .L1     A_d1d0,     A_d0d1,     A_p0p0          ; maximum replicated in both halves
||      MAX2    .L2     B_d5d4,     B_d4d5,     B_p0p0          ;
||      CMPGT2  .S2     B_d4d5,     B_d5d4,     B_arg           ;default 0/1

* Work out which packed maxima contain the overall maximum.
        CMPEQ2  .S1     A_p0p0,     A_m3m2,     A_b3b2          ; non-zero if either half matches
||      CMPEQ2  .S2     B_p0p0,     B_m5m4,     B_b5b4          ;
||      AND     .L2     B_arg,      1,          B_arg           ;choose odd/even

* NOTE(review): the two predicated ADDs below are in the same execute packet
* and both write B_arg.  If A_b3b2 and B_b5b4 can be non-zero together this
* is a same-cycle double write to one register - confirm against the CPU
* guide's resource constraints before relying on the result in that case.
        CMPEQ2  .S2     B_p0p0,     B_m7m6,     B_b7b6          ;
||      SHRU    .S1     A_p0p0,     16,         A_max           ;extract max
|| [A_b3b2] ADD .L2     B_arg,      2,          B_arg           ;select 2/3
|| [B_b5b4] ADD .D2     B_arg,      4,          B_arg           ;select 4/5

   [B_b7b6] ADD .L2     B_arg,      6,          B_arg           ;select 6/7

* ------------------------------- pass 2 ----------------------------------- *
        SPLOOPD 1                                               ;define 1 cycle loop
||      LDHU    .D2T1   *++B_Input_[B_arg],     A_d0            ;load 1st max value
* ========================================================================== *
        LDHU    .D2T1   *++B_Input_[8],         A_d0            ; next halfword, 8 elements further on

        NOP             3                                       ;

        SUB     .L1     A_max,      A_d0,       A_c0            ; zero when the loaded value is the max

        SPKERNEL 0,0                                            ;
||      ADD     .S2     B_arg,      8,          B_arg           ; index of the next candidate
|| [!A_c0] MV   .L2     B_arg,      B_MaxIdx                    ; latch index on a match
* ========================================================================== *
**** E0, C0 **** .L1, .S2, .L2 ***********************************************
* During the epilog the return branch takes over .S2, so the masked loop ADD
* is re-issued on .D2 instead (per the original "Substitute"/"Move" notes).
        SPMASK
||^     BNOP    .S2     B3,         4                           ; Substitute S2
||      ADD     .D2     B_arg,      8,          B_arg           ; Move ADD to D2
**** E1, C0 **** .L1, .S2, .L2 ***********************************************
**** E2, C0 **** .L1, .S2, .L2 ***********************************************
**** E3, C0 **** .L1, .S2, .L2 ***********************************************
**** E4, C0 **** .S2, .L2 ****************************************************
**** Epilogue Complete ****************************************************
        MV      .L1X    B_MaxIdx,   A_MaxIdx                    ;return val

        .end
* ======================================================================== *
* End of file: maxidx.asm *
* ------------------------------------------------------------------------ *
* Copyright (C) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================== *
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -