* performs a dot product on 256 pairs of 9 element vectors.txt
字號:
*===============================================================================
*
* TEXAS INSTRUMENTS, INC.
*
* MINIMUM ERROR SEARCH LOOP
*
* Revision Date: 04/23/97
*
* USAGE This routine is C Callable and can be called as:
*
* int minerror(short *errCoefs, short *GSP0_TABLE,
* int *savePtr_ret)
*
* *GSP0_TABLE -- GSP0 terms array
* *errCoefs -- array of error coefficients
* *savePtr_ret -- Index of pair of vectors giving max dotprod
*
* If routine is not to be used as a C callable function then
* you need to initialize values for all of the values passed
* as these are assumed to be in registers as defined by the
* calling convention of the compiler, (refer to the C compiler
* reference guide).
*
* C CODE
* This is the C equivalent of the assembly code. Note that
* the assembly code is hand optimized and restrictions may
* apply.
*
* int minerror(short *errCoefs, short *GSP0_TABLE,
* int *savePtr_ret)
* {
* int val, maxVal;
* int i, j;
* short *tmpPtr;
* short *tmpPtr2;
* short *endPtr;
* short *endPtr2;
* short *savePtr;
*
* #define GSP0_TERMS 9
* #define GSP0_NUM 256
*
* maxVal = -50;
* tmpPtr = GSP0_TABLE;
* for (endPtr = tmpPtr + GSP0_TERMS*GSP0_NUM; tmpPtr < endPtr; ){
* val = 0;
* tmpPtr2 = errCoefs;
* for(endPtr2=tmpPtr2+GSP0_TERMS;tmpPtr2<endPtr2;tmpPtr2++){
* val += *tmpPtr * *tmpPtr2;
* tmpPtr++;
* }
* if (val > maxVal) {
* maxVal = val;
* savePtr = tmpPtr;
* }
* }
* *savePtr_ret = (savePtr - GSP0_TABLE)*2;
* return (maxVal);
* }
*
*
*
* DESCRIPTION
*
* This is the minimum energy error search, which is a large
* part of the VSELP vocoder codebook search. It performs
* a dot product on 256 pairs of 9 element vectors and
* searches for the pair of vectors which produces the
* maximum dot product result.
*
*
* TECHNIQUES
*
* 1. The inner loop is unrolled 2 times
*
* ASSUMPTIONS
*
* 1. Number of error coefficients is 9
* 2. Number of GSP0 vectors is 256 (9 terms each)
*
* MEMORY NOTE
*
* No memory bank hits given errCoefs & GSP0_TABLE are both on
* even or both on odd word boundaries (4 hits if not)
*
* CYCLES
*
* (256/2)*9 + 14 or 1166 cycles
*
*===============================================================================
;-------------------------------------------------------------------------------
; _minerror -- search for the largest 9-term dot product; every OUTLOOP pass
;              processes two consecutive 9-element vectors.
;
; Register roles, as used below:
;   A4  in : base of the nine halfword "g" values, loaded once into A7, A8,
;            A15 (copied to B5), A5 and A0
;       out: result (maxval, or the final val if it is larger)
;   B4  in : pointer to the "x" stream, post-incremented by every LDW; it is
;            copied to B13 in the same packet as the first load
;   A6  in : address that receives the final B12 - B13 difference
;   A2     : OUTLOOP counter, starts at 127, decremented once per pass
;   A1     : priming counter, starts at 1; the first CMPGT inside the loop
;            is predicated on [!A1]
;   B11    : maxval, starts at -50
;   B2     : val, the sum of the two partial accumulators A9 and B7
;   B1     : CMPGT result; predicates the maxval / saveptr updates
;   B12    : saveptr, derived from B4 plus the constant held in B6 (-42)
; B12, B11, A15 and B13 are stored on the stack at entry and reloaded before
; the return branch through B3.
;
; NOTE(review): A4/B4/A6 presumably carry the first three C arguments
; (errCoefs, GSP0_TABLE, savePtr_ret) -- confirm against the compiler's
; calling convention.
; NOTE(review): A3, A9, B7 and B8 are read by the first OUTLOOP packets
; without being initialised in this routine; presumably harmless because B1
; starts at 0 and the first compare is suppressed via A1 -- confirm.
;-------------------------------------------------------------------------------
.global _minerror
.text
_minerror:
STW .D2 B12,*B15 ; push B12 onto stack
STW .D2 B11,*--B15 ; push B11 onto stack
STW .D2 A15,*--B15[2] ; push A15 onto stack (leaves one word free for B13)
*** BEGIN Benchmark Timing ***
B_START:
; Prologue: load the nine g values, start the x loads, set up counters.
LDW .D1 *A4,A7 ; A7 = g(1) & g(0)
|| STW .D2 B13,*+B15[1] ; push B13 onto stack (slot above A15)
MVK .S2 -42,B6 ; B6 = -42, offset added to B4 when forming saveptr
|| LDW .D1 *+A4[2],A15 ; A15 = g(5) & g(4)
LDW .D2 *B4++,B0 ; B0 = x(1) & x(0)
|| LDW .D1 *+A4[1],A8 ; A8 = g(3) & g(2)
|| MV .L2 B4,B13 ; B13 = copy of B4, subtracted from saveptr at the end
|| MVK .S1 1,A1 ; A1 = 1 (priming counter)
|| MVK .S2 -50,B11 ; maxval = -50 (initial maximum)
LDW .D2 *B4++,B0 ; B0 = x(3) & x(2)
|| MVK .S1 127,A2 ; initialize loop cntr (N-1)
LDW .D2 *B4++,B0 ; B0 = x(5) & x(4)
|| LDW .D1 *+A4[3],A5 ; A5 = g(7) & g(6)
LDW .D2 *B4++,B0 ; B0 = x(7) & x(6)
|| LDH .D1 *+A4[8],A0 ; A0 = g(8)
|| MVK .S2 0,B1 ; initialize compare reg (no update pending)
MVK .S2 0,B2 ; initialize val
|| LDW .D2 *B4++,B0 ; B0 = x(0) & x(8)
|| MV .L2X A15,B5 ; copy g(5) & g(4) to other reg file
; Each OUTLOOP pass handles two x vectors: the first with MPY/MPYH, the second
; with MPYHL/MPYLH (per the load comments it starts in the upper halfword of a
; word).  Lines whose comment starts with ";*" appear to be the tail work
; (accumulate / compare / update) of the preceding vector, overlapped with the
; multiplies of the current one.
OUTLOOP: ; OUTER LOOP BEGINS HERE
MPY .M1X B0,A7,A3 ; p0 = x(0) * g(0)
|| MPYH .M2X B0,A7,B8 ; p1 = x(1) * g(1)
|| ADD .L1 A3,A9,A9 ;* val0 += p0,
|| ADD .L2 B8,B7,B7 ;* val1 += p1,
|| LDW .D2 *B4++,B0 ; B0 = x(2) & x(1)
|| [B1] MV .S2 B2,B11 ;* maxval = val (only when B1 is set)
MPY .M1X B0,A8,A3 ; p0 = x(2) * g(2)
|| MPYH .M2X B0,A8,B8 ; p1 = x(3) * g(3)
|| LDW .D2 *B4++,B0 ; B0 = x(4) & x(3)
|| ADD .L1 A9,A3,A9 ;* val0 += p0,
|| ADD .L2 B7,B8,B7 ;* val1 += p1,
|| [B1] ADD .S2 B6,B4,B12 ;* update saveptr: B12 = B4 + B6
MPY .M1X B0,A15,A3 ; p0 = x(4) * g(4)
|| MPYH .M2 B0,B5,B8 ; p1 = x(5) * g(5)
|| ADD .S1 0,A3,A9 ; val0 = p0 (accumulator restarted for this vector)
|| ADD .S2 0,B8,B7 ; val1 = p1 (accumulator restarted for this vector)
|| LDW .D2 *B4++,B0 ; B0 = x(6) & x(5)
|| ADD .L2X B7,A9,B2 ;* val = val0 + val1,
MPY .M1X B0,A5,A3 ; p0 = x(6) * g(6)
|| MPYH .M2X B0,A5,B8 ; p1 = x(7) * g(7)
|| ADD .L1 A3,A9,A9 ; val0 += p0,
|| ADD .S2 B8,B7,B7 ; val1 += p1,
|| LDW .D2 *B4++,B0 ; B0 = x(8) & x(7)
||[!A1] CMPGT .L2 B2,B11,B1 ;* compare val with maxval (skipped while A1 != 0)
|| [A2] B .S1 OUTLOOP ; for OUTLOOP (taken while A2 != 0)
MPY .M1X B0,A0,A3 ; p0 = x(8) * g(8)
|| MPYHL .M2X B0,A7,B8 ; p1 = x(0) * g(0)
|| ADD .L1 A3,A9,A9 ; val0 += p0,
|| ADD .L2 B8,B7,B7 ; val1 += p1,
|| LDW .D2 *B4++,B0 ; B0 = x(1) & x(0)
|| [B1] MV .S2 B2,B11 ;* make maxval = val
MPYLH .M2X B0,A7,B8 ; p1 = x(1) * g(1)
|| MPYHL .M1X B0,A8,A3 ; p0 = x(2) * g(2)
|| LDW .D2 *B4++,B0 ; B0 = x(3) & x(2)
|| ADD .L1 A3,A9,A9 ; val0 += p0,
|| ADD .L2 B8,B7,B7 ; val1 += p1,
|| [B1] ADD .S2 B6,B4,B12 ;* update saveptr: B12 = B4 + B6
MPYLH .M2X B0,A8,B8 ; p1 = x(3) * g(3)
|| MPYHL .M1X B0,A15,A3 ; p0 = x(4) * g(4)
|| LDW .D2 *B4++,B0 ; B0 = x(5) & x(4)
|| ADD .L1 A9,A3,A9 ; val0 += p0,
|| ADD .L2 0,B8,B9 ;* B9 = p1 (first term of the next val1)
|| ADD .S1 -1,A2,A2 ; A2-- dec loop counter
|| [B1] ADD .S2 2,B12,B12 ;* update saveptr: B12 += 2
MPYLH .M2 B0,B5,B8 ; p1 = x(5) * g(5)
|| MPYHL .M1X B0,A5,A3 ; p0 = x(6) * g(6)
|| ADD .D1 0,A3,A9 ; val0 = p0,
|| ADD .S2 B9,B8,B7 ; val1 = B9 + p1,
|| LDW .D2 *B4++,B0 ; B0 = x(7) & x(6)
|| [A1] ADD .S1 -1,A1,A1 ; A1-- dec priming counter (stops at 0)
|| ADD .L2X B7,A9,B2 ;* val = val0 + val1,
MPYLH .M2X B0,A5,B8 ; p1 = x(7) * g(7)
|| MPYHL .M1X B0,A0,A3 ; p0 = x(8) * g(8)
|| ADD .L1 A9,A3,A9 ; val0 += p0,
|| ADD .S2 B7,B8,B7 ; val1 += p1,
|| LDW .D2 *B4++,B0 ; B0 = x(0) & x(8)
|| CMPGT .L2 B2,B11,B1 ;* compare val with maxval
* OUTLOOP ENDS HERE
; Epilogue: finish the last accumulation and compare, reload the saved
; registers, return maxval in A4 and store the saveptr offset through A6.
; NOTE(review): B13, B11 and B12 are reloaded from the stack before their
; last reads below (CMPGT, SUB, STW); this presumably relies on the load
; delay slots of the CPU -- confirm before reordering anything here.
ADD .L1 A9,A3,A9 ; val0 += p0,
|| ADD .L2 B7,B8,B7 ; val1 += p1,
|| LDW .D2 *B15++,A15 ; pop A15 off stack
|| [B1] MV .S2 B2,B11 ;* maxval = val (only when B1 is set)
ADD .L1 A9,A3,A9 ; val0 += p0,
|| ADD .L2 B7,B8,B7 ; val1 += p1,
|| LDW .D2 *B15++,B13 ; pop B13 off stack
|| [B1] ADD .S2 B6,B4,B12 ;* update saveptr: B12 = B4 + B6
ADD .L2X B7,A9,B2 ; val = val1 + val0 (last vector),
|| LDW .D2 *B15++,B11 ; pop B11 off stack
|| B .S2 B3 ; return branch through B3
|| MV .S1X B11,A4 ; A4 returns maxVal
CMPGT .L2 B2,B11,B1 ; compare val with maxval
|| LDW .D2 *B15,B12 ; pop B12 off stack
|| [B1] ADD .S2 B12,4,B12 ; update saveptr: B12 += 4
[B1] MV .L1X B2,A4 ; return val instead when it beat maxval
|| [B1] ADD .D2 -20,B4,B12 ; update saveptr: B12 = B4 - 20
SUB .S2 B12,B13,B12 ; saveptr -= B13 (copy of the entry B4)
STW .D1 B12,*A6 ; store the saveptr difference through A6
B_END:
*** END Benchmark Timing ***
NOP ; NOTE(review): presumably the last delay slot of the return branch -- confirm
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -