; performs vselp vocoder codebook search.txt
; Font size:
|| LDW .D2 *B14++,B2 ; *tmpPtr2,
|| MPY .M1X A13,B10,A11 ; p0 = *tmpPtr * *tmpPtr2,
|| MPYH .M2X A13,B10,B5 ; p1 = *tmpPtr * *tmpPtr2,
|| ADD .L1 A12,A11,A12 ; Dcurrent0 += p0,
|| ADD .L2 B7,B5,B7 ; Dcurrent1 += p1,
|| [A1] B .S1 LOOP2 ; for endPtr = tmpPtr+S_LEN
LDW .D1 *A10++,A0 ; *tmpPtr,
|| LDW .D2 *B14++,B10 ; *tmpPtr2,
|| MPY .M1X A0,B2,A11 ; p0 = *tmpPtr * *tmpPtr2,
|| MPYH .M2X A0,B2,B5 ; p1 = *tmpPtr * *tmpPtr2,
|| ADD .L1 A12,A11,A12 ; Dcurrent0 += p0,
|| ADD .L2 B7,B5,B7 ; Dcurrent1 += p1,
|| [A1] ADD .S1 -1,A1,A1 ; i--
; end of LOOP2
; ---------------------------------------------------------------------------
; Tail of the outer correlation loop (its label lies above this excerpt).
; Runs after the LOOP2 multiply-accumulate: merges the two partial sums,
; stores the scaled result through B11, adds the doubled sum into G (A5),
; steps the j / m counters and pointers, restarts the operand loads and
; zeroes the accumulators for the next LOOP2 pass.
; B1 is used as the j counter and A2 as the m counter; the [!B1] instructions
; run only when the j count has expired. Control leaves for START3 once A2
; is zero.
; NOTE(review): hand-scheduled code - the order of the execute packets is
; tied to branch and load delay slots; do not reorder without re-deriving
; the schedule.
; ---------------------------------------------------------------------------
ADD .L1X A12,B7,A12 ; Dcurrent0 += Dcurrent1,
|| ADD .L2X A12,B7,B3 ; B3 = the same sum (B-side copy that gets scaled and stored)
|| ADD .D1 A15,A14,A10 ; tmpPtr = wBasisPtr + (m-1) * S_LEN
|| ADD .D2 B12,B0,B14 ; tmpPtr2 = wBasisPtr + (j-1) * S_LEN
LDW .D1 *A10++,A13 ; *tmpPtr,
|| [A2] B .S1 LOOP2B ; for j = m+1 OR for m = 1
SHL .S1 A12,1,A12 ; 2.0 * Dcurrent
|| SHR .S2 B3,13,B3 ; scale 4.0 * Dcurrent
|| LDW .D2 *B14++,B10 ; *tmpPtr2,
|| LDW .D1 *A10++,A0 ; *tmpPtr,
ADD .L1 A5,A12,A5 ; G += 2.0 * Dcurrent
|| STH .D2 B3,*B11 ; *(D + m*Ddim + j) = 4.0 * Dcurrent
|| [B1] ADD .S2 2,B11,B11 ; D + m*Ddim + j (inc by 1 16 bit wd)
||[!B1] ADD .S1 -1,A2,A2 ; j count expired: decrement the m loop counter
||[!B1] ADD .L2X B11,A9,B11 ; j count expired: advance the D pointer by the offset held in A9
||[!B1] ADD .D1 2,A9,A9 ; inc index for D + m*Ddim + j calc
LDW .D1 *A10++,A13 ; *tmpPtr,
|| LDW .D2 *B14++,B2 ; *tmpPtr2,
|| MPY .M1 0,A11,A11 ; p0 = 0 (cleared by multiplying by zero)
|| MPY .M2 0,B5,B5 ; p1 = 0 (cleared by multiplying by zero)
|| [A2] B .S1 LOOP2 ; for LOOP2
||[!B1] SUB .L2X A4,B13,B1 ; j count expired: reload j lp cntr = A4 - B13
|| [B1] ADD .S2 -1,B1,B1 ; dec for j lp cntr
LDW .D1 *A10++,A0 ; *tmpPtr,
|| LDW .D2 *B14++,B10 ; *tmpPtr2,
|| MPY .M1 0,A12,A12 ; Dcurrent0 = 0
|| MPY .M2 0,B7,B7 ; Dcurrent1 = 0
|| [B1] ADD .S2 -8,B14,B12 ; update B12 pointer
||[!B1] ADD .L1 -12,A10,A15 ; update A15 pointer
||[!A2] B .S1 START3 ; m counter is zero: go on to the code-word search
LDW .D1 *A10++,A13 ; *tmpPtr,
|| LDW .D2 *B14++,B2 ; *tmpPtr2,
||[!B1] ADD .L2X B0,A15,B12 ; update B12 pointer
|| [A2] B .S1 LOOP2 ; for LOOP2
; end of OUTLOOP2
; ---------------------------------------------------------------------------
; START3 - one-time setup for the code-word search loop (OUTLOOP3).
;   B6  -> table read as halfword pairs: [0] = codeWord, [1] = bitChanged
;   A10 =  B6 + 6 bytes (the bitChanged halfword of the second pair)
;   B12 =  copy of A4, taken before A4 is cleared to hold wordSave
;   A8  =  cSqrdBest = ((C>>16) * (C>>16)) >> 16, with C in A3
;   A15 =  gBest = G >> 16, with G in A5
;   B14 =  cSave = C
;   A0  =  62, the outer loop counter
;   A14 =  mask = 1 << bitChanged
;   A11 =  halfword loaded from B4 + 2 + 2*bitChanged (the R entry)
;   A12 =  bitChanged + 1, as expected on entry to OUTLOOP3
; The bitChanged load issued in the first packet is consumed by the SHL in
; the last packet, five packets later.
; ---------------------------------------------------------------------------
START3:
ADD .L1X 6,B6,A10 ; intPtr = TABLE + 1
|| SHR .S1 A3,16,A15 ; C>>16
|| LDH .D2 *+B6[1],A13 ; bitChanged = *++intPtr
|| MV .L2X A4,B12 ; keep the incoming A4 value in B12 (A4 is zeroed next packet)
LDH .D2 *B6++[2],B13 ; codeWord = *intPtr
|| MPY .M1 A15,A15,A8 ; cSqrdBest = (C>>16) * (C>>16)
|| SHR .S1 A5,16,A5 ; G = G>>16
|| ZERO .D1 A4 ; wordSave = 0;
MV .L2X A3,B14 ; cSave = C
|| MVK .S1 1,A1 ; constant 1, shifted below to build the mask
|| MV .L1 A5,A15 ; gBest = G
MVK .S1 62,A0 ; set outer loop3 counter
ADD .L1X 2,B4,A12 ; R + 1
|| SHR .S1 A8,16,A8 ; cSqrdBest = ((C>>16) * (C>>16)>>16)
SHL .S1 A1,A13,A14 ; mask = 0x1 << bitChanged
|| LDH .D1 *+A12[A13],A11 ; *(R + bitChanged)
|| ADD .L1 1,A13,A12 ; bitChanged++
; ---------------------------------------------------------------------------
; OUTLOOP3 - top of the per-code-word loop.
; On entry: B13 = codeWord, A14 = mask, A13 = bitChanged, A12 = bitChanged+1,
; A11 = the R entry loaded for this bitChanged, A7 = Ddim, B8 = D base.
; This section
;   * derives theta (B2, copied to B5) from codeWord & mask,
;   * adds the R entry to C (A3) when theta is set, subtracts it otherwise,
;   * points B7 at the first D halfword and strides it by Ddim (B10),
;   * primes LOOP3A: A1 = loop counter, B0 = first bit test, B9 = 0.
; The bit masks tested are 1 (A2), then the immediate 2, then A2 = 4, 8, ...;
; that is why A2 is shifted left by 2, not 1, in the fifth packet.
; NOTE(review): hand-scheduled around branch/load delay slots - keep the
; packet order intact.
; ---------------------------------------------------------------------------
OUTLOOP3:
AND .L2X B13,A14,B2 ; codeWord & mask
|| ADD .L1 -1,A12,A1 ; loop counter = bitChanged - 1
|| ADD .D1 A7,A12,A12 ; j*Ddim + bitChanged
SHL .S1 A12,1,A12 ; used to scale offset
|| ADD .L1 1,A13,A13 ; bitChanged++
|| MV .L2X A7,B10 ; copy Ddim to other reg file
|| [B2] MVK .S2 1,B2 ; theta = !(!(codeWord & mask))
ADD .L2X B8,A12,B7 ; D + j*Ddim + bitChanged
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
||[!A1] SUB .L1 A2,A2,A2 ; prevents first LD from executing
|| [A1] MVK .S1 1,A2 ; allows first LD to execute
[A2] LDH .D2 *B7++[B10],B9 ; *(D + j*Ddim + bitChanged)
|| MVK .S1 1,A2 ; tmpMask = 1;
|| [A1] B .S2 LOOP3A ; for j=1
|| MV .L2 B2,B5 ; theta
AND .L2X B13,A2,B0 ; codeWord & tmpMask
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
|| MVK .S2 0,B9 ; zero initial load value
|| SHL .S1 A2,2,A2 ; tmpMask = 4 (mask 2 is supplied as an immediate below)
[B2] ADD .D1 A3,A11,A3 ; C += *(R + bitChanged)
||[!B2] SUB .L1 A3,A11,A3 ; C -= *(R + bitChanged)
|| LDH .D2 *B7++[B10],B9 ; *(D + j*Ddim + bitChanged)
|| [B0] MVK .S2 1,B2 ; !(!(codeWord & tmpMask))
||[!B0] SUB .L2 B2,B2,B2 ; !(!(codeWord & tmpMask))
|| [A1] B .S1 LOOP3A ; for j=1
|| MPY .M1 A13,A7,A12 ; bitChanged * Ddim
AND .L2 B13,2,B0 ; codeWord & 2 (second tmpMask value, as an immediate)
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
; ---------------------------------------------------------------------------
; LOOP3A - two-packet loop kernel over the D halfwords read with stride
; Ddim (B10) through B7.
; Per pass: B2 = 1 if the tested codeWord bit (B0) is set, else 0;
; B1 = (theta == B2) with theta in B5; G (A5) gets the loaded D value (B9)
; added when B1 is set and subtracted when it is clear. tmpMask (A2) moves
; one bit left and the next codeWord bit is tested into B0.
; A1 is the loop counter: the branch is taken while it is non-zero.
; The [B1] ADD in the first packet reads B1 before this packet's CMPEQ
; result is written, i.e. the value produced on the previous pass.
; ---------------------------------------------------------------------------
LOOP3A:
LDH .D2 *B7++[B10],B9 ; *(D + j*Ddim + bitChanged)
|| [B0] MVK .S2 1,B2 ; !(!(codeWord & tmpMask))
||[!B0] MPY .M2 0,B2,B2 ; !(!(codeWord & tmpMask))
|| CMPEQ .L2 B5,B2,B1 ; (theta == !(!(codeWord & tmpMask)))
|| [B1] ADD .L1X A5,B9,A5 ; G += *(D + bitChanged*Ddim + j)
|| [A1] B .S1 LOOP3A ; for j=1
SHL .S1 A2,1,A2 ; tmpMask <<= 1
||[!B1] SUB .L1X A5,B9,A5 ; G -= *(D + bitChanged*Ddim + j)
|| AND .L2X B13,A2,B0 ; codeWord & tmpMask
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
; end of LOOP3A
; ---------------------------------------------------------------------------
; LOOP3A drain and LOOP3B setup.
;   * finishes the last pending G += D update left over from LOOP3A and
;     clears B1 so the first LOOP3B pass starts with a clean predicate,
;   * rebuilds B7 = D base (B8) + 2*(A13 + 1) + 2*(A13 * Ddim), the address
;     LOOP3B reads consecutive halfwords from,
;   * A1 = B12 - A13, then pre-decremented, is the LOOP3B loop counter,
;   * starts the figure-of-merit math for the new C (A3):
;       A11 = ((C>>16) * (C>>16)) >> 16,  A12 = A11 * gBest (A15),
;   * A2 = mask << 1 is the first tmpMask tested in LOOP3B,
;   * B2 = outer loop counter (A0), moved across with a multiply by 1.
; NOTE(review): hand-scheduled around branch/load/multiply delay slots -
; keep the packet order intact.
; ---------------------------------------------------------------------------
ADD .L2X 1,A13,B7 ; j = bitChanged + 1
|| SHL .S1 A12,1,A12 ; used to scale offset
|| [B1] ADD .L1X A5,B9,A5 ; G += *(D + bitChanged*Ddim + j)
|| MVK .S2 0,B1 ; initialize condreg
ADDAH .D2 B8,B7,B7 ; j + D
|| SUB .L1X B12,A13,A1 ; loop cntr = numBasis - bitChanged
ADD .L2X B7,A12,B7 ; D + bitChanged*Ddim + j
|| [A1] ADD .D1 -1,A1,A1 ; loop cntr = numBasis - (bitChanged+1)
|| SHR .S1 A3,16,A12 ; C>>16
LDH .D2 *B7++,B9 ; *(D + bitChanged*Ddim + j)
|| [A1] B .S2 LOOP3B ; for bitChanged+1
|| MPY .M1 A12,A12,A11 ; (C>>16) * (C>>16)
SHL .S1 A14,1,A2 ; tmpMask = mask << 1;
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
|| MPY .M2X A0,1,B2 ; move outer lp cntr to B2
; A9 = B15 + 4 is computed unconditionally. It used to be predicated on
; [!B2], but B2 is written by the MPY in the packet just above and a
; multiply result is not readable until one cycle later, so the predicate
; tested a left-over LOOP3A flag instead of the outer loop counter. A9 is
; not read again until the register pops at the end of the routine, and B15
; does not move before them, so refreshing A9 on every pass is harmless.
LDH .D2 *B7++,B9 ; *(D + bitChanged*Ddim + j)
|| AND .L2X B13,A2,B0 ; codeWord & tmpMask
|| [A1] B .S2 LOOP3B ; for j = bitChanged+1
|| SHR .S1 A11,16,A11 ; (C>>16) * (C>>16) >> 16
|| ADD .L1X 4, B15, A9 ; second pop pointer = stack pointer + 4
[B0] MVK .S2 1,B0 ; !(!(codeWord & tmpMask))
|| MPY .M1 A11,A15,A12 ; ((C>>16) * (C>>16) >>16) * gBest
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
|| SHL .S1 A2,1,A2 ; tmpMask <<= 1
; ---------------------------------------------------------------------------
; LOOP3B - two-packet loop kernel over consecutive D halfwords read
; through B7.
; Per pass: B1 = theta (B5) - B0, so B1 is non-zero when theta and the
; tested codeWord bit differ. Note the sense is the opposite of LOOP3A's
; CMPEQ: here G (A5) has the loaded D value (B9) subtracted under [B1] and
; added under [!B1]. B0 is re-derived from codeWord & tmpMask and
; normalised to 0/1; tmpMask (A2) moves one bit left.
; A1 is the loop counter: the branch is taken while it is non-zero.
; The [B1] SUB in the first packet reads B1 before this packet's SUB result
; is written, i.e. the value produced on the previous pass.
; ---------------------------------------------------------------------------
LOOP3B:
LDH .D2 *B7++,B9 ; *(D + bitChanged*Ddim + j)
|| SUB .S2 B5,B0,B1 ; B1 != 0 when theta != !(!(codeWord & tmpMask))
|| AND .L2X B13,A2,B0 ; codeWord & tmpMask
|| [B1] SUB .L1X A5,B9,A5 ; G -= *(D + bitChanged*Ddim + j)
|| [A1] B .S1 LOOP3B ; for j = bitChanged+1
[B0] MVK .S2 1,B0 ; !(!(codeWord & tmpMask))
||[!B1] ADD .L1X A5,B9,A5 ; G += *(D + bitChanged*Ddim + j)
|| [A1] ADD .D1 -1,A1,A1 ; decrement counter
|| SHL .S1 A2,1,A2 ; tmpMask <<= 1
; end of LOOP3B
; ---------------------------------------------------------------------------
; Bottom of OUTLOOP3: best-candidate test and next-iteration setup.
; B2 holds the outer loop counter here. While it is non-zero the branch back
; to OUTLOOP3 is taken and the next codeWord / bitChanged pair is fetched;
; the remaining packets of this section execute in the branch delay slots.
; When B2 is zero the [!B2] loads begin restoring saved registers from the
; stack through two pointers (B15 and A9), interleaved with the last compare.
; Candidate test: A1 = (cSqrdBest * G  <  cSqrdNew * gBest), with
; cSqrdNew in A11 and cSqrdNew * gBest in A12; when A1 is set, cSqrdBest
; (A8), wordSave (A4), gBest (A15) and cSave (B14) are replaced.
; Next-iteration setup: A0 is decremented, A14 = 1 << bitChanged,
; A11 = next R entry, A12 = bitChanged + 1, B11 = 127.
; ---------------------------------------------------------------------------
[B2] B .S2 OUTLOOP3 ; for iePtr = intPtr
|| [B2] LDH .D2 *B6++[2],B13 ; codeWord = *intPtr
|| [B2] LDH .D1 *A10++[2],A13 ; bitChanged = *++intPtr
|| [B1] SUB .L1X A5,B9,A5 ; G -= *(D + bitChanged*Ddim + j) (last pending LOOP3B update)
MPY .M1 A8,A5,A6 ; cSqrdBest * G
||[!B2] LDW .D2 *B15++, B3 ; pop B3 off stack
||[!B2] LDW .D1 *A9++, A15 ; pop A15 off stack
; NOTE(review): A9 is set to B15 + 4 before LOOP3B. With that value the A15
; pop above (through A9) and the A14 pop below (through B15, already advanced
; by one word) both appear to read the word at the original B15 + 4 - confirm
; against the push sequence in the prologue, which is not part of this excerpt.
ADD .L1 -1,A0,A0 ; dec OUTLOOP3 counter
|| MVK .S2 127,B11 ; load (1<<numBasis) - 1
|| MVK .S1 1,A14 ; constant 1, shifted below to build the next mask
||[!B2] LDW .D1 *A9++[2], B10 ; pop B10 off stack
||[!B2] LDW .D2 *B15++[2], A14 ; pop A14 off stack
ADD .S1X 2,B4,A12 ; R + 1
|| CMPLT .L1 A6,A12,A1 ; (((C>>16) * (C>>16)) >>16) * gBest
; > cSqrdBest * G)
[A1] MPY .M1 A11,1,A8 ; cSqrdBest =(((C>>16) * (C>>16)) >> 16)
|| [A1] MV .L1X B13,A4 ; wordSave = codeWord
|| [A1] MV .S1 A5,A15 ; gBest = G
|| [A1] MV .S2X A3,B14 ; cSave = C
||[!B2] LDW .D1 *A9++[2], B12 ; pop B12 off stack
||[!B2] LDW .D2 *B15++[2], A13 ; pop A13 off stack
SHL .S1 A14,A13,A14 ; mask = 0x1 << bitChanged
|| LDH .D1 *+A12[A13],A11 ; *(R + bitChanged)
|| ADD .L1 1,A13,A12 ; bitChanged++
|| CMPGT .L2 0,B14,B0 ; if (cSave < 0.0)
; end of OUTLOOP3
; ---------------------------------------------------------------------------
; Routine exit.
; If cSave was negative (B0 set by the CMPGT in the last OUTLOOP3 packet),
; wordSave (A4) is inverted with the constant 127 held in B11. The remaining
; saved registers are then reloaded from the stack through B15 and A9 and
; control returns through B3. The final load packet and NOP 4 occupy the
; five delay slots of the return branch.
; ---------------------------------------------------------------------------
[B0] XOR .L1X A4,B11,A4 ; wordSave ^= (1<<numBasis) - 1
||[!B2] LDW .D1 *A9++[2], B11 ; pop B11 off stack
||[!B2] LDW .D2 *B15++[2], A12 ; pop A12 off stack
B_END:
*** END Benchmark Timing ***
LDW .D1 *A9++[2], B14 ; pop B14 off stack
|| LDW .D2 *B15++[2], A11 ; pop A11 off stack
|| B .S2 B3 ; return to the address held in B3
LDW .D1 *A9++[2], B13 ; pop B13 off stack
|| LDW .D2 *B15++[3], A10 ; pop A10 off stack; B15 steps by 3 words
NOP 4 ; fill the remaining branch delay slots
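; Keyboard shortcuts (code-viewer page residue, not part of the program)
; Copy code: Ctrl + C
; Search code: Ctrl + F
; Full-screen mode: F11
; Switch theme: Ctrl + Shift + D
; Show shortcuts: ?
; Increase font size: Ctrl + =
; Decrease font size: Ctrl + -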