?? levinson.asm
字號:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : levinson.asm
Label name : __levinson
Version : 1.2
Change History :
    Version  Date        Author       Comments
    1.2      11/18/2002  Swarnalatha  Tested with VDSP++ 3.0
                                      compiler 6.2.2 on
                                      ADSP-21535 Rev. 0.2
    1.1      11/13/2002  Swarnalatha  Tested with VDSP++ 3.0
                                      on ADSP-21535 Rev. 0.2
    1.0      06/26/2001  Srinivas     Original
Description : This program implements the double-precision Levinson-Durbin
              algorithm.
              The autocorrelation values r(i) are assumed to be in 1.31
              format.
              The function produces the filter coefficients
              a(0), a(1), ... in 4.12 format.
              a(i)[j] -> jth LPC coefficient in the ith iteration.
Algorithm : Levinson-Durbin Algorithm
    Eld(0) = r(0)
    for i = 1 to 10 do
        a(i-1)[0] = 1
        k(i) = -[sum{a(i-1)[j] * r(i-j)}] / Eld(i-1),  j = 0 to i-1
        a(i)[i] = k(i)
        for j = 1 to i-1 do
            a(i)[j] = a(i-1)[j] + k(i) * a(i-1)[i-j]
        end
        Eld(i) = {1 - k(i)*k(i)} * Eld(i-1)
    end
Prototype : void _levinson(fract32 *PtrAutoCorr, fract16 *PtrOutput,
fract16 *PtrReflCoeff);
PtrAutoCorr :- Pointer to autocorrelation buffer,
PtrOutput :- Pointer to the output buffer,
PtrReflCoeff :- Pointer to reflection coeff buffer.
Assumptions : (1) Order of the filter, nOrder is assumed to be 10.
Registers used : A0, A1, R0-R7, P0-P5, I0-I3, L0-L3, M0, LC0, LC1.
Performance :
Code size : 344 bytes
Cycle count : 1283 Cycles
*******************************************************************************/
/*
 * __levinson
 *
 * Double-precision Levinson-Durbin recursion for a fixed order of 10.
 *
 * On entry (as used by the code below):
 *   R0 = PtrAutoCorr  - 32-bit autocorrelation values r[0], r[1], ...
 *   R1 = PtrOutput    - 16-bit output buffer; 11 halfwords are written
 *                       (the constant 4096 followed by a[1]..a[10])
 *   R2 = PtrReflCoeff - 16-bit buffer; one halfword (high half of k(i))
 *                       is written per iteration, 10 in total
 *
 * Stack usage: 80 bytes for two 10-word coefficient buffers (old and new
 * a[] arrays, swapped every iteration) plus 4 bytes holding the constant
 * 0x7FFFFFFF ("1" in 1.31), all released before returning.
 *
 * R7:4, P5:3 and RETS are saved and restored. L0-L3 are set to 0 and
 * I0-I3, M0, A0, A1, LC0, LC1 are modified without being restored.
 *
 * NOTE(review): __ld_div32 is external. From its use here it appears to
 * take the numerator in R0 and the denominator in R1, return the quotient
 * in R0, and leave R1, R5, R6, P1-P5 and I1-I3 intact - confirm against
 * the __ld_div32 source.
 */
.section L1_code;
.global __levinson;
.align 8;
.extern __ld_div32;
__levinson:
[--SP] = (R7:4,P5:3);       // save the preserved registers used below
[--SP] = RETS;              // save RETS: this routine itself issues CALLs
L0 = 0;                     // L0-L3 = 0 so I0-I3 are used as plain
L1 = 0;                     // (non-circular) pointers
L2 = 0;
P2 = R0;                    // P2 -> r[0] (autocorrelation buffer)
L3 = 0;
P3 = R1;                    // P3 -> output buffer
P0 = -80;                   // size of the two temp buffers:
                            // 2 * nOrder * 4 bytes
I3 = R2;                    // I3 -> reflection coefficient buffer
R1 = [P2++];                // R1 = r[0] = Eld(0); P2 -> r[1]
SP = SP + P0;               // reserve the 80 bytes of temp buffers
P0 = 40;                    // size of a single temp buffer
I0 = P2;                    // I0 -> r[1]
R5 = R5 - R5 (NS) || R0 = [P2++];
                            // R5 = 0 (cross-product accumulator used in
                            // the main loop), R0 = r[1]; P2 -> r[2]
P5 = SP;                    // P5 -> temp buffer 1
P1 = P0 + P5;               // P1 -> temp buffer 2 (buffer 1 + 40 bytes)
I2 = P5;                    // I2 -> a(i-1)[1] (first slot of buffer 1)
/************************* Beginning of first iteration *******************/
R0 = -R0;                   // -r(1)
CALL __ld_div32;            // k1 = -r(1)/Eld(0)
R4.H = 0X7FFF;              // R4 = 0x7FFFFFFF, i.e. "1" in 1.31 format
R4.L = 0XFFFF;
[--SP] = R4;                // keep "1" at [SP]; reloaded in every iteration
R2 = R0 >>> 4 || W[I3++] = R0.H;
                            // R2 = k1 in 5.27; store the high half of k1
                            // as the first reflection coefficient
A1 = R0.H * R0.L (M);       // cross term: high half * low half, mixed mode
A1 = A1 >>>14;              // shift by 14 instead of 15: the cross term
                            // occurs twice in k1*k1
R3 = (A1 += R0.H * R0.H) || [I2] = R2;
                            // R3 = k1*k1; a(1)[1] = k1 stored in buffer 1
R7 = R4 - R3 ;              // R7 = 1 - k1*k1
P0 = 9;                     // outer loop count = nOrder - 1
A1 = R7.H * R1.L (M);       // 32x32 multiply, cross term R7.H * R1.L
A1 += R1.H * R7.L (M);      // plus cross term R1.H * R7.L
A1 = A1 >>> 15;             // align the cross terms with the high product
R1 = (A1 += R7.H * R1.H);
                            // R1 = Eld(1) = (1 - k1*k1) * Eld(0)
I1 = P1;                    // I1 -> buffer 2, where a(i)[1..] will be written
R6.L = SIGNBITS R1;         // number of redundant sign bits in Eld(1)
R1 = ASHIFT R1 BY R6.L;     // normalize Eld(1); R6.L holds the total shift
M0 = 8;                     // step used to move I2 back by two words
P4 = 1;                     // P4 = i - 1 for the upcoming iteration (i = 2)
/******************************** 2 to nOrder iteration *********************/
LSETUP(LSTART0,LEND0) LC1=P0;
                            // outer loop, executed (nOrder-1) times
LSTART0:A1=A0=0 || R0 = [I2++] || R7 = [I0--];
                            // clear accumulators; R0 = a(i-1)[1],
                            // R7 = r[i-1]
/******************* Loop for reflection coeff calculation ******************/
/******************* summ -= a(i-1)[j] * r[i-j] *****************************/
LSETUP(LSTART1,LEND1) LC0=P4;
                            // inner loop, executed (i-1) times
LSTART1: R2 = (A0 -= R0.H * R7.H), A1 = R0.H * R7.L (M);
                            // A0 -= a.H * r.H (copied to R2);
                            // A1 = a.H * r.L (mixed mode)
R3 = (A1 += R7.H * R0.L) (M)|| R0=[I2++] || R7=[I0--];
                            // R3 = a.H*r.L + r.H*a.L; fetch the next
                            // a(i-1)[j] and r[i-j]
R3 = R3 >>> 15;             // align the cross products
LEND1: R5 = R5 + R3 (S);
                            // R5 += cross products (saturating)
R0 = R2 - R5;               // R0 = -(high products) - (cross products),
                            // i.e. the negated sum of a*r
R0 = R0 << 4 || R3 = [P2++];
                            // bring the sum to 1.31; R3 = r[i], P2 -> r[i+1]
R0 = R0 - R3 (NS) || I2 -= M0;
                            // also subtract r[i]; I2 stepped back two
                            // words so it points to a(i-1)[i-1]
CALL __ld_div32;            // divide the negated sum by Eld(i-1)
I0 = P5;                    // I0 -> a(i-1)[1]
R0 = ASHIFT R0 BY R6.L || R2 = [I2--];
                            // undo the Eld normalization to get k(i);
                            // R2 = a(i-1)[i-1]
/********************** Loop for LPC updation ******************************/
LSETUP(LSTART2,LEND2) LC0=P4;
                            // update loop, executed (i-1) times
A1 = R0.H * R2.L (M);
                            // loop prologue (runs once, before LSTART2):
                            // first cross term of k(i) * a(i-1)[i-j]
LSTART2: A1 += R2.H * R0.L (M);
A1 = A1 >>>15 || R3 = [I0++];
                            // align the cross terms; R3 = a(i-1)[j]
R7 = (A1 += R0.H * R2.H) || R2 = [I2--];
                            // R7 = k(i) * a(i-1)[i-j]; fetch the next
                            // a(i-1)[i-j], walking backwards
R7 = R3 + R7;               // a(i)[j] = a(i-1)[j] + k(i) * a(i-1)[i-j]
LEND2: A1 = R0.H * R2.L (M) || [I1++] = R7;
                            // start the next cross term; store a(i)[j]
                            // into the "new" buffer
I0 = P2;                    // I0 = P2, which was advanced past r[i] above
R3 = R0 >>> 4 || W[I3++] = R0.H ;
                            // R3 = k(i) in 5.27; store the high half of
                            // k(i) as reflection coefficient
A1 = R0.H * R0.L (M) || I0 -= 4 || [I1] = R3;
                            // begin k(i)*k(i); I0 stepped back to r[i],
                            // which is r[i-1] for the next iteration;
                            // a(i)[i] = k(i)
A1 = A1 >>> 14;             // shift by 14 instead of 15: the cross term
                            // occurs twice in k(i)*k(i)
R3 = (A1 += R0.H * R0.H) || R5 = [SP];
                            // R3 = k(i)*k(i); R5 = "1" saved at [SP]
R0 = R5 - R3 (NS);          // R0 = 1 - k(i)*k(i)
P4 += 1;                    // advance the inner-loop count (i - 1)
A1 = R0.H * R1.L (M);
                            // 32x32 multiply, cross term R0.H * R1.L
A1 += R1.H * R0.L (M);
A1 = A1 >>>15;              // align the cross terms
R1 = (A1 += R0.H * R1.H);
                            // R1 = Eld(i) = (1 - k(i)*k(i)) * Eld(i-1)
R5 = 0;                     // reset the cross-product accumulator
R3.L = SIGNBITS R1;         // redundant sign bits in the new Eld(i)
R1 = ASHIFT R1 BY R3.L;
                            // renormalize Eld(i)
R6.L = R6.L + R3.L (S);
                            // accumulate the total normalization shift
R2 = P1;                    // swap the old and new coefficient buffers:
I2 = R2;                    // I2 -> buffer just written, now a(i-1)[1]
I1 = P5;                    // I1 -> the other buffer, target for a(i)[1]
P1 = P5;
LEND0: P5 = R2;             // P5 -> a(i-1)[1] for the next iteration
TERMINATE:
R1.L = 4096;                // a[0] = 1 in 4.12 format
P0 = 11;                    // nOrder + 1 halfwords are written
R0 = [I2++];                // R0 = a[1] (I2 -> final coefficient buffer)
/*********** Loop for copying the LPC values to o/p buffer **************/
LSETUP(STEND4,STEND4) LC1=P0;
                            // single-instruction loop, 11 passes
STEND4:R1 = R0 >>> 15 || W[P3++] = R1 || R0 = [I2++];
                            // store the previously prepared value, convert
                            // the current a[j] from 5.27 to 4.12 and fetch
                            // the next one. The final two loads go past
                            // the 10-word buffer; their values are never
                            // stored.
P0 = 84;                    // 80 bytes of buffers + 4 bytes for the "1"
SP = SP + P0;               // release the temporary stack storage
RETS = [SP++];
(R7:4, P5:3) = [SP++];      // restore the saved registers before returning
RTS;
NOP;                        // to avoid one stall if LINK or UNLINK happens
                            // to be the next instruction after RTS in
                            // memory
__levinson.end:
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -