?? fir32_emac.s
字號:
;************************************************************************
;*
;* Copyright:
;* Freescale Semiconductor, INC. All Rights Reserved.
;* You are hereby granted a copyright license to use, modify, and
;* distribute the SOFTWARE so long as this entire notice is
;* retained without alteration in any modified and/or redistributed
;* versions, and that such modified versions are clearly identified
;* as such. No licenses are granted by implication, estoppel or
;* otherwise under any patents or trademarks of Freescale Semiconductor,
;* Inc. This software is provided on an "AS IS" basis and without warranty.
;*
;* To the maximum extent permitted by applicable law, FREESCALE
;* DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING
;* IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR
;* PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH REGARD TO THE
;* SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) AND ANY
;* ACCOMPANYING WRITTEN MATERIALS.
;*
;* To the maximum extent permitted by applicable law, IN NO EVENT
;* SHALL FREESCALE BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING
;* WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
;* INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
;* LOSS) ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
;*
;* Freescale assumes no responsibility for the maintenance and support
;* of this software
;**********************************************************************
;* FILENAME: fir32.s
;*
;* PURPOSE: FIR32 module source file, containing functions for allocating/deallocating
;* data structures for filter and computing a Finite Impulse
;* Response filter.
;*
;* AUTHOR: original code was written by Andrey Butok,
;* optimized for eMAC unit by Dmitriy Karpenko
;*********************************************
;Assembler preamble for the _FIR32_EMAC routine: section placement, alignment and symbol export.
.section .text ;-=Locate the code in the ".text" section.=-
;The next two lines are a commented-out include of "emac.h"; they are inactive in this file.
;#define __EMAC_H
;#include "emac.h"
.ALIGN 4 ;Align the code that follows. NOTE(review): whether 4 is a byte count or a power of two depends on the assembler - confirm.
.XDEF _FIR32_EMAC ;Declare _FIR32_EMAC as an externally visible symbol so other modules can reference it.
;******************************************************
;* NAME: void FIR32_EMAC( struct tIirStruct *pFIR, Frac32* pX, Frac32* pY, unsigned int n)
;*
;* DESCRIPTION: Computes a Finite Impulse Response (FIR) filter for an array of 32-bit fractional data values.
;******************************************************
;* a2 pFIR - Pointer to a data structure containing private data for the FIR filter
;* 68(a7) pX - Pointer to the input vector of n data elements
;* 72(a7) pY - Pointer to the output vector of n data elements
;* d2 k - Counter for inner loop
;* d1 i - Counter for outer loop
;* d0 N - Length of coefficients vector(N<=n)
;* a0 pCurY - Pointer to the current Y
;* a1 pCurX - Pointer to the current X
;* a3 pCurCoef - Pointer to the current coefficient
;* a4 pCurHistory - Pointer to the current element of history buffer
;* a5 pPredY - Pointer to the previous Y
;******************************************************
_FIR32_EMAC:
;//Saving values of used registers
lea -60(a7),a7
movem.l d0-d7/a0-a6,(a7)
lea -4(a7),a7
;//Saving value of MAC status register
move.l MACSR, d0
move.l d0,(a7)
lea 4(a7),a7
;//initializing MAC mode
;#ifdef __FRACT_M
move.l #0x00000030,MACSR
;#else
;move.l #0x00000000,MACSR
;#endif
;//Most useful parameters are moved from stack to registers.
move.l 72(a7),a0 ;pCurY=pY; // Pointer to the current Y
move.l 68(a7),a1 ;pCurX=pX; // Pointer to the current X.
move.l 64(a7),a2 ;N=pFIR->iFirCoefCount;
move.l 4(a2),d0
;// Begin of getting Y[1]..Y[N]
move.l #0,ACC0 ;//accumulators' initialization
move.l #0,ACC1
move.l #0,ACC2
move.l #0,ACC3
;//computing a block of output samples from Y[1] to Y[N-N%4]
moveq.l #4,d1 ;for(i=4;i<=N;i+=4) { //Begin of outer loop #1
.FORi1:
cmp.l d0,d1 ; //Comparing i with N
bhi .ENDFORi1 ; //If (i>N) then jump to .ENDFORi1
move.l 68(a7),a6 ;pCurX=pX+i-4; //Current sample pointer initialization
lea (-16,a6,d1.l*4),a1
move.l (a2),a3 ;pCurCoef=pFIR->pFirCoef; //Current coefficient for input pointer initialization
movem.l (a1),d3-d6 ;d3=*pCurX++; d4=*pCurX++; d5=*pCurX++; d6=*pCurX; pCurX-=3;
move.l (a3)+,a6 ;a6=*pCurCoef++;
mac.l a6,d6,<<,-(a1),d6,ACC3 ;ACC3+=a6*d6; d6=*--pCurX; //getting next input sample
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d3,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d3; a6=*pCurCoef++;//getting next coefficient
;//cycle of multiplying 8 input samples on 4 coefficients per iteration
move.l #4,d2 ;for(k=4; k<i; k+=4) { //Begin of inner loop #1
.FORk1:
cmp.l d1,d2 ;//comparing k with i
bcc .ENDFORk1 ;//if (k>=i) then jump to .ENDFORk1
mac.l a6,d5,<<,-(a1),d5,ACC3 ;ACC3+=a6*d5; d5=*--pCurX;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d6,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d6; a6=*pCurCoef++;
mac.l a6,d4,<<,-(a1),d4,ACC3 ;ACC3+=a6*d4; d4=*--pCurX;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d6,<<,ACC1 ;ACC1+=a6*d6;
mac.l a6,d5,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d5; a6=*pCurCoef++;
mac.l a6,d3,<<,-(a1),d3,ACC3 ;ACC3+=a6*d3; d3=*--pCurX;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d5,<<,ACC1 ;ACC1+=d6*d5;
mac.l a6,d4,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d4; a6=*pCurCoef++;
mac.l a6,d6,<<,-(a1),d6,ACC3 ;ACC3+=a6*d6; d6=*--pCurX;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d3,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d3; a6=*pCurCoef++;
addq.l #4,d2 ;//k+=4
bra .FORk1 ;//jumping to .FORk1
.ENDFORk1: ;} //end of inner loop #1
;//multiplying 3 first input samples on 3 coefficients
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d5,<<,(a3)+,a6,ACC3 ;ACC3+=a6*d5; a6=*pCurCoef++;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d4,<<,(a3)+,a6,ACC3 ;ACC3+=a6*d4; a6=*pCurCoef++;
mac.l a6,d3,<<,ACC3 ;ACC3+=a6*d3;
;//Testing that history buffer is not empty => this is not the first calling of this subroutine
tst.l 12(a2) ;if (pFIR=>iFirHistoryCount>0) { //if #1
beq .ENDBUFx ;//if (pFIR=>iFirHistoryCount=0) then jump to .ENDBUFy
move.l 8(a2),a6 ;pCurX=pFIR->pFirHistory+i-4; //Current sample pointer initialization
lea (-16,a6,d1.l*4),a1
move.l (a2),a6 ;pCurCoef=pFIR->pFirCoef+N; //Current coefficient for input pointer initialization
lea (0, a6, d0.l*4), a3
move.l (a1)+,d3 ;d3=*pCurX++;
move.l (a1)+,d4 ;d4=*pCurX++;
move.l (a1)+,d5 ;d5=*pCurX++;
move.l (a1)+,d6 ;d6=*pCurX++;
move.l -(a3),a6 ;a6=*--pCurCoef;
cmp.l d0,d1 ;if (N=i) {//if #2
bne .CONT ;//if (N!=i) then jump to .CONT
;//multiplying 3 input samples from history buffer on 3 coefficients
mac.l a6,d3,<<,ACC0 ;ACC0+=a6*d3;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC2 ;ACC2+=a6*d5; a6=*--pCurCoef;
mac.l a6,d4,<<,ACC0 ;ACC0+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC1 ;ACC1+=a6*d5; a6=*--pCurCoef;
mac.l a6,d5,<<,ACC0 ;ACC0+=a6*d5;
bra .ENDBUFx ;//jump to .ENDBUFx
;} //end if #2
.CONT: ;if (N!=i) { //if #3
mac.l a6,d3,<<,(a1)+,d3,ACC0 ;ACC0+=a6*d3; d3=*pCurX++;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC3 ;ACC3+=a6*d6; a6=*--pCurCoef;
move.l d1,d2
addq.l #4,d2
;//cycle of multiplying 8 input samples from history buffer on 4 coefficients per iteration
.FORk11: ;for(k=i+4; k<N; k+=4) { //begin of inner loop #2
cmp.l d0,d2 ;//comparing k with N=-
bcc .ENDFORk11 ;//if (k>=N) then jump to .ENDFORk11
mac.l a6,d4,<<,(a1)+,d4,ACC0 ;ACC0+=a6*d4; d4=*pCurX++;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d3,<<,-(a3),a6,ACC3 ;ACC3+=a6*d3; a6=*--pCurCoef;
mac.l a6,d5,<<,(a1)+,d5,ACC0 ;ACC0+=a6*d5; d5=*pCurX++;
mac.l a6,d6,<<,ACC1 ;ACC1+=a6*d6;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d4,<<,-(a3),a6,ACC3 ;ACC3+=a6*d4; a6=*--pCurCoef;
mac.l a6,d6,<<,(a1)+,d6,ACC0 ;ACC0+=a6*d6; d6=*pCurX++;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC3 ;ACC3+=a6*d5; a6=*--pCurCoef;
mac.l a6,d3,<<,(a1)+,d3,ACC0 ;ACC0+=a6*d3; d3=*pCurX++;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC3 ;ACC3+=a6*d6; a6=*--pCurCoef;
addq.l #4,d2 ;//k+=4
bra .FORk11 ;//jumping to .FORk11
.ENDFORk11: ;} //end of inner loop #2
;//cycle of multiplying 4 input samples from history buffer on 1 coefficient per iteration
move.l d0,d2 ;//d2=(N-1)%4;
subq.l #1,d2
andi.l #3,d2
.FORk12: ;for(k=(N-1)%4; k>0; k--){//begin of inner loop #3=-
cmpi.l #0,d2 ;//comparing k with 0
beq .ENDFORk12 ;//if (k=0) then jump to .ENDFORk12
mac.l a6,d4,<<,ACC0 ;ACC0+=a6*d4;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d3,<<,-(a3),a6,ACC3 ;ACC3+=a6*d3; a6=*--pCurCoef;
move.l d5,d4 ;d4=d5;
move.l d6,d5 ;d5=d6;
move.l d3,d6 ;d6=d3;
move.l (a1)+,d3 ;d3=*pCurX++;
subq.l #1,d2 ;//decrementing k
bra .FORk12 ;//jumping to .FORk12
.ENDFORk12: ;} //end of inner loop #3=-
;//multiplying 3 input samples on 3 coefficients
mac.l a6,d4,<<,ACC0 ;ACC0+=a6*d4;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC2 ;ACC2+=a6*d6; a6=*--pCurCoef;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -