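/* cfft2_21160.asm */
/* (The original second line was a "font size:" label, apparently left over
   from the web code viewer this listing was copied from; not part of the source.) */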
/*___________________________________________________________________________
CFFT2_21160.ASM ADSP-21160 Radix-2 DIT Complex FFT
Does a radix-2 FFT of length 64 or greater on input data x(n).
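Arrays used (each N words long):
N words: 1st half of normal-ordered complex input, stored in DM (cmplx1)
N words: 2nd half of normal-ordered complex input, stored in PM (cmplx2)
N words: real part of fft, stored in DM (refft)
N words: imag part of fft, stored in PM (imfft)
N words: interleaved Sin and Cos table, stored in DM (twiddle)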
Based on FFTRAD2 by Kapriel Karagozian Analog Devices DSP Div. 1-800-ANALOGD
Author: 25-APR-91 Ronnin Yee Analog Devices DSP Div. 1-800-ANALOGD
Modified: 11/98 Richard Grafton Analog Devices DSP Div. 1-800-ANALOGD
(converted for ADSP-21160)
Modified: 4/99 Richard Grafton Analog Devices DSP Div. 1-800-ANALOGD
(optimized for SIMD of ADSP-21160)
Modified: 6/00 Philip Giordano Resolved illegal mixed word dual-data SIMD access
extra cycle introduced to resolve access
(must further optimize this access)
Code verified on Rev 0.1 silicon and 4.1.2 tools release,
simulator version 2.0.2.1
Calling Information:
dm(twiddle[N]) - Interleaved sin(2pi*n/N) table and cos(2pi*n/N) table from Twiddle.exe
program, twiddle factors interleaved as cos[0], sin[0], cos[1], sin[1] ...
stored in long-word space
dm(cmplx1[N]) - 1st half of complex input array stored in dm
pm(cmplx2[N]) - 2nd half of complex input array stored in pm
(Note: Because bit-reversed addressing is used with the arrays cmplx1 and
cmplx2, each of them must start at an address that is an integer multiple of
the transform length N (i.e. 0,N,2N,3N,...). This is accomplished by placing
each array alone in its own segment and specifying that segment's start
address in the linker description file. The two start addresses must also be
written, in bit-reversed form, into the preprocessor constants IREDM (for
cmplx1) and IREPM (for cmplx2).)
Results:
dm(refft[N]) - real working array and output
pm(imfft[N]) - imaginary working array and output
Benchmarks:
FFT Length cycles (SIMD) time (us) 80MHz
---------- ------------- ---------------
64 640 8
128 1245 15.56
256 2554 31.93
512 5399 67.49
1024 11572 144.65
2048 24913 311.41
4096 53614 670.18
8192 115083 1438.54
First 2 Stages - 5 cycles per 4 butterflies
Middle Stages - 4 cycles per 2 butterflies
2nd to Last Stage - 10 cycles per 4 butterflies
Last FFT Stage - 6 cycles per 2 butterflies
Conversion Stage - 6 cycles per 2 elements (N/2 elements total)
Memory Usage:
pm code = 163 words, pm data = 2*N words, dm data = 3*N words
____________________________________________________________________________*/
/* Include for symbolic definition of system register bits */
#include "def21160.h"
/*_________The constants below must be changed for different length FFTs______
N = number of points in the FFT, must be a power of 2
STAGES = log2(N)
BRMODIFY = bitrev(32 bit N/2)
IREDM = bitrev(32 bit addr of input array cmplx1 in dm), addr is 0,N,2N,3N,...
IREPM = bitrev(32 bit addr of input array cmplx2 in pm), addr is 0,N,2N,3N,...
____________________________________________________________________________*/
/* Size-dependent constants, shown here for a 256-point transform.
   N, STAGES and BRMODIFY must be changed together; IREDM and IREPM must be
   changed whenever the input arrays are relocated by the linker. */
#define N 256 /* number of points in the FFT (power of 2) */
#define STAGES 8 /* log2(N): 2^8 = 256 */
#define BRMODIFY 0x01000000 /* 32-bit bit reversal of N/2 = 0x00000080; loaded into m0 and m8 */
#define IREDM 0x0008A000 /* loaded into b0; bit-reverses to address 0x00051000 */
#define IREPM 0x00012000 /* loaded into b8; bit-reverses to address 0x00048000 */
/*________These constants are independent of the number of points____________*/
#define BFLY8 4 /*Offset between bf branches in a group of 8*/
/* BFLY8 seeds the modifiers and butterfly count of the first middle stage;
   later stages derive their own values from it at run time. */
/* Twiddle-factor table: N words initialised from the file twiddle.dat and
   placed in segment seg_dm64. The FFT code reads it through i1.
   NOTE(review): the file header names the generator "Twiddle.exe" while the
   comment below says RFFT2TBL -- confirm which tool produces twiddle.dat. */
.SEGMENT/DM seg_dm64;
.VAR twiddle[N] = "twiddle.dat"; /* twiddle factors, from RFFT2TBL, interleaved as */
/* cos[0] sin[0], cos[1], sin[1] ... */
.ENDSEG;
/* Real-part working/output array: N uninitialised words in segment seg_dmda,
   exported so that other modules can read the result. */
.SEGMENT/DM seg_dmda;
.ALIGN 2; /* start on an even address; NOTE(review): presumably required by the paired (long-word) stores of the first pass -- confirm */
.VAR refft[N]; /* real result */
.GLOBAL refft;
.ENDSEG;
/* First half of the complex input: N words initialised from incplx1.dat.
   The array is kept alone in its segment so that the linker description file
   can place it on a multiple of N; the bit-reversed form of that address is
   what IREDM must contain. */
.SEGMENT/DM seg_dmrd; /* Segment Addr. = integer multiple of N */
.ALIGN 2;
.VAR cmplx1[N] = "incplx1.dat"; /* 1st half of input complex (interleaved) data */
.GLOBAL cmplx1;
.ENDSEG;
/* Imaginary-part working/output array: N uninitialised words in segment
   seg_pmda, exported for other modules. The FFT code accesses it with pm()
   transfers through i8, i10 and i11.
   NOTE(review): the segment is declared with the /DM qualifier although its
   name and the file header say the data lives in PM -- confirm against the
   linker description file that this is intentional. */
.SEGMENT/DM seg_pmda;
.ALIGN 2;
.VAR imfft[N]; /* imag result */
.GLOBAL imfft;
.ENDSEG;
/* Second half of the complex input: N words initialised from incplx2.dat,
   read by the first pass with pm() transfers through i8. As with cmplx1 the
   file header requires the start address to be a multiple of N; its
   bit-reversed form is what IREPM must contain.
   NOTE(review): declared with the /DM qualifier although named as a PM
   segment -- confirm against the linker description file. */
.SEGMENT/DM seg_pmrd;
.ALIGN 2;
.VAR cmplx2[N] = "incplx2.dat"; /* 2nd half of input complex (interleaved) data */
.GLOBAL cmplx2;
.ENDSEG;
/*______________________reset vector test call of fft______________________*/
/* Minimal test harness placed in the run-time header segment: four filler
   instructions, then a four-instruction block that calls the FFT once and
   halts. The instruction count and order determine where each label lands,
   so nothing may be inserted between them.
   NOTE(review): the 4 + 4 layout presumably matches the reserved slot and the
   reset slot of the interrupt vector table -- confirm against the hardware
   reference. */
.SEGMENT/PM seg_rth; /* program starts at the reset vector */
Resrvd: nop;nop;nop;nop; /* filler for the slot preceding the reset entry */
Reset: idle; /* first instruction at the Reset label; execution continues with the call below once idle ends */
call cfft2; /* run the transform once */
stop: idle; /* cfft2 returns here: halt with results in refft / imfft */
nop;
.ENDSEG;
.SEGMENT/PM seg_pmco;
/*______________________________begin FFT__________________________________*/
/*---------------------------------------------------------------------------
cfft2 - radix-2 DIT complex FFT of N points (N and STAGES are #defines).

Inputs : none in registers. The first pass reads the input through i0 / i8,
         whose base registers are loaded with IREDM / IREPM (bit-reversed
         addresses of the input arrays); twiddles are read from twiddle[]
         through i1.
Outputs: refft[] (written with dm() stores) and imfft[] (written with pm()
         stores). These two arrays are also the working buffers of every
         stage after the first pass.
MODE1  : BR0, BR8, RND32, CBUFEN and PEYEN are set on entry. BR0/BR8 are
         cleared after the first pass and PEYEN is cleared in the delay slot
         of the return. RND32 and CBUFEN are left set.
Clobbers: r0-r15 (plus s1, s3, s4 written explicitly), i0-i2, i8, i10, i11,
         m0-m2, m8-m13 and the b/l registers paired with those index
         registers. Nothing is saved or restored. Up to three nested
         hardware loops are used.

Structure (each pass is software pipelined: the instructions just above a
loop prime it, and the loop body overlaps the loads of the next butterfly
with the arithmetic and stores of the current one):
  1. first two stages, done together as N/4 radix-4 butterflies with
     bit-reversed input addressing
  2. STAGES-4 middle stages   (loop end_stage / end_group / end_bfly)
  3. next-to-last stage       (loop end_group2)
  4. last stage               (loop last_stage)
Because instruction order carries the pipelining, do not reorder or insert
instructions without re-deriving the schedule.
---------------------------------------------------------------------------*/
cfft2: f4=-1.0; /* PEx factor for the f3=f3*f4 step of the first pass */
s4=1.0; /* PEy counterpart of f4, loaded explicitly with +1.0 */
bit set MODE1 BR0 | BR8 | RND32 | CBUFEN | PEYEN; /* enable bit reverse of i0 and i8, 32-bit rounding, circular buffers and PEy (SIMD) */
/*Do bitrev and packing within first two stages*/
/* Input pointers: base = bit-reversed input address, length 0 (no circular
   wrap), modifier = BRMODIFY. */
b0=IREDM;
l0=0;
m0=BRMODIFY;
b8=IREPM;
l8=0;
m8=BRMODIFY;
/* Output pointers for the first pass: the addresses of refft and imfft are
   shifted right by one bit before being used as bases, stepping by 1.
   NOTE(review): presumably this converts a normal-word address into the
   corresponding long-word address so that each store writes a register
   pair -- confirm against the memory map. */
r0=refft;
r0=lshift r0 by -1;
b2=r0;
l2=N;
m1=1;
r0=imfft;
r0=lshift r0 by -1;
b10=r0;
l10=N;
m9=1;
/*Do the first two stages (actually a radix-4 FFT stage)*/
/* Pipeline priming: same loads and arithmetic as the last three loop
   instructions below, but without the stores. */
f8=dm(i0,m0), f9=pm(i8,m8);
f8=f8+f9, f9=f8-f9, f10=dm(i0,m0), f11=pm(i8,m8);
f12=f10+f11, f3=f10-f11;
f3=f3*f4, f2=f8+f12, f6=f8-f12;
LCNTR=N/4, do FSTAGE until LCE; /* do N/4 simple radix-4 butterflies */
r3<->s3; /* exchange r3 with its PEy counterpart s3 */
f3=f9+f3, f7=f9-f3, f8=dm(i0,m0), f9=pm(i8,m8);
f8=f8+f9, f9=f8-f9, f10=dm(i0,m0), f11=pm(i8,m8);
f12=f10+f11, f3=f10-f11, dm(i2,m1)=f2, pm(i10,m9)=f2;
FSTAGE: f3=f3*f4, f2=f8+f12, f6=f8-f12, dm(i2,m1)=f6, pm(i10,m9)=f6;
/*middle stages loop */
bit clr MODE1 BR0 | BR8; /*finished with bitreversal*/
/* From here on every data pointer is a circular buffer of length N over
   refft (i0, i2) or imfft (i8, i10, i11); i1 walks the twiddle table. */
b2=refft;
b8=imfft;
l8=N;
b0=refft;
l0=N;
b1=twiddle;
l1=@twiddle/2; /* circular length for i1 = half the length of twiddle[] */
b10=imfft;
l10=N;
b11=imfft;
l11=N;
/* Modifiers for the first middle stage. m0 and m2 are set directly here;
   m8, m10, m12 and m13 are loaded from r10, r3, r9 and r13 at the top of
   each stage, and all of them are recomputed at the bottom of each stage. */
m0=-BFLY8;
m1=-N/8;
m2=-BFLY8-2;
m11=-2;
r2=2; /* constant 2 used in the per-stage count and modifier arithmetic */
r3=-BFLY8; /*initializes m0,10 - incr for butterf branches */
r5=BFLY8/2; /*counts # butterflies per a group */
r9=(-2*BFLY8)-2; /*initializes m12 - wrap around to next grp + 2 */
r10=-2*BFLY8; /*initializes m8 - incr between groups */
r13=-BFLY8-2; /*initializes m2,13 - wrap to bgn of 1st group */
r15=N/8; /*# OF GROUPS IN THIRD STAGE */
f1=dm(i1,m1); /*set pointers to tables to 1st coeff. */
LCNTR=STAGES-4, do end_stage until LCE; /*# OF STAGES TO BE HANDLED = LOG2(N)-4 */
/* Per-stage setup: load this stage's modifiers and point every data index
   at offset N-2 of its array. */
m8=r10;
m10=r3;
m12=r9;
i0=refft+N-2;
i2=refft+N-2;
i8=imfft+N-2;
i10=imfft+N-2;
i11=imfft+N-2;
r15=r15-r2, m13=r13; /*CALCULATE # OF CORE */
/*BFLIES/GROUP IN THIS STAGE */
/* Prime the pipeline for the first butterfly of the stage. The four
   products f0*f7, f1*f6, f1*f7 and f0*f6 are combined as f8+f12 and
   f11-f14 to form the twiddle multiply. */
s1=dm(i1,m1); /* load s1 with cos and s0 with sin values */
r1=dm(i1,m1); f7=pm(i8,m8); /* Resolved illegal mixed word dual-data SIMD access */
f12=f0*f7, f6=dm(i0,m0);
f8=f1*f6, modify(i11,m10);
f11=f1*f7;
f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), f7=pm(i8,m8);
f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0);
/*Each iteration does another set of bttrflys in each group */
LCNTR=r5, do end_group until LCE; /*# OF BUTTERFLIES/GROUP IN THIS STAGE */
/*core butterfly loop */
LCNTR=r15, do end_bfly until LCE; /*Do a butterfly in each group - 2 */
f8=f1*f6, f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m8);
f11=f1*f7, f3=f9+f14, f9=f9-f14, dm(i2,m0)=f13, f7=pm(i8,m8);
f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), pm(i10,m10)=f9;
end_bfly:
f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0), pm(i10,m10)=f3;
/*finish up last bttrfly and set up for next butterfly in each group */
/* The wrap modifiers (m2, m11, m12) are used here instead of the regular
   strides, and the next twiddle pair is fetched into s1 and r1. */
f8=f1*f6, f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m8);
f11=f1*f7, f4=f9+f14, f9=f9-f14, dm(i2,m0)=f13, f14=pm(i8,m11);
f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), pm(i10,m10)=f9;
s1=dm(i1,m1);
f13=f8+f12, f10=f8-f12, r1=dm(i1,m1); f7=pm(i8,m8); /* Resolved illegal mixed word dual-data SIMD access */
f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m12);
/*start on next butterfly in each group */
f12=f0*f7, f3=f9+f14, f9=f9-f14, f6=dm(i0,m0);
f8=f1*f6, dm(i2,m2)=f13, pm(i10,m10)=f4;
f11=f1*f7, pm(i10,m10)=f9;
f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), f7=pm(i8,m8);
end_group:
f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0), pm(i10,m13)=f3;
/* Stage epilogue: halve the group count, double the butterflies per group,
   and rebuild every stride for the next stage from those two numbers. */
r4=r15+r2, i1=b1; /*PREPARE R4 FOR #OF BFLIES CALC */
r15=ashift r4 by -1; /*# OF BFLIES/GRP IN NEXT STAGE */
r4=-r15;
m1=r4; /*update inc for sin & cos */
r5=ashift r5 by 1, f1=dm(i1,m1); /*update # bttrfly in a grp */
r3=ashift r5 by 1;
r3=-r3; /* inc for bttrfly branch */
r13=r3-r2, m0=r3; /* wrap to 1st grp */
r10=ashift r3 by 1; /* inc between grps */
end_stage: r9=r10-r2, m2=r13; /* wrap to grp +1 */
/*_________ next to last stage__________*/
/* Uses the strides left in r3, r9, r10 and r13 by the final pass of the
   stage epilogue above; the twiddle pointer starts at b1 + N/2 - 2 and
   steps by -2. */
m1=-2; /*modifier to sine table pntr */
m8=r10; /*incr between groups */
m10=r3; /*incr between bttrfly branches */
m12=r9; /*wrap around to next grp + 1 */
m13=r13; /*wrap to bgn of 1st group */
i0=refft+N-2;
r0=b1;
r1=(N/2)-2;
r0=r0+r1;
i1=r0; /*pntr to 1st twiddle coeffs */
i2=refft+N-2;
i8=imfft+N-2;
i10=imfft+N-2;
i11=imfft+N-2;
/* Prime the pipeline for the first butterfly of this stage. */
s1=dm(i1,m1);
r1=dm(i1,m1); f7=pm(i8,m8);
f12=f0*f7, f6=dm(i0,m0);
f8=f1*f6, modify(i11,m10);
f11=f1*f7, f7=pm(i8,m12);
f14=f0*f6, f12=f8+f12, f8=dm(i0,m0);
f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0);
/*Do the N/4 butterflies in the two groups of this stage */
/* A new twiddle pair is fetched into s1 and r1 on every iteration. */
LCNTR=N/8, do end_group2 until LCE;
f8=f1*f6, f14=f11-f14, dm(i2,m0)=f10, f9=pm(i11,m8);
f11=f1*f7, f3=f9+f14, f9=f9-f14, dm(i2,m0)=f13;
f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), pm(i10,m10)=f9;
s1=dm(i1,m1);
f13=f8+f12, f10=f8-f12, r1=dm(i1,m1); f7=pm(i8,m8); /* Resolved illegal mixed word dual-data SIMD access */
f12=f0*f7, f14=f11-f14, f6=dm(i0,m0), f9=pm(i11,m12);
f8=f1*f6, f3=f9+f14, f9=f9-f14, dm(i2,m0)=f10, pm(i10,m10)=f3;
f11=f1*f7, dm(i2,m2)=f13, pm(i10,m10)=f9;
f14=f0*f6, f12=f8+f12, f8=dm(i0,m0), f7=pm(i8,m12);
end_group2:
f12=f0*f7, f13=f8+f12, f10=f8-f12, f6=dm(i0,m0), pm(i10,m13)=f3;
/* The last stage */
/* Butterfly branches are N/2 apart (m0 = m10 = -N/2); m2 = m13 = -N/2-2 is
   the wrap step. The twiddle pointer starts at b1 + N/2 - 1 and steps
   by -1. */
m0=-N/2;
m2=-N/2-2;
m10=m0;
m13=m2;
i0=refft+N-2;
r0=b1;
r1=(N/2)-1;
r0=r0+r1;
i1=r0; /*pntr to 1st twiddle coeffs */
i2=refft+N-2;
i8=imfft+N-2;
i10=imfft+N-2;
i11=imfft+N-2;
m1=-1; /*modifiers to coeff tables */
/*start first bttrfly */
s1=dm(i1,m1);
r1=dm(i1,m1); f7=pm(i8,m11); /* Resolved illegal mixed word dual-data SIMD access */
f12=f0*f7, f6=dm(i0,m0);
f8=f1*f6, modify(i11,m10);
f11=f1*f7;
f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), f9=pm(i11,m11);
/*do N/2 bttrflys in the last stage */
/* N/4 iterations; a new twiddle pair is fetched on every iteration. */
LCNTR=N/4, do last_stage until LCE;
s1=dm(i1,m1);
f13=f8+f12, f10=f8-f12, r1=dm(i1,m1); f7=pm(i8,m11); /* Resolved illegal mixed word dual-data SIMD access */
f12=f0*f7, f14=f11-f14, f6=dm(i0,m0);
f8=f1*f6, f3=f9+f14, f15=f9-f14, dm(i2,m0)=f10, f9=pm(i11,m11);
f11=f1*f7, dm(i2,m2)=f13, pm(i10,m10)=f15;
last_stage:
f14=f0*f6, f12=f8+f12, f8=dm(i0,m2), pm(i10,m13)=f3;
/* Delayed return: the two instructions after rts (db) still execute, so
   SIMD mode is switched off before control reaches the caller. */
rts (db);
bit clr mode1 PEYEN;
nop;
/*_______________________________________________________________________*/
.ENDSEG;
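/* The lines below are not part of the program. They are the keyboard-shortcut
   help of the web code viewer this listing was copied from, translated here:
     Shortcut help
     Copy code            Ctrl + C
     Search code          Ctrl + F
     Full-screen mode     F11
     Switch theme         Ctrl + Shift + D
     Show shortcuts       ?
     Increase font size   Ctrl + =
     Decrease font size   Ctrl + -
*/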