/* complex_vector_multiply.asm */
/**************************************************************
File Name: Complex_Vector_Multiply.asm
Date Modified: 02/17/99 RFG original version
05/01/00 RFG modified format
07/13/00 RFG verified on silicon
Description:
Subroutine that implements a Complex Vector Multiply
given two complex input vectors.
Equation: Z(n) = X(n)*Y(n)
Z = (Xr + jXi) * (Yr + jYi) = (Xr*Yr - Xi*Yi) + j(Xr*Yi + Xi*Yr)
Calling Parameters:
b0,i0 = address of x input in long-word space
b1,i1 = address of x input in long-word space + 1
b8,i8 = address of y input in long-word space + 1
b9,i9 = address of y input in long-word space
b2,i2 = address of output in long-word space
r2 = number of samples/2 + 1
l0,l1,l2,l8,l9 = 0
m0,m8 = 2
m1 = 1
Assumptions:
All arrays must start on even address boundaries.
All arrays must have an even length in 32-bit words (zero pad if necessary).
X input is stored in Block 1 interleaved as real(0), imag(0), real(1), imag(1).
Y input is stored in Block 0 interleaved as real(0), imag(0), real(1), imag(1).
Output is stored in Block 1 interleaved as real(0), imag(0), real(1), imag(1).
Output must be N + 4 locations, first four locations are for two dummy writes for optimization.
Instructions are stored in Block 0.
Return Values:
b2 points to the output array
Registers Affected:
ustat1,ustat2
f0,s0,f1,s1,f4,s4,f5,s5,f8,s8,f10,s10,f12,s12,f13,s13
i0,i1,i2,i8,i9
Cycle Count:
15 + (samples*5/2) + 2 cache misses
Memory Usage:
Instructions Words (48-bits):
10 instruction words
Data Words (16, 32, 40, or 64-bits):
2 * Number of samples locations for the x input buffer (32-bits)
2 * Number of samples locations for the y input buffer (32-bits)
2 * Number of samples + 4 locations for the output buffer (32-bits)
**************************************************************/
#include "def21160.h" /* Symbol Definition File */
.global cx_vec_mult;
/* program memory code */
.section/pm seg_pmco;
/*
 * cx_vec_mult
 *
 * Complex vector multiply, Z(n) = X(n) * Y(n), i.e.
 *   Zr = Xr*Yr - Xi*Yi
 *   Zi = Xr*Yi + Xi*Yr
 *
 * Inputs (set up by the caller, see the file header):
 *   i0, i1 : X input pointers (i1 is the broadcast pointer)
 *   i8, i9 : Y input pointers (i9 is the broadcast pointer)
 *   i2     : output pointer, post-modified by m1 after every store
 *   r2     : loop count (header: number of samples/2 + 1)
 *   m0, m8 : post-modify values for the input pointers
 *
 * Structure:
 *   The loop body is software pipelined. The first instruction of the loop
 *   combines products (f8, f12, f10 via f13) that are only written by later
 *   instructions of the body, so each pass finishes and stores the results of
 *   the data loaded on the previous pass. As a consequence the stores of the
 *   very first pass write whatever the registers held on entry; the header
 *   reserves extra leading output locations for these dummy writes, and r2
 *   carries one extra pass to flush the pipeline.
 *
 *   NOTE(review): every pass issues loads, so the final (flush) pass appears
 *   to read one step past the last input pair on all four pointers -- confirm
 *   the buffers/padding make this harmless.
 */
cx_vec_mult:
/* Configure MODE1 for the loop; the same four bits are cleared again before returning. */
bit set MODE1 CBUFEN | PEYEN | BDCST1 | BDCST9; /* Circular Buffer, SIMD enabled, Broadcast I1 & I9 enabled */
/* Hardware loop: run the instructions up to and including label "macs" r2 times. */
lcntr=r2, do macs until lce;
/* Stage 1: finish the real part from the previous pass's products (f8 - f12), form the
   Xi*Yr product, and start the broadcast loads through i1/i9 for the next pair.
   Presumably the compute reads f1/f4 as they were before this instruction's loads
   land -- TODO confirm against the instruction set reference. */
f13=f1*f4, f12=f8-f12, f0=dm(i1,m0), f4=pm(i9,m8); /* F13/S13 = Xi * Yr, F12/S12 = Xr*Yr - Xi*Yi, F0/S0 = Xr(odd), F1/S1 = Xi(odd) , F4/S4 = Yr(even), F5/S5 = Yi(even) */
/* Stage 2: non-broadcast loads through i0/i8.
   NOTE(review): the comment below says the i8 load fills S4/S5, but the instruction
   names f4 as the destination -- confirm which registers a SIMD long-word load through
   a non-broadcast index actually writes, and correct whichever of the two is wrong. */
f0=dm(i0,m0), f4=pm(i8,m8); /* F0 = Xr(even), F1 = Xi(even), S4 = Yr(odd), S5 = Yi(odd) */
/* Stage 3: first product for the newly loaded data; finish the imaginary part
   (f10 from the previous pass plus f13 from stage 1). */
f8=f0*f4, f13=f10+f13; /* F8/S8 = Xr * Yr, F13/S13 = Xr*Yi + Xi*Yr */
/* Stage 4: second product for the new data; store from f12 and advance i2 by m1. */
f10=f0*f5, dm(i2,m1)=f12; /* F10/S10 = Xr * Yi, store results of even samples */
/* Stage 5 (loop end): third product for the new data; store from s12 and advance i2 by m1. */
macs: f12=f1*f5, dm(i2,m1)=s12; /* F12/S12 = Xi * Yi, store results of odd samples */
/* Delayed-branch return: the two instructions that follow are executed before control
   returns to the caller, so MODE1 is restored on the way out. */
rts (db);
bit clr MODE1 CBUFEN | PEYEN | BDCST1 | BDCST9; /* Circular Buffer, SIMD disabled, Broadcast I1 & I9 disabled */
nop;
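/*
 * Residue from the web code viewer this listing was copied from (not part of the program).
 * Keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */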