* File: dotprod.asm
* ========================================================================= *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* dotprod -- dotprod *
* *
* *
* REVISION DATE *
* 24-May-2005 *
* *
* USAGE *
* *
* This routine is C callable, and has the following C prototype: *
* *
* int dotprod *
* ( *
* const short *m, // Pointer to first vector // *
* const short *n, // Pointer to second vector // *
* int count // Length of vectors. // *
* ); *
* *
* This routine returns the dot product as its return value. *
* *
* *
* DESCRIPTION *
* *
* The "dotprod" function implements a dot product of two input *
* vectors, returning the scalar result. Each element of the *
* first array is multiplied with the corresponding element of the *
* second array, and the products are summed. The sum is returned. *
* *
* int dotprod *
* ( *
* const short *m, // Pointer to first vector // *
* const short *n, // Pointer to second vector // *
* int count // Length of vectors. // *
* ) *
* { *
* int i, sum = 0; *
* *
* for (i = 0; i < count; i++) *
* sum += m[i] * n[i]; *
* *
* return sum; *
* } *
* *
* The above C code is a general implementation without *
* restrictions. The assembly code has some restrictions, as *
* noted below. *
* *
* *
* TECHNIQUES *
* *
* The code is unrolled 4 times to enable full memory and multiplier *
* bandwidth to be utilized. *
* *
* One cycle for an XP stall exists to add A and B side sums. *
* *
* *
* ASSUMPTIONS *
* *
* The input length is a multiple of 4 and greater than 0. *
* *
* The input data and coefficients are stored on double word *
* aligned boundaries. *
* *
* *
* MEMORY NOTE *
* *
* To avoid bank conflicts, the input arrays 'm' and 'n' must *
* be offset by 4 half-words (8 bytes). *
* *
* The code is ENDIAN NEUTRAL. *
* *
* *
* CYCLES *
* *
* cycles = count/4 + 14 *
* For count = 256, cycles = 78 *
* *
* CODESIZE *
* *
* 64 bytes *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
* ======================================================================== *
* ======================================================================== *
**********************= SYMBOLIC REGISTER ASSIGNMENTS ************************
* Symbolic register names used by the dotprod routine (text substitution
* via .asg). Directives are indented: a mnemonic starting in column 1 is
* parsed as a label by the assembler.
        .asg            A4,         A_m         ; pointer to vector m (post-incremented by the loads)
        .asg            B4,         B_n         ; pointer to vector n (post-incremented by the loads)
        .asg            B21,        B_count     ; loop count written to ILC (count/4 - 4)
        .asg            A19,        A_sum       ; running partial sum, A side
        .asg            A18,        A_prod      ; DOTP2 result m[i+2]*n[i+2] + m[i+3]*n[i+3]
        .asg            B19,        B_sum       ; running partial sum, B side
        .asg            B18,        B_prod      ; DOTP2 result m[i]*n[i] + m[i+1]*n[i+1]
        .asg            A17,        A_reg1      ; elements m[i+3] m[i+2]
        .asg            A16,        A_reg0      ; elements m[i+1] m[i]
        .asg            B17,        B_reg1      ; elements n[i+3] n[i+2]
        .asg            B16,        B_reg0      ; elements n[i+1] n[i]
        .asg            A4,         A_sumt      ; total sum (A side + B side) returned to caller
* ------------------------------------------------------------------------
* _dotprod -- dot product of two vectors of 'short' elements (C callable).
*
*   In:    A4 = m, B4 = n, A6 = count
*   Out:   A4 = sum of m[i] * n[i]
*   Uses:  A4 and B4 (post-incremented by the loads), A16-A19, B16-B19,
*          B21, ILC
*   Exit:  branch through B3
*
* Each loop iteration consumes four elements from each vector: one LDDW
* per side, two DOTP2s, and one accumulate on each side. The A-side and
* B-side partial sums are combined after the loop.
*
* Layout note: the label starts in column 1; directives and instructions
* are indented so their mnemonics are not parsed as labels.
*
* NOTE(review): ILC is loaded with count/4 - 4. This presumably matches
* the SPLOOPD minimum of 4 iterations at ii = 1, which would make the
* smallest supported count 16, not "greater than 0" as the file header
* states -- confirm against the C64x+ CPU reference before relying on
* small counts.
* ------------------------------------------------------------------------
        .text
        .global _dotprod
_dotprod:
* ======================================================================== *
* ======================================================================== *
        SHR     .S2X    A6,         2,          B_count       ; count / 4: four elements per iteration
        SUB     .L2     B_count,    4,          B_count       ; count/4 - 4: value handed to ILC
        SPLOOPD 1                                             ; loop buffer, iteration interval 1
||      MVC     .S2     B_count,    ILC                       ; loop counter
||      ZERO    .L1     A_sum                                 ; A-side partial sum = 0
||      ZERO    .L2     B_sum                                 ; B-side partial sum = 0
*----------------------------------------------------------------------------*
        LDDW    .D2T2   *B_n++,     B_reg1:B_reg0             ; load n[i+3..i], advance n by 8 bytes
||      LDDW    .D1T1   *A_m++,     A_reg1:A_reg0             ; load m[i+3..i], advance m by 8 bytes
        NOP             4                                     ; wait for the loaded data
        DOTP2   .M2X    A_reg0,     B_reg0,     B_prod        ; m[i]*n[i] + m[i+1]*n[i+1]
||      DOTP2   .M1X    A_reg1,     B_reg1,     A_prod        ; m[i+2]*n[i+2] + m[i+3]*n[i+3]
        NOP             3                                     ; wait for the DOTP2 results
        SPKERNEL 4, 0                                         ; end of loop body
||      ADD     .L2     B_sum,      B_prod,     B_sum         ; B-side sum += B-side product pair
||      ADD     .L1     A_sum,      A_prod,     A_sum         ; A-side sum += A-side product pair
*----------------------------------------------------------------------------*
        BNOP    .S2     B3,         4                         ; return branch; 4 NOP cycles while loop drains
*---- Epilogue complete -----------------------------------------------------*
        ADD     .L1X    A_sum,      B_sum,      A_sumt        ; return value = A-side sum + B-side sum
*---- Branch occurs ---------------------------------------------------------*
        .end
* ======================================================================== *
* End of file: dotprod.asm *
* ------------------------------------------------------------------------ *
* Copyright (C) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================== *
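* ------------------------------------------------------------------------ *
* Web code-viewer residue (keyboard shortcut help; not part of the source):
*   Copy code            Ctrl + C
*   Search code          Ctrl + F
*   Full-screen mode     F11
*   Toggle theme         Ctrl + Shift + D
*   Show shortcuts       ?
*   Increase font size   Ctrl + =
*   Decrease font size   Ctrl + -
* ------------------------------------------------------------------------ *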