/* cxmatmul.cpp  (listing header from the code-viewer page; "Font size:" control label omitted) */
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "_cxcore.h"
/****************************************************************************************\
* cvGEMM *
\****************************************************************************************/
/* File-scope GEMM hooks, one per element flavor (32f, 64f, 32fc, 64fc), all
   initialized to 0.
   NOTE(review): the icvBLAS_GEMM_*_t types are declared outside this view;
   presumably these are function pointers to an optional external BLAS GEMM
   implementation that is used when non-null -- confirm against _cxcore.h. */
icvBLAS_GEMM_32f_t icvBLAS_GEMM_32f_p = 0;
icvBLAS_GEMM_64f_t icvBLAS_GEMM_64f_p = 0;
icvBLAS_GEMM_32fc_t icvBLAS_GEMM_32fc_p = 0;
icvBLAS_GEMM_64fc_t icvBLAS_GEMM_64fc_p = 0;
/*
 * Copies a rectangular block row by row, moving the data as int-sized words.
 *
 *   src, src_step - first source row and the distance between consecutive
 *                   source rows, in uchar units
 *   dst, dst_step - first destination row and the distance between
 *                   consecutive destination rows, in uchar units
 *   size          - block size: size.height rows of size.width elements
 *   pix_size      - size of one element; only whole multiples of sizeof(int)
 *                   are copied, because the per-row word count is
 *                   size.width * (pix_size / sizeof(int)) (integer division)
 *
 * Both buffers are read/written through int pointers.
 * NOTE(review): this presumably requires int-aligned rows -- confirm with callers.
 */
static void
icvGEMM_CopyBlock( const uchar* src, int src_step,
                   uchar* dst, int dst_step,
                   CvSize size, int pix_size )
{
    /* number of int words that make up one row of the block */
    int row_words = size.width * (pix_size / sizeof(int));
    int k;

    while( size.height-- )
    {
        const int* in = (const int*)src;
        int* out = (int*)dst;

        /* bulk of the row: four words per pass, moved as two read/write pairs */
        for( k = 0; k <= row_words - 4; k += 4 )
        {
            int w0 = in[k];
            int w1 = in[k+1];
            out[k] = w0;
            out[k+1] = w1;

            w0 = in[k+2];
            w1 = in[k+3];
            out[k+2] = w0;
            out[k+3] = w1;
        }

        /* leftover words (fewer than four) */
        while( k < row_words )
        {
            out[k] = in[k];
            k++;
        }

        src += src_step;
        dst += dst_step;
    }
}
static void
icvGEMM_TransposeBlock( const uchar* src, int src_step,
uchar* dst, int dst_step,
CvSize size, int pix_size )
{
int i, j;
for( i = 0; i < size.width; i++, dst += dst_step, src += pix_size )
{
const uchar* _src = src;
switch( pix_size )
{
case sizeof(int):
for( j = 0; j < size.height; j++, _src += src_step )
((int*)dst)[j] = ((int*)_src)[0];
break;
case sizeof(int)*2:
for( j = 0; j < size.height*2; j += 2, _src += src_step )
{
int t0 = ((int*)_src)[0];
int t1 = ((int*)_src)[1];
((int*)dst)[j] = t0;
((int*)dst)[j+1] = t1;
}
break;
case sizeof(int)*4:
for( j = 0; j < size.height*4; j += 4, _src += src_step )
{
int t0 = ((int*)_src)[0];
int t1 = ((int*)_src)[1];
((int*)dst)[j] = t0;
((int*)dst)[j+1] = t1;
t0 = ((int*)_src)[2];
t1 = ((int*)_src)[3];
((int*)dst)[j+2] = t0;
((int*)dst)[j+3] = t1;
}
break;
default:
assert(0);
return;
}
}
}
#define ICV_DEF_GEMM_SINGLE_MUL( flavor, arrtype, worktype ) \
static CvStatus CV_STDCALL \
icvGEMMSingleMul_##flavor( const arrtype* a_data, size_t a_step, \
const arrtype* b_data, size_t b_step, \
const arrtype* c_data, size_t c_step, \
arrtype* d_data, size_t d_step, \
CvSize a_size, CvSize d_size, \
double alpha, double beta, int flags ) \
{ \
int i, j, k, n = a_size.width, m = d_size.width, drows = d_size.height; \
const arrtype *_a_data = a_data, *_b_data = b_data, *_c_data = c_data; \
arrtype* a_buf = 0; \
size_t a_step0, a_step1, c_step0, c_step1, t_step; \
\
a_step /= sizeof(a_data[0]); \
b_step /= sizeof(b_data[0]); \
c_step /= sizeof(c_data[0]); \
d_step /= sizeof(d_data[0]); \
a_step0 = a_step; \
a_step1 = 1; \
\
if( !c_data ) \
c_step0 = c_step1 = 0; \
else if( !(flags & CV_GEMM_C_T) ) \
c_step0 = c_step, c_step1 = 1; \
else \
c_step0 = 1, c_step1 = c_step; \
\
if( flags & CV_GEMM_A_T ) \
{ \
CV_SWAP( a_step0, a_step1, t_step ); \
n = a_size.height; \
if( a_step > 1 && n > 1 ) \
a_buf = (arrtype*)cvAlloc(n*sizeof(a_data[0])); \
} \
\
if( n == 1 ) /* external product */ \
{ \
arrtype* b_buf = 0; \
\
if( a_step > 1 ) \
{ \
a_buf = (arrtype*)cvAlloc(drows*sizeof(a_data[0])); \
for( k = 0; k < drows; k++ ) \
a_buf[k] = a_data[a_step*k]; \
a_data = a_buf; \
} \
\
if( b_step > 1 ) \
{ \
b_buf = (arrtype*)cvAlloc(d_size.width*sizeof(b_buf[0]) ); \
for( j = 0; j < d_size.width; j++ ) \
b_buf[j] = b_data[j*b_step]; \
b_data = b_buf; \
} \
\
for( i = 0; i < drows; i++, _c_data += c_step0, \
d_data += d_step ) \
{ \
worktype al = worktype(a_data[i])*alpha; \
c_data = _c_data; \
for( j = 0; j <= d_size.width - 2; j += 2, c_data += 2*c_step1 )\
{ \
worktype s0 = al*b_data[j]; \
worktype s1 = al*b_data[j+1]; \
if( !c_data ) \
{ \
d_data[j] = arrtype(s0); \
d_data[j+1] = arrtype(s1); \
} \
else \
{ \
d_data[j] = arrtype(s0 + c_data[0]*beta); \
d_data[j+1] = arrtype(s1 + c_data[c_step1]*beta); \
} \
} \
\
for( ; j < d_size.width; j++, c_data += c_step1 ) \
{ \
worktype s0 = al*b_data[j]; \
if( !c_data ) \
d_data[j] = arrtype(s0); \
else \
d_data[j] = arrtype(s0 + c_data[0]*beta); \
} \
} \
} \
else if( flags & CV_GEMM_B_T ) /* A * Bt */ \
{ \
for( i = 0; i < drows; i++, _a_data += a_step0, \
_c_data += c_step0, \
d_data += d_step ) \
{ \
a_data = _a_data; \
b_data = _b_data; \
c_data = _c_data; \
\
if( a_buf ) \
{ \
for( k = 0; k < n; k++ ) \
a_buf[k] = a_data[a_step1*k]; \
a_data = a_buf; \
} \
\
for( j = 0; j < d_size.width; j++, b_data += b_step, \
c_data += c_step1 ) \
{ \
worktype s0(0), s1(0), s2(0), s3(0); \
\
for( k = 0; k <= n - 4; k += 4 ) \
{ \
s0 += worktype(a_data[k])*b_data[k]; \
s1 += worktype(a_data[k+1])*b_data[k+1]; \
s2 += worktype(a_data[k+2])*b_data[k+2]; \
s3 += worktype(a_data[k+3])*b_data[k+3]; \
} \
\
for( ; k < n; k++ ) \
s0 += worktype(a_data[k])*b_data[k]; \
s0 = (s0+s1+s2+s3)*alpha; \
\
if( !c_data ) \
d_data[j] = arrtype(s0); \
else \
d_data[j] = arrtype(s0 + c_data[0]*beta); \
} \
} \
} \
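/* NOTE: the listing is truncated here -- the rest of ICV_DEF_GEMM_SINGLE_MUL is missing.
   Trailing code-viewer page text (not part of the source), translated:
   Keyboard shortcuts -- Copy code: Ctrl + C; Search code: Ctrl + F; Full screen: F11;
   Toggle theme: Ctrl + Shift + D; Show shortcuts: ?; Increase font size: Ctrl + =;
   Decrease font size: Ctrl + - */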