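/* cxmatmul.cpp -- excerpt as shown by a web code viewer.
   (The viewer's "Font size:" page control followed this title; it is not part of the source.) */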
\
for( ; d_size.height--; _c_data += c_step0, \
d_buf += d_buf_step, \
d_data += d_step ) \
{ \
if( _c_data ) \
{ \
c_data = _c_data; \
for( j = 0; j <= d_size.width - 4; j += 4, c_data += 4*c_step1 )\
{ \
worktype t0 = alpha*d_buf[j]; \
worktype t1 = alpha*d_buf[j+1]; \
t0 += beta*worktype(c_data[0]); \
t1 += beta*worktype(c_data[c_step1]); \
d_data[j] = arrtype(t0); \
d_data[j+1] = arrtype(t1); \
t0 = alpha*d_buf[j+2]; \
t1 = alpha*d_buf[j+3]; \
t0 += beta*worktype(c_data[c_step1*2]); \
t1 += beta*worktype(c_data[c_step1*3]); \
d_data[j+2] = arrtype(t0); \
d_data[j+3] = arrtype(t1); \
} \
for( ; j < d_size.width; j++, c_data += c_step1 ) \
{ \
worktype t0 = alpha*d_buf[j]; \
d_data[j] = arrtype(t0 + beta*c_data[0]); \
} \
} \
else \
{ \
for( j = 0; j <= d_size.width - 4; j += 4 ) \
{ \
worktype t0 = alpha*d_buf[j]; \
worktype t1 = alpha*d_buf[j+1]; \
d_data[j] = arrtype(t0); \
d_data[j+1] = arrtype(t1); \
t0 = alpha*d_buf[j+2]; \
t1 = alpha*d_buf[j+3]; \
d_data[j+2] = arrtype(t0); \
d_data[j+3] = arrtype(t1); \
} \
for( ; j < d_size.width; j++ ) \
d_data[j] = arrtype(alpha*d_buf[j]); \
} \
} \
return CV_OK; \
}
/* Instantiate the three GEMM kernel macros once per supported element type.
   The table initializer that follows refers to icvGEMMSingleMul_<flavor>,
   icvGEMMBlockMul_<flavor> and icvGEMMStore_<flavor>; presumably those are the
   functions these macros expand to (the macro headers are outside this view --
   TODO confirm).
   In the visible tail of the store macro, `arrtype` is the type values are
   converted to before being written to the destination and `worktype` is the
   type of the alpha/beta temporaries; they are assumed to be the 2nd and 3rd
   macro arguments respectively. */

/* 32f_C1R flavor: float elements, double work type (registered under CV_32FC1). */
ICV_DEF_GEMM_SINGLE_MUL( 32f_C1R, float, double)
ICV_DEF_GEMM_BLOCK_MUL( 32f_C1R, float, double)
ICV_DEF_GEMM_STORE( 32f_C1R, float, double)
/* 64f_C1R flavor: double elements, double work type (registered under CV_64FC1). */
ICV_DEF_GEMM_SINGLE_MUL( 64f_C1R, double, double)
ICV_DEF_GEMM_BLOCK_MUL( 64f_C1R, double, double)
ICV_DEF_GEMM_STORE( 64f_C1R, double, double)
/* 32f_C2R flavor: CvComplex32f elements, CvComplex64f work type (registered under CV_32FC2). */
ICV_DEF_GEMM_SINGLE_MUL( 32f_C2R, CvComplex32f, CvComplex64f)
ICV_DEF_GEMM_BLOCK_MUL( 32f_C2R, CvComplex32f, CvComplex64f)
ICV_DEF_GEMM_STORE( 32f_C2R, CvComplex32f, CvComplex64f)
/* 64f_C2R flavor: CvComplex64f elements, CvComplex64f work type (registered under CV_64FC2). */
ICV_DEF_GEMM_SINGLE_MUL( 64f_C2R, CvComplex64f, CvComplex64f)
ICV_DEF_GEMM_BLOCK_MUL( 64f_C2R, CvComplex64f, CvComplex64f)
ICV_DEF_GEMM_STORE( 64f_C2R, CvComplex64f, CvComplex64f)
/* Function-pointer types for the three families of GEMM kernels.
   The kernels are stored as untyped void* entries in function tables;
   presumably callers cast a table entry to one of these types before
   invoking it (TODO confirm at the call sites).
   Every pointer argument is paired with a size_t step argument. */

/* Kernel taking three source buffers, one destination, separate source and
   destination sizes, the alpha/beta scalars and a flags word. */
typedef CvStatus (CV_STDCALL *CvGEMMSingleMulFunc)(
    const void* src1, size_t step1,
    const void* src2, size_t step2,
    const void* src3, size_t step3,
    void* dst, size_t dststep,
    CvSize srcsize, CvSize dstsize,
    double alpha, double beta,
    int flags );

/* Kernel taking two source buffers and one destination; no alpha/beta
   scalars are passed. */
typedef CvStatus (CV_STDCALL *CvGEMMBlockMulFunc)(
    const void* src1, size_t step1,
    const void* src2, size_t step2,
    void* dst, size_t dststep,
    CvSize srcsize, CvSize dstsize,
    int flags );

/* Kernel taking two source buffers and one destination of size dstsize,
   together with the alpha/beta scalars. */
typedef CvStatus (CV_STDCALL *CvGEMMStoreFunc)(
    const void* src1, size_t step1,
    const void* src2, size_t step2,
    void* dst, size_t dststep,
    CvSize dstsize,
    double alpha, double beta,
    int flags );
/* Registers the GEMM kernels in the three dispatch tables.
 *
 * For each of the four element types CV_32FC1, CV_64FC1, CV_32FC2 and
 * CV_64FC2, the matching kernel is written (as void*) into the fn_2d slot
 * indexed by that type constant.  No other slots are written.
 *
 *   single_mul_tab  receives the icvGEMMSingleMul_* kernels
 *   block_mul_tab   receives the icvGEMMBlockMul_*  kernels
 *   store_tab       receives the icvGEMMStore_*     kernels
 */
static void
icvInitGEMMTable( CvBigFuncTable* single_mul_tab,
                  CvBigFuncTable* block_mul_tab,
                  CvBigFuncTable* store_tab )
{
    /* float, one channel */
    single_mul_tab->fn_2d[CV_32FC1] = (void*)icvGEMMSingleMul_32f_C1R;
    block_mul_tab->fn_2d[CV_32FC1]  = (void*)icvGEMMBlockMul_32f_C1R;
    store_tab->fn_2d[CV_32FC1]      = (void*)icvGEMMStore_32f_C1R;

    /* double, one channel */
    single_mul_tab->fn_2d[CV_64FC1] = (void*)icvGEMMSingleMul_64f_C1R;
    block_mul_tab->fn_2d[CV_64FC1]  = (void*)icvGEMMBlockMul_64f_C1R;
    store_tab->fn_2d[CV_64FC1]      = (void*)icvGEMMStore_64f_C1R;

    /* float, two channels */
    single_mul_tab->fn_2d[CV_32FC2] = (void*)icvGEMMSingleMul_32f_C2R;
    block_mul_tab->fn_2d[CV_32FC2]  = (void*)icvGEMMBlockMul_32f_C2R;
    store_tab->fn_2d[CV_32FC2]      = (void*)icvGEMMStore_32f_C2R;

    /* double, two channels */
    single_mul_tab->fn_2d[CV_64FC2] = (void*)icvGEMMSingleMul_64f_C2R;
    block_mul_tab->fn_2d[CV_64FC2]  = (void*)icvGEMMBlockMul_64f_C2R;
    store_tab->fn_2d[CV_64FC2]      = (void*)icvGEMMStore_64f_C2R;
}
CV_IMPL void
cvGEMM( const CvArr* Aarr, const CvArr* Barr, double alpha,
const CvArr* Carr, double beta, CvArr* Darr, int flags )
{
const int block_lin_size = 128;
const int block_size = block_lin_size * block_lin_size;
static CvBigFuncTable single_mul_tab, block_mul_tab, store_tab;
static int inittab = 0;
static double zero[] = {0,0,0,0};
static float zerof[] = {0,0,0,0};
uchar* buffer = 0;
int local_alloc = 0;
uchar* block_buffer = 0;
CV_FUNCNAME( "cvGEMM" );
__BEGIN__;
CvMat *A = (CvMat*)Aarr;
CvMat *B = (CvMat*)Barr;
CvMat *C = (CvMat*)Carr;
CvMat *D = (CvMat*)Darr;
int len = 0;
CvMat stub, stub1, stub2, stub3;
CvSize a_size, d_size;
int type;
if( !CV_IS_MAT( A ))
{
int coi = 0;
CV_CALL( A = cvGetMat( A, &stub1, &coi ));
if( coi != 0 )
CV_ERROR( CV_BadCOI, "" );
}
if( !CV_IS_MAT( B ))
{
int coi = 0;
CV_CALL( B = cvGetMat( B, &stub2, &coi ));
if( coi != 0 )
CV_ERROR( CV_BadCOI, "" );
}
if( !CV_IS_MAT( D ))
{
int coi = 0;
CV_CALL( D = cvGetMat( D, &stub, &coi ));
if( coi != 0 )
CV_ERROR( CV_BadCOI, "" );
}
if( beta == 0 )
C = 0;
if( C )
{
if( !CV_IS_MAT( C ))
{
int coi = 0;
CV_CALL( C = cvGetMat( C, &stub3, &coi ));
if( coi != 0 )
CV_ERROR( CV_BadCOI, "" );
}
if( !CV_ARE_TYPES_EQ( C, D ))
CV_ERROR( CV_StsUnmatchedFormats, "" );
if( (flags&CV_GEMM_C_T) == 0 && (C->cols != D->cols || C->rows != D->rows) ||
(flags&CV_GEMM_C_T) != 0 && (C->rows != D->cols || C->cols != D->rows))
CV_ERROR( CV_StsUnmatchedSizes, "" );
if( (flags & CV_GEMM_C_T) != 0 && C->data.ptr == D->data.ptr )
{
cvTranspose( C, D );
C = D;
flags &= ~CV_GEMM_C_T;
}
}
else
{
C = &stub3;
C->data.ptr = 0;
C->step = 0;
C->type = CV_MAT_CONT_FLAG;
}
type = CV_MAT_TYPE(A->type);
if( !CV_ARE_TYPES_EQ( A, B ) || !CV_ARE_TYPES_EQ( A, D ) )
CV_ERROR( CV_StsUnmatchedFormats, "" );
a_size.width = A->cols;
a_size.height = A->rows;
d_size.width = D->cols;
d_size.height = D->rows;
switch( flags & (CV_GEMM_A_T|CV_GEMM_B_T) )
{
case 0:
len = B->rows;
if( a_size.width != len ||
B->cols != d_size.width ||
a_size.height != d_size.height )
CV_ERROR( CV_StsUnmatchedSizes, "" );
break;
case 1:
len = B->rows;
if( a_size.height != len ||
B->cols != d_size.width ||
a_size.width != d_size.height )
CV_ERROR( CV_StsUnmatchedSizes, "" );
break;
case 2:
len = B->cols;
if( a_size.width != len ||
B->rows != d_size.width ||
a_size.height != d_size.height )
CV_ERROR( CV_StsUnmatchedSizes, "" );
break;
case 3:
len = B->cols;
if( a_size.height != len ||
B->rows != d_size.width ||
a_size.width != d_size.height )
CV_ERROR( CV_StsUnmatchedSizes, "" );
break;
}
if( flags == 0 && 2 <= len && len <= 4 && (len == d_size.width || len == d_size.height) )
{
int i;
if( type == CV_64F )
{
double* d = D->data.db;
const double *a = A->data.db, *b = B->data.db, *c = C->data.db;
size_t d_step = D->step/sizeof(d[0]),
a_step = A->step/sizeof(a[0]),
b_step = B->step/sizeof(b[0]),
c_step = C->step/sizeof(c[0]);
if( !c )
c = zero;
switch( len )
{
case 2:
if( len == d_size.width && b != d )
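/* Code-viewer keyboard shortcuts (web page residue, not part of cxmatmul.cpp):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */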