?? programcu.cu
字號(hào):
float off = 0.5f * FDIV(next - pre, weight + weight - next - pre);
key.w = radius_per_ten_degrees * (index_max + 0.5f + off);
d_list[idx] = key;
}else
{
float max_vote = vote[0];
#pragma unroll
for(int i = 1; i < 36; ++i) max_vote = max(max_vote, vote[i]);
float vote_threshold = max_vote * 0.8f;
float pre = vote[35];
float max_rot[2], max_vot[2] = {0, 0};
int ocount = 0;
#pragma unroll
for(int i =0; i < 36; ++i)
{
float next = vote[i + 1];
if(vote[i] > vote_threshold && vote[i] > pre && vote[i] > next)
{
float di = 0.5f * FDIV(next - pre, vote[i] + vote[i] - next - pre);
float rot = i + di + 0.5f;
float weight = vote[i];
///
if(weight > max_vot[1])
{
if(weight > max_vot[0])
{
max_vot[1] = max_vot[0];
max_rot[1] = max_rot[0];
max_vot[0] = weight;
max_rot[0] = rot;
}
else
{
max_vot[1] = weight;
max_rot[1] = rot;
}
ocount ++;
}
}
pre = vote[i];
}
float fr1 = max_rot[0] / 36.0f;
if(fr1 < 0) fr1 += 1.0f;
unsigned short us1 = ocount == 0? 65535 : ((unsigned short )floor(fr1 * 65535.0f));
unsigned short us2 = 65535;
if(ocount > 1)
{
float fr2 = max_rot[1] / 36.0f;
if(fr2 < 0) fr2 += 1.0f;
us2 = (unsigned short ) floor(fr2 * 65535.0f);
}
unsigned int uspack = (us2 << 16) | us1;
key.w = __int_as_float(uspack);
d_list[idx] = key;
}
}
// Host-side launcher for ComputeOrientation_Kernel.
//   list              - feature list; its width is the number of entries and its
//                       device buffer (_cuData) is handed to the kernel as float4*
//   got               - image bound as a 2D texture on texDataF2; its width/height
//                       are passed to the kernel
//   key               - only bound (to texDataF4) when existing_keypoint is zero and
//                       GlobalUtil::_SubpixelLocalization is set
//   sigma, sigma_step - forwarded to the kernel unchanged
//   existing_keypoint - non-zero: list is bound to texDataF4;
//                       zero: list is bound to texDataList
// Returns immediately when the list is empty. Launch errors are reported through
// CheckErrorCUDA.
void ProgramCU::ComputeOrientation(CuTexImage* list, CuTexImage* got, CuTexImage*key, 
                                   float sigma, float sigma_step, int existing_keypoint)
{
    const int len = list->GetImgWidth();
    if(len <= 0) return;

    const int width  = got->GetImgWidth();
    const int height = got->GetImgHeight();

    // Select which texture references the kernel will read the list through.
    if(!existing_keypoint)
    {
        list->BindTexture(texDataList);
        if(GlobalUtil::_SubpixelLocalization) key->BindTexture(texDataF4);
    }
    else
    {
        list->BindTexture(texDataF4);
    }
    got->BindTexture2D(texDataF2);

    // One thread per list entry, 64 threads per block, rounded up.
    const int threads_per_block = 64;
    const int num_blocks = (len + threads_per_block - 1) / threads_per_block;
    dim3 grid(num_blocks);
    dim3 block(threads_per_block);

    ComputeOrientation_Kernel<<<grid, block>>>((float4*) list->_cuData, 
        len, width, height, sigma, sigma_step, 
        GlobalUtil::_OrientationGaussianFactor, 
        GlobalUtil::_OrientationGaussianFactor * GlobalUtil::_OrientationWindowFactor,
        GlobalUtil::_FixedOrientation? 0 : GlobalUtil::_MaxOrientation,
        existing_keypoint, GlobalUtil::_SubpixelLocalization, GlobalUtil::_KeepExtremumSign);

    ProgramCU::CheckErrorCUDA("ComputeOrientation");
}
// Builds the 8-bin orientation histogram for one cell of a 4x4 descriptor grid.
//
// Thread layout: 1D launch, 16 consecutive threads per keypoint (keypoint index is
// idx >> 4, cell index is idx & 0xf with ix = cell & 3 and iy = cell >> 2).
// Inputs (read through texture references bound by the host wrapper):
//   texDataF4 - one float4 per keypoint: (x, y) position, z multiplied by
//               window_factor to get the cell size, w used as the angle in radians
//   texDataF2 - 2D float2 image; .x scales each sample's weight and .y is subtracted
//               from the keypoint angle (presumably gradient magnitude/orientation -
//               confirm against the code that fills this texture)
// Output: each thread writes its 8 bins as two float4 at d_des[2*idx], d_des[2*idx+1].
//
// DYNAMIC_INDEXING selects between direct indexing of the local histogram and an
// unrolled compare-and-accumulate loop that avoids dynamic local-array indexing.
template <bool DYNAMIC_INDEXING> void __global__ ComputeDescriptor_Kernel(float4* d_des, int num, 
                                            int width, int height, float window_factor)
{
    // Converts radians to histogram bins: 8 bins cover 2*pi.
    const float rpi = 4.0/ 3.14159265358979323846;
    int idx = IMUL(blockIdx.x, blockDim.x) + threadIdx.x;
    int fidx = idx >> 4;
    if(fidx >= num) return;
    float4 key = tex1Dfetch(texDataF4, fidx);
    int bidx = idx& 0xf, ix = bidx & 0x3, iy = bidx >> 2;
    float spt = fabs(key.z * window_factor);
    float s, c; __sincosf(key.w, &s, &c);
    // Bring the keypoint angle into (-pi, pi].
    float anglef = key.w > 3.14159265358979323846? key.w - (2.0 * 3.14159265358979323846) : key.w ;
    float cspt = c * spt, sspt = s * spt;
    float crspt = c / spt, srspt = s / spt;
    float2 offsetpt, pt;
    float xmin, ymin, xmax, ymax, bsz;
    // Cell centre in the rotated descriptor frame, then in image coordinates.
    offsetpt.x = ix - 1.5f;
    offsetpt.y = iy - 1.5f;
    pt.x = cspt * offsetpt.x - sspt * offsetpt.y + key.x;
    pt.y = cspt * offsetpt.y + sspt * offsetpt.x + key.y;
    // Axis-aligned bounding box of the rotated cell, clamped to stay 1.5 pixels
    // inside the image.
    bsz = fabs(cspt) + fabs(sspt);
    xmin = max(1.5f, floor(pt.x - bsz) + 0.5f);
    ymin = max(1.5f, floor(pt.y - bsz) + 0.5f);
    xmax = min(width - 1.5f, floor(pt.x + bsz) + 0.5f);
    ymax = min(height - 1.5f, floor(pt.y + bsz) + 0.5f);
    // 9 slots: bin 8 is the wrap-around neighbour of bin 7 and is folded into bin 0.
    float des[9];
    #pragma unroll
    for(int i =0; i < 9; ++i) des[i] = 0.0f;
    for(float y = ymin; y <= ymax; y += 1.0f)
    {
        for(float x = xmin; x <= xmax; x += 1.0f)
        {
            // Sample position relative to the cell centre, in cell units.
            float dx = x - pt.x;
            float dy = y - pt.y;
            float nx = crspt * dx + srspt * dy;
            float ny = crspt * dy - srspt * dx;
            float nxn = fabs(nx);
            float nyn = fabs(ny);
            if(nxn < 1.0f && nyn < 1.0f)
            {
                float2 cc = tex2D(texDataF2, x, y);
                float dnx = nx + offsetpt.x;
                float dny = ny + offsetpt.y;
                // Gaussian falloff from the descriptor centre times bilinear
                // falloff from the cell centre.
                float ww = exp(-0.125f * (dnx * dnx + dny * dny));
                float wx = 1.0f - nxn;
                float wy = 1.0f - nyn;
                float weight = ww * wx * wy * cc.x;
                float theta = (anglef - cc.y) * rpi;
                if(theta < 0) theta += 8.0f;
                // A tiny negative theta rounds to exactly 8.0f after the wrap above
                // (and the difference can reach 2*pi). Wrap the upper side as well so
                // the bin index stays in [0, 7]; otherwise the dynamic path touches
                // des[9] and the unrolled path drops the sample.
                if(theta >= 8.0f) theta -= 8.0f;
                float fo = floor(theta);
                int bin = fo;
                // Linear interpolation between the two neighbouring bins.
                float weight1 = fo + 1.0f - theta;
                float weight2 = theta - fo;
                if(DYNAMIC_INDEXING)
                {
                    // This dynamic indexing part might be slow.
                    des[bin] += (weight1 * weight);
                    des[bin + 1] += (weight2 * weight);
                }else
                {
                    #pragma unroll
                    for(int k = 0; k < 8; ++k)
                    {
                        if(k == bin) 
                        {
                            des[k] += (weight1 * weight);
                            des[k+1] += (weight2 * weight);
                        }
                    }
                }
            }
        }
    }
    // The histogram is circular: fold the overflow slot back into bin 0.
    des[0] += des[8];
    int didx = idx << 1;
    d_des[didx] = make_float4(des[0], des[1], des[2], des[3]);
    d_des[didx+1] = make_float4(des[4], des[5], des[6], des[7]);
}
// Normalizes descriptors in place: one thread per descriptor, each descriptor being
// 32 consecutive float4 values (128 floats).
//   1. scale to unit L2 length,
//   2. clip every component to at most 0.2,
//   3. rescale the clipped vector to unit L2 length.
// Input is read through texDataF4 and output is written to d_des at the same
// float4 offsets.
void __global__ NormalizeDescriptor_Kernel(float4* d_des, int num)
{
    const int desc_id = IMUL(blockIdx.x, blockDim.x) + threadIdx.x;
    if(desc_id >= num) return;

    const int base = desc_id << 5;    // 32 float4 per descriptor
    float4 cell[32];

    // Pass 1: load the descriptor and accumulate its squared length.
    float sum_sq = 0;
    #pragma unroll
    for(int k = 0; k < 32; ++k)
    {
        const float4 v = tex1Dfetch(texDataF4, base + k);
        cell[k] = v;
        sum_sq += (v.x * v.x + v.y * v.y +
                   v.z * v.z + v.w * v.w);
    }
    const float inv_len = rsqrt(sum_sq);

    // Pass 2: normalize, clip at 0.2 and accumulate the clipped squared length.
    float clipped_sq = 0;
    #pragma unroll
    for(int k = 0; k < 32; ++k)
    {
        float4 v = cell[k];
        v.x = min(0.2f, v.x * inv_len);
        v.y = min(0.2f, v.y * inv_len);
        v.z = min(0.2f, v.z * inv_len);
        v.w = min(0.2f, v.w * inv_len);
        clipped_sq += (v.x * v.x + v.y * v.y +
                       v.z * v.z + v.w * v.w);
        cell[k] = v;
    }
    const float inv_clipped = rsqrt(clipped_sq);

    // Pass 3: renormalize the clipped vector and store it.
    #pragma unroll
    for(int k = 0; k < 32; ++k)
    {
        float4 v = cell[k];
        v.x *= inv_clipped;
        v.y *= inv_clipped;
        v.z *= inv_clipped;
        v.w *= inv_clipped;
        d_des[base + k] = v;
    }
}
// Host-side launcher that fills dtex with one descriptor per entry of list.
//   list - keypoint list; its width is the number of keypoints, bound to texDataF4
//   got  - image bound as a 2D texture on texDataF2
//   dtex - output; (re)initialised with InitTexture(num*128, 1, 1) and written by
//          the kernels through its device buffer (_cuData)
// The descriptor kernel uses 16 threads per keypoint. When
// GlobalUtil::_NormalizedSIFT is set, dtex is rebound to texDataF4 and normalized
// in place with one thread per keypoint.
void ProgramCU::ComputeDescriptor(CuTexImage*list, CuTexImage* got, CuTexImage* dtex)
{
    int num = list->GetImgWidth();
    int width = got->GetImgWidth();
    int height = got->GetImgHeight();

    dtex->InitTexture(num*128, 1, 1);

    // An empty list would produce a zero-sized grid, which is an invalid launch
    // configuration. ComputeOrientation guards the same way.
    if(num <= 0) return;

    got->BindTexture2D(texDataF2);
    list->BindTexture(texDataF4);

    const int block_width = 64;
    dim3 grid((num * 16 + block_width -1) / block_width);
    dim3 block(block_width);

    if(GlobalUtil::_UseDynamicIndexing)
        ComputeDescriptor_Kernel<true><<<grid, block>>>((float4*) dtex->_cuData, num, width, height, GlobalUtil::_DescriptorWindowFactor); 
    else
        ComputeDescriptor_Kernel<false><<<grid, block>>>((float4*) dtex->_cuData, num, width, height, GlobalUtil::_DescriptorWindowFactor); 

    if(GlobalUtil::_NormalizedSIFT)
    {
        // Rebind texDataF4 to the freshly written descriptors; this replaces the
        // keypoint-list binding made above.
        dtex->BindTexture(texDataF4);
        const int norm_block_width = 32;
        dim3 norm_grid((num + norm_block_width -1) / norm_block_width);
        dim3 norm_block(norm_block_width);
        NormalizeDescriptor_Kernel<<<norm_grid, norm_block>>>((float4*) dtex->_cuData, num);
    }
    CheckErrorCUDA("ComputeDescriptor");
}
//////////////////////////////////////////////////////
// Blocks the calling host thread until all previously issued device work is done.
// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its direct
// replacement with the same semantics (requires CUDA 4.0 or newer).
// The return code is intentionally not inspected here, as before.
void ProgramCU::FinishCUDA()
{
    cudaDeviceSynchronize();
}
// Reads (and thereby clears) the last CUDA runtime error and, if there was one,
// prints its description to stderr.
//   location - optional label printed before the message; may be NULL
void ProgramCU::CheckErrorCUDA(const char* location)
{
    const cudaError_t status = cudaGetLastError();
    if(status == cudaSuccess) return;

    if(location != NULL)
    {
        std::cerr << location << ":\t";
    }
    std::cerr << cudaGetErrorString(status) << std::endl;
}
// Converts a single-channel image (read through texData) into a displayable
// intensity: result = saturate(0.5 + 20 * v), with the one-pixel image border
// forced to 0.5.
// Expects a 2D launch with BLOCK_DIM x BLOCK_DIM threads per block
// (BLOCK_DIM == 1 << BLOCK_LOG_DIM); threads outside width x height do nothing.
void __global__ ConvertDOG_Kernel(float* d_result, int width, int height)
{
    int row = (blockIdx.y << BLOCK_LOG_DIM) + threadIdx.y;
    int col = (blockIdx.x << BLOCK_LOG_DIM) + threadIdx.x;
    if(col < width && row < height)
    {
        int index = row * width + col;
        float v = tex1Dfetch(texData, index);
        // Float literals keep the arithmetic in single precision; the previous
        // double literals promoted the whole expression to double.
        d_result[index] = (col == 0 || row == 0 || col == width -1 || row == height -1)?
            0.5f : saturate(0.5f + 20.0f * v);
    }
}
///
// Launches ConvertDOG_Kernel to turn `dog` into a displayable float image in `out`.
// Does nothing when `out` has no device buffer. Launch errors are reported through
// CheckErrorCUDA.
void ProgramCU::DisplayConvertDOG(CuTexImage* dog, CuTexImage* out)
{
    if(out->_cuData == NULL) return;

    const int width  = dog->GetImgWidth();
    const int height = dog->GetImgHeight();
    dog->BindTexture(texData);

    // One thread per pixel, BLOCK_DIM x BLOCK_DIM tiles, rounded up in both axes.
    dim3 threads(BLOCK_DIM, BLOCK_DIM);
    dim3 blocks((width + BLOCK_DIM - 1) / BLOCK_DIM, (height + BLOCK_DIM - 1) / BLOCK_DIM);
    ConvertDOG_Kernel<<<blocks, threads>>>((float*) out->_cuData, width, height);

    ProgramCU::CheckErrorCUDA("DisplayConvertDOG");
}
// Converts every second float of the texData stream (element index*2) into a
// displayable intensity: result = saturate(5 * v), with the one-pixel image border
// forced to 0.
// Expects a 2D launch with BLOCK_DIM x BLOCK_DIM threads per block; threads outside
// width x height do nothing.
void __global__ ConvertGRD_Kernel(float* d_result, int width, int height)
{
    const int row = (blockIdx.y << BLOCK_LOG_DIM) + threadIdx.y;
    const int col = (blockIdx.x << BLOCK_LOG_DIM) + threadIdx.x;
    if(col >= width || row >= height) return;

    const int pixel = row * width + col;
    const float value = tex1Dfetch(texData, pixel << 1);

    const bool on_border = (col == 0) || (row == 0) || (col == width - 1) || (row == height - 1);
    if(on_border)
    {
        d_result[pixel] = 0;
    }
    else
    {
        d_result[pixel] = saturate(5 * value);
    }
}
// Launches ConvertGRD_Kernel to turn `got` into a displayable float image in `out`.
// `got` is bound as a linear texture on texData (the kernel reads every second
// float of it). Does nothing when `out` has no device buffer. Launch errors are
// reported through CheckErrorCUDA.
void ProgramCU::DisplayConvertGRD(CuTexImage* got, CuTexImage* out)
{
    if(out->_cuData == NULL) return;

    const int width  = got->GetImgWidth();
    const int height = got->GetImgHeight();
    got->BindTexture(texData);

    // One thread per pixel, BLOCK_DIM x BLOCK_DIM tiles, rounded up in both axes.
    dim3 threads(BLOCK_DIM, BLOCK_DIM);
    dim3 blocks((width + BLOCK_DIM - 1) / BLOCK_DIM, (height + BLOCK_DIM - 1) / BLOCK_DIM);
    ConvertGRD_Kernel<<<blocks, threads>>>((float*) out->_cuData, width, height);

    ProgramCU::CheckErrorCUDA("DisplayConvertGRD");
}
// Produces an RGBA visualisation of a keypoint map on top of a grey background.
//   texDataF4 - per-pixel float4; a pixel is treated as a keypoint when .x is
//               exactly 1.0 or -1.0
//   texData   - per-pixel float used for the grey background
// Interior keypoints are drawn red (x > 0) or green (x < 0). All other pixels are
// grey with value saturate(0.5 + 20 * v); border pixels use 0.5.
// Expects a 2D launch with BLOCK_DIM x BLOCK_DIM threads per block; threads outside
// width x height do nothing.
void __global__ ConvertKEY_Kernel(float4* d_result, int width, int height)
{
    int row = (blockIdx.y << BLOCK_LOG_DIM) + threadIdx.y;
    int col = (blockIdx.x << BLOCK_LOG_DIM) + threadIdx.x;
    if(col < width && row < height)
    {
        int index = row * width + col;
        float4 keyv = tex1Dfetch(texDataF4, index);
        int is_key = (keyv.x == 1.0f || keyv.x == -1.0f); 
        int inside = col > 0 && row > 0 && row < height -1 && col < width - 1;
        // Float literals keep the arithmetic in single precision; the previous
        // double literals promoted the expression to double.
        float v = inside? saturate(0.5f + 20.0f * tex1Dfetch(texData, index)) : 0.5f;
        d_result[index] = is_key && inside ? 
            (keyv.x > 0? make_float4(1.0f, 0, 0, 1.0f) : make_float4(0.0f, 1.0f, 0.0f, 1.0f)): 
            make_float4(v, v, v, 1.0f) ;
    }
}
// Launches ConvertKEY_Kernel to render the keypoint map `key` over the image `dog`
// into the RGBA buffer of `out`. Does nothing when `out` has no device buffer.
// Launch errors are reported through CheckErrorCUDA, matching the other
// DisplayConvert* wrappers.
void ProgramCU::DisplayConvertKEY(CuTexImage* key, CuTexImage* dog, CuTexImage* out)
{
    if(out->_cuData == NULL) return;
    int width = key->GetImgWidth(), height = key ->GetImgHeight();
    dog->BindTexture(texData);
    key->BindTexture(texDataF4);
    // One thread per pixel, BLOCK_DIM x BLOCK_DIM tiles, rounded up in both axes.
    dim3 grid((width + BLOCK_DIM - 1)/ BLOCK_DIM,  (height + BLOCK_DIM - 1)/BLOCK_DIM);
    dim3 block(BLOCK_DIM, BLOCK_DIM);
    ConvertKEY_Kernel<<<grid, block>>>((float4*) out->_cuData, width, height);
    // Previously the launch result was never checked here.
    ProgramCU::CheckErrorCUDA("DisplayConvertKEY");
}
// Writes one display vertex per feature: (x, y) are copied from the float4 read
// through texDataF4, z is set to 0 and w to 1.
// 1D launch, one thread per feature; threads with index >= num do nothing.
void __global__ DisplayKeyPoint_Kernel(float4 * d_result, int num)
{
    const int feature = IMUL(blockIdx.x, blockDim.x) + threadIdx.x;
    if(feature < num)
    {
        const float4 src = tex1Dfetch(texDataF4, feature);
        float4 vertex;
        vertex.x = src.x;
        vertex.y = src.y;
        vertex.z = 0;
        vertex.w = 1.0f;
        d_result[feature] = vertex;
    }
}
// Launches DisplayKeyPoint_Kernel to write one display vertex per feature of
// `ftex` into the float4 buffer of `out`.
// Does nothing when `out` has no device buffer (same guard as the DisplayConvert*
// wrappers) or when the feature list is empty (a zero-sized grid is an invalid
// launch configuration). Launch errors are reported through CheckErrorCUDA.
void ProgramCU::DisplayKeyPoint(CuTexImage* ftex, CuTexImage* out)
{
    if(out->_cuData == NULL) return;
    int num = ftex->GetImgWidth();
    if(num <= 0) return;
    // One thread per feature, 64 threads per block, rounded up.
    int block_width = 64;
    dim3 grid((num + block_width -1) /block_width);
    dim3 block(block_width);
    ftex->BindTexture(texDataF4);
    DisplayKeyPoint_Kernel<<<grid, block>>>((float4*) out->_cuData, num);
    ProgramCU::CheckErrorCUDA("DisplayKeyPoint");
}
// Emits 10 display vertices per feature, one per thread: the feature centre
// followed by points at +/- (3 * |z|) offsets rotated by the feature angle w.
// The feature (x, y, z, w) is read through texDataF4 at index idx / 10.
// `num` is the total number of vertices (10 * feature count); threads with index
// >= num do nothing.
void __global__ DisplayKeyBox_Kernel(float4* d_result, int num)
{
    const int idx = IMUL(blockIdx.x, blockDim.x) + threadIdx.x;
    if(idx >= num) return;

    const int feature = idx / 10;
    const int vertex  = idx - IMUL(feature , 10);    // 0..9 within the feature

    const float4 v = tex1Dfetch(texDataF4, feature);
    const float half = fabs(v.z * 3.0f);

    float s, c;
    __sincosf(v.w, &s, &c);

    // Offsets in the feature's own frame:
    //   x: 0 for vertex 0, -half for vertices 5..8, +half otherwise
    //   y: 0 for vertices 0..1, +half for vertices 3..6, -half otherwise
    float dx = 0;
    if(vertex != 0)
    {
        dx = (vertex >= 5 && vertex <= 8) ? -half : half;
    }
    float dy = 0;
    if(vertex > 1)
    {
        dy = (vertex >= 3 && vertex <= 6) ? half : -half;
    }

    // Rotate the offset by the feature angle and translate to the feature position.
    d_result[idx] = make_float4(v.x + c * dx - s * dy,
                                v.y + c * dy + s * dx,
                                0, 1.0f);
}
// Launches DisplayKeyBox_Kernel to write 10 display vertices per feature of `ftex`
// into the float4 buffer of `out`.
// Does nothing when `out` has no device buffer or when the feature list is empty
// (a zero-sized grid is an invalid launch configuration). Launch errors are
// reported through CheckErrorCUDA, matching DisplayKeyPoint.
void ProgramCU::DisplayKeyBox(CuTexImage* ftex, CuTexImage* out)
{
    if(out->_cuData == NULL) return;
    int len = ftex->GetImgWidth();
    if(len <= 0) return;
    // One thread per output vertex (10 per feature), 32 threads per block.
    int block_width = 32;
    dim3 grid((len * 10 + block_width -1) / block_width);
    dim3 block(block_width);
    ftex->BindTexture(texDataF4);
    DisplayKeyBox_Kernel<<<grid, block>>>((float4*) out->_cuData, len * 10);
    // Previously the launch result was never checked here.
    ProgramCU::CheckErrorCUDA("DisplayKeyBox");
}
///////////////////////////////////////////////////////////////////
// Binds this image's linear device buffer (_cuData, _numBytes bytes) to the given
// texture reference, using the channel format already stored in the reference.
// No byte offset is requested (NULL), and the return code is not inspected here.
inline void CuTexImage:: BindTexture(textureReference& texRef)
{
    const cudaChannelFormatDesc* format = &texRef.channelDesc;
    cudaBindTexture(NULL, &texRef, _cuData, format, _numBytes);
}
// Binds this image's CUDA array (_cuData2D) to the given texture reference, using
// the channel format queried from the array itself.
// Return codes of the runtime calls are not inspected here.
inline void CuTexImage::BindTexture2D(textureReference& texRef)
{
    cudaChannelFormatDesc arrayFormat;
    cudaGetChannelDesc(&arrayFormat, _cuData2D);

    cudaBindTextureToArray(&texRef, _cuData2D, &arrayFormat);
}
// Returns the number of CUDA devices reported by the runtime, or 0 when the query
// fails (for example when no driver is installed).
// Previously the count was left uninitialised and the return code ignored, so a
// failed query could return an arbitrary non-zero value.
int ProgramCU::IsCudaSupported()
{
    int deviceCount = 0;
    if(cudaGetDeviceCount(&deviceCount) != cudaSuccess) return 0;
    return deviceCount;
}
////////////////////////////////////////////////////////////////////////////////////////
// siftmatch functions
?? 快捷鍵說(shuō)明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -