// File: marsgpuemit.cu
// (Code-viewer residue, kept as a comment: "Font size:")
/**
*This is the source code for Mars, a MapReduce framework on graphics
*processors.
*Author: Wenbin Fang (HKUST), Bingsheng He (HKUST)
*Mentor: Naga K. Govindaraju (Microsoft Corp.), Qiong Luo (HKUST), Tuyong
*Wang (Sina.com).
*If you have any question on the code, please contact us at {saven,
*wenbin, luo}@cse.ust.hk.
*The copyright is held by HKUST. Mars is provided "as is" without any
*guarantees of any kind.
*/
//---------------------------------------------------------------------------------------------------------------------
//float type
//----------------------------------------------------------------------------------------------------------------------
// Emits one float key for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The key is stored
// in interKeys at byte offset psKeySizes[slot] + keyValOffsets[slot].x, and
// keyValOffsets[slot].x is then advanced by keySize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y), only when curIndex[slot] != 0
//        (for the first record .x is left as it was);
//   .y = keySize.
//
// curIndex is only read here, never modified.
// NOTE(review): the float store assumes the destination address is suitably
// aligned for float -- confirm against how key sizes are laid out by callers.
__device__ void gpuEmitKeyFloat(float key,
                                size_t keySize,
                                size_t* psKeySizes,
                                size_t* psCounts,
                                int2* keyValOffsets,
                                char* interKeys,
                                int4* interOffsetSizes,
                                size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the key at this thread's current write position.
    char* dst = interKeys + psKeySizes[slot] + keyValOffsets[slot].x;
    *reinterpret_cast<float*>(dst) = key;
    keyValOffsets[slot].x += keySize;

    // Fill in the key fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
    }
    rec->y = keySize;
}
// Emits one float value for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The value is
// stored in interVals at byte offset psValSizes[slot] + keyValOffsets[slot].y,
// and keyValOffsets[slot].y is then advanced by valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .z = previous record's (.z + .w), only when curIndex[slot] != 0
//        (for the first record .z is left as it was);
//   .w = valSize.
//
// curIndex is only read here, never modified.
// NOTE(review): the float store assumes the destination address is suitably
// aligned for float -- confirm against how value sizes are laid out by callers.
__device__ void gpuEmitValFloat(float val,
                                size_t valSize,
                                size_t* psValSizes,
                                size_t* psCounts,
                                int2* keyValOffsets,
                                char* interVals,
                                int4* interOffsetSizes,
                                size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the value at this thread's current write position.
    char* dst = interVals + psValSizes[slot] + keyValOffsets[slot].y;
    *reinterpret_cast<float*>(dst) = val;
    keyValOffsets[slot].y += valSize;

    // Fill in the value fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->z = prev->z + prev->w;
    }
    rec->w = valSize;
}
//---------------------------------------------------------------------------------------------------------------------
//float2 type
//----------------------------------------------------------------------------------------------------------------------
// Emits one float2 key for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The key is stored
// in interKeys at byte offset psKeySizes[slot] + keyValOffsets[slot].x, and
// keyValOffsets[slot].x is then advanced by keySize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y), only when curIndex[slot] != 0;
//   .y = keySize.
//
// curIndex is only read here, never modified.
// NOTE(review): the float2 store assumes the destination address is suitably
// aligned for float2 -- confirm against callers.
__device__ void gpuEmitKeyFloat2(float2 key,
                                 size_t keySize,
                                 size_t* psKeySizes,
                                 size_t* psCounts,
                                 int2* keyValOffsets,
                                 char* interKeys,
                                 int4* interOffsetSizes,
                                 size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the key at this thread's current write position.
    char* dst = interKeys + psKeySizes[slot] + keyValOffsets[slot].x;
    *reinterpret_cast<float2*>(dst) = key;
    keyValOffsets[slot].x += keySize;

    // Fill in the key fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
    }
    rec->y = keySize;
}
// Emits one float2 value for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The value is
// stored in interVals at byte offset psValSizes[slot] + keyValOffsets[slot].y,
// and keyValOffsets[slot].y is then advanced by valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .z = previous record's (.z + .w), only when curIndex[slot] != 0;
//   .w = valSize.
//
// curIndex is only read here, never modified.
// NOTE(review): the float2 store assumes the destination address is suitably
// aligned for float2 -- confirm against callers.
__device__ void gpuEmitValFloat2(float2 val,
                                 size_t valSize,
                                 size_t* psValSizes,
                                 size_t* psCounts,
                                 int2* keyValOffsets,
                                 char* interVals,
                                 int4* interOffsetSizes,
                                 size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the value at this thread's current write position.
    char* dst = interVals + psValSizes[slot] + keyValOffsets[slot].y;
    *reinterpret_cast<float2*>(dst) = val;
    keyValOffsets[slot].y += valSize;

    // Fill in the value fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->z = prev->z + prev->w;
    }
    rec->w = valSize;
}
//---------------------------------------------------------------------------------------------------------------------
//int type
//----------------------------------------------------------------------------------------------------------------------
// Emits one int key for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The key is stored
// in interKeys at byte offset psKeySizes[slot] + keyValOffsets[slot].x, and
// keyValOffsets[slot].x is then advanced by keySize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y), only when curIndex[slot] != 0;
//   .y = keySize.
//
// curIndex is only read here, never modified.
// NOTE(review): the int store assumes the destination address is suitably
// aligned for int -- confirm against callers.
__device__ void gpuEmitKeyInt(int key,
                              size_t keySize,
                              size_t* psKeySizes,
                              size_t* psCounts,
                              int2* keyValOffsets,
                              char* interKeys,
                              int4* interOffsetSizes,
                              size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the key at this thread's current write position.
    char* dst = interKeys + psKeySizes[slot] + keyValOffsets[slot].x;
    *reinterpret_cast<int*>(dst) = key;
    keyValOffsets[slot].x += keySize;

    // Fill in the key fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
    }
    rec->y = keySize;
}
// Emits one int value for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The value is
// stored in interVals at byte offset psValSizes[slot] + keyValOffsets[slot].y,
// and keyValOffsets[slot].y is then advanced by valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .z = previous record's (.z + .w), only when curIndex[slot] != 0;
//   .w = valSize.
//
// curIndex is only read here, never modified.
// NOTE(review): the int store assumes the destination address is suitably
// aligned for int -- confirm against callers.
__device__ void gpuEmitValInt(int val,
                              size_t valSize,
                              size_t* psValSizes,
                              size_t* psCounts,
                              int2* keyValOffsets,
                              char* interVals,
                              int4* interOffsetSizes,
                              size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the value at this thread's current write position.
    char* dst = interVals + psValSizes[slot] + keyValOffsets[slot].y;
    *reinterpret_cast<int*>(dst) = val;
    keyValOffsets[slot].y += valSize;

    // Fill in the value fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->z = prev->z + prev->w;
    }
    rec->w = valSize;
}
//---------------------------------------------------------------------------------------------------------------------
//int2 type
//----------------------------------------------------------------------------------------------------------------------
// Emits one int2 key for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The key's two
// components are stored member by member in interKeys at byte offset
// psKeySizes[slot] + keyValOffsets[slot].x, and keyValOffsets[slot].x is then
// advanced by keySize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y), only when curIndex[slot] != 0;
//   .y = keySize.
//
// curIndex is only read here, never modified.
__device__ void gpuEmitKeyInt2(int2 key,
                               size_t keySize,
                               size_t* psKeySizes,
                               size_t* psCounts,
                               int2* keyValOffsets,
                               char* interKeys,
                               int4* interOffsetSizes,
                               size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the key component-wise at this thread's current write position.
    char* dst = interKeys + psKeySizes[slot] + keyValOffsets[slot].x;
    int2* keyOut = reinterpret_cast<int2*>(dst);
    keyOut->x = key.x;
    keyOut->y = key.y;
    keyValOffsets[slot].x += keySize;

    // Fill in the key fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
    }
    rec->y = keySize;
}
// Emits one int2 value for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The value is
// stored in interVals at byte offset psValSizes[slot] + keyValOffsets[slot].y,
// and keyValOffsets[slot].y is then advanced by valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .z = previous record's (.z + .w), only when curIndex[slot] != 0;
//   .w = valSize.
//
// curIndex is only read here, never modified.
// NOTE(review): the int2 store assumes the destination address is suitably
// aligned for int2 -- confirm against callers.
__device__ void gpuEmitValInt2(int2 val,
                               size_t valSize,
                               size_t* psValSizes,
                               size_t* psCounts,
                               int2* keyValOffsets,
                               char* interVals,
                               int4* interOffsetSizes,
                               size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the value at this thread's current write position.
    char* dst = interVals + psValSizes[slot] + keyValOffsets[slot].y;
    *reinterpret_cast<int2*>(dst) = val;
    keyValOffsets[slot].y += valSize;

    // Fill in the value fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->z = prev->z + prev->w;
    }
    rec->w = valSize;
}
//---------------------------------------------------------------------------------------------------------------------
//int5 type
//----------------------------------------------------------------------------------------------------------------------
// Emits one int5 key for the calling thread (int5 is declared outside this
// section of the file).
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The key is stored
// in interKeys at byte offset psKeySizes[slot] + keyValOffsets[slot].x, and
// keyValOffsets[slot].x is then advanced by keySize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y), only when curIndex[slot] != 0;
//   .y = keySize.
//
// curIndex is only read here, never modified.
__device__ void gpuEmitKeyInt5(int5 key,
                               size_t keySize,
                               size_t* psKeySizes,
                               size_t* psCounts,
                               int2* keyValOffsets,
                               char* interKeys,
                               int4* interOffsetSizes,
                               size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the key at this thread's current write position.
    char* dst = interKeys + psKeySizes[slot] + keyValOffsets[slot].x;
    *reinterpret_cast<int5*>(dst) = key;
    keyValOffsets[slot].x += keySize;

    // Fill in the key fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
    }
    rec->y = keySize;
}
// Emits one int5 value for the calling thread (int5 is declared outside this
// section of the file).
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The value is
// stored in interVals at byte offset psValSizes[slot] + keyValOffsets[slot].y,
// and keyValOffsets[slot].y is then advanced by valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .z = previous record's (.z + .w), only when curIndex[slot] != 0;
//   .w = valSize.
//
// curIndex is only read here, never modified.
__device__ void gpuEmitValInt5(int5 val,
                               size_t valSize,
                               size_t* psValSizes,
                               size_t* psCounts,
                               int2* keyValOffsets,
                               char* interVals,
                               int4* interOffsetSizes,
                               size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Store the value at this thread's current write position.
    char* dst = interVals + psValSizes[slot] + keyValOffsets[slot].y;
    *reinterpret_cast<int5*>(dst) = val;
    keyValOffsets[slot].y += valSize;

    // Fill in the value fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->z = prev->z + prev->w;
    }
    rec->w = valSize;
}
//---------------------------------------------------------------------------------------------------------------------
//pointer type
//----------------------------------------------------------------------------------------------------------------------
// Copies exactly `size` bytes from `src` to `des`.
//
// Parameters:
//   des  - destination buffer; must have room for `size` bytes.
//   src  - source buffer; must contain at least `size` readable bytes.
//   size - number of BYTES to copy (not a count of char4 elements).
//
// The pointers are typed char4* for the callers' convenience only; they are
// accessed strictly byte by byte, so neither needs to be 4-byte aligned.
//
// Fix relative to the previous version: for size < 4 the old code assigned
// whole char4 elements (des[i] = src[i]), which moved 4 * size bytes -- reading
// past the end of the source, overwriting bytes beyond the destination span,
// and performing 4-byte vector accesses on possibly unaligned addresses.
// For size >= 4 the old code already copied byte by byte, so results for those
// sizes are unchanged. Using size_t for the counter also removes the
// signed/unsigned comparison and the int truncation of large sizes.
__device__ void CopyData(char4 *des, char4 *src, size_t size)
{
    char *dstBytes = reinterpret_cast<char*>(des);
    const char *srcBytes = reinterpret_cast<const char*>(src);

    for (size_t i = 0; i < size; ++i)
        dstBytes[i] = srcBytes[i];
}
// Emits one variable-length key, given by pointer, for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The key is copied
// with CopyData(dst, key, keySize) into interKeys at byte offset
// psKeySizes[slot] + keyValOffsets[slot].x, and keyValOffsets[slot].x is then
// advanced by keySize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y), only when curIndex[slot] != 0;
//   .y = keySize.
//
// curIndex is only read here, never modified.
__device__ void gpuEmitKeyPointer(char *key,
                                  size_t keySize,
                                  size_t* psKeySizes,
                                  size_t* psCounts,
                                  int2* keyValOffsets,
                                  char* interKeys,
                                  int4* interOffsetSizes,
                                  size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Copy the key to this thread's current write position.
    char* dst = interKeys + psKeySizes[slot] + keyValOffsets[slot].x;
    CopyData(reinterpret_cast<char4*>(dst), reinterpret_cast<char4*>(key), keySize);
    keyValOffsets[slot].x += keySize;

    // Fill in the key fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
    }
    rec->y = keySize;
}
// Emits one variable-length value, given by pointer, for the calling thread.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x. The value is
// copied with CopyData(dst, val, valSize) into interVals at byte offset
// psValSizes[slot] + keyValOffsets[slot].y, and keyValOffsets[slot].y is then
// advanced by valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .z = previous record's (.z + .w), only when curIndex[slot] != 0;
//   .w = valSize.
//
// curIndex is only read here, never modified.
__device__ void gpuEmitValPointer(char *val,
                                  size_t valSize,
                                  size_t* psValSizes,
                                  size_t* psCounts,
                                  int2* keyValOffsets,
                                  char* interVals,
                                  int4* interOffsetSizes,
                                  size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Copy the value to this thread's current write position.
    char* dst = interVals + psValSizes[slot] + keyValOffsets[slot].y;
    CopyData(reinterpret_cast<char4*>(dst), reinterpret_cast<char4*>(val), valSize);
    keyValOffsets[slot].y += valSize;

    // Fill in the value fields of the record being built.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->z = prev->z + prev->w;
    }
    rec->w = valSize;
}
//---------------------------------------------------------------------------------------------------------------------
//emit key and value
//----------------------------------------------------------------------------------------------------------------------
// Emits one complete key/value pair for the calling thread and advances the
// thread's record counter.
//
// The thread slot is blockIdx.x * blockDim.x + threadIdx.x.
//   - key is copied (CopyData, keySize bytes) into interKeys at byte offset
//     psKeySizes[slot] + keyValOffsets[slot].x;
//   - val is copied (CopyData, valSize bytes) into interVals at byte offset
//     psValSizes[slot] + keyValOffsets[slot].y;
//   - keyValOffsets[slot].x / .y are advanced by keySize / valSize.
//
// The record interOffsetSizes[psCounts[slot] + curIndex[slot]] gets:
//   .x = previous record's (.x + .y) and .z = previous record's (.z + .w),
//        both only when curIndex[slot] != 0 (left as-is for the first record);
//   .y = keySize, .w = valSize.
//
// Finally curIndex[slot] is incremented by one.
__device__ void gpuEmitIntermediate(char* key,
                                    char* val,
                                    size_t keySize,
                                    size_t valSize,
                                    size_t* psKeySizes,
                                    size_t* psValSizes,
                                    size_t* psCounts,
                                    int2* keyValOffsets,
                                    char* interKeys,
                                    char* interVals,
                                    int4* interOffsetSizes,
                                    size_t* curIndex)
{
    const size_t slot = blockIdx.x * blockDim.x + threadIdx.x;

    // Resolve both destinations before copying, then copy key and value.
    char4* keyDst = reinterpret_cast<char4*>(
        interKeys + psKeySizes[slot] + keyValOffsets[slot].x);
    char4* valDst = reinterpret_cast<char4*>(
        interVals + psValSizes[slot] + keyValOffsets[slot].y);
    CopyData(keyDst, reinterpret_cast<char4*>(key), keySize);
    CopyData(valDst, reinterpret_cast<char4*>(val), valSize);

    // Advance this thread's write positions.
    keyValOffsets[slot].x += keySize;
    keyValOffsets[slot].y += valSize;

    // Fill in the record for this pair.
    const size_t emitted = curIndex[slot];
    int4* rec = interOffsetSizes + (psCounts[slot] + emitted);
    if (emitted != 0)
    {
        const int4* prev = rec - 1;
        rec->x = prev->x + prev->y;
        rec->z = prev->z + prev->w;
    }
    rec->y = keySize;
    rec->w = valSize;

    // One more record emitted by this thread.
    ++curIndex[slot];
}
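/*
 * Code-viewer residue (not part of the program), kept as a comment:
 * Keyboard shortcuts
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */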