亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關於我們
? 蟲蟲下載站

?? marsgpulib.cu

?? GPU實現的MapReduce framework,對於學習並行編程和CUDA平臺的編程方面有著極好的參考價值
?? CU
?? 第 1 頁 / 共 2 頁
字號:
/*
 * StartGPUSort
 *
 * Optionally sorts the records held in sched->outputSmallChunk and then
 * replaces the chunk's keys/vals/index buffers with freshly allocated host
 * copies.
 *
 *   sched  scheduler state; outputSmallChunk supplies the key, value and
 *          index buffers together with their sizes and the record count.
 *   mode   bit mask; when MAP_SORT or MAP_SORT_REDUCE is set the records are
 *          run through GPUBitonicSortMem first, and the chunk's diffKeyCount
 *          and keyListRange are filled from the sort result.
 *
 * The incoming buffers are treated as device memory: they are the source of
 * D_MEMCPY_D2H and are released with D_FREE before returning.
 */
void StartGPUSort(Schedule_t *sched, char mode)
{
	// Either sort-related bit requests the sort; evaluate the test once.
	const bool wantSort = (mode & MAP_SORT) || (mode & MAP_SORT_REDUCE);

	// Buffers currently attached to the output chunk.
	char *d_keys = sched->outputSmallChunk.keys;
	char *d_vals = sched->outputSmallChunk.vals;
	int4 *d_index = (int4*)sched->outputSmallChunk.index;

	const size_t keyBytes = sched->outputSmallChunk.keySize;
	const size_t valBytes = sched->outputSmallChunk.valSize;
	const size_t recCount = sched->outputSmallChunk.recCount;
	const size_t indexBytes = sizeof(int4)*recCount;

	// The sort is handed the same key/value buffers for input and output;
	// only the index gets a separate destination.
	char *d_sortedKeys = d_keys;
	char *d_sortedVals = d_vals;
	int4 *d_sortedIndex = NULL;
	int2 **keyListRangeSlot;	// only assigned and used when wantSort is true

	size_t diffKeyCount = 0;

	if (wantSort)
	{
		d_sortedIndex = (int4*)D_MALLOC(indexBytes);
		keyListRangeSlot = (int2**)BenMalloc(sizeof(int2*));

		diffKeyCount = GPUBitonicSortMem(d_keys, keyBytes,
										 d_vals, valBytes,
										 d_index, recCount,
										 d_sortedKeys,
										 d_sortedVals,
										 d_sortedIndex,
										 keyListRangeSlot);
	}

	//---------------------------------------------
	// host-side destination buffers
	//---------------------------------------------
	char *h_keys = (char*)BenMalloc(keyBytes);
	char *h_vals = (char*)BenMalloc(valBytes);
	int4 *h_index = (int4*)BenMalloc(indexBytes);

	if (!wantSort)
	{
		// No sort: copy the records back exactly as they are.
		D_MEMCPY_D2H(h_keys, d_keys, keyBytes);
		D_MEMCPY_D2H(h_vals, d_vals, valBytes);
		D_MEMCPY_D2H(h_index, (void*)d_index, indexBytes);
	}
	else
	{
		D_MEMCPY_D2H(h_keys, d_sortedKeys, keyBytes);
		D_MEMCPY_D2H(h_vals, d_sortedVals, valBytes);
		D_MEMCPY_D2H(h_index, d_sortedIndex, indexBytes);

		sched->outputSmallChunk.diffKeyCount = diffKeyCount;
		sched->outputSmallChunk.keyListRange = *keyListRangeSlot;
		BenFree((char**)&keyListRangeSlot, sizeof(int2**));
		// d_sortedKeys / d_sortedVals alias d_keys / d_vals, which are
		// released below, so only the separate index buffer is freed here.
		D_FREE(d_sortedIndex, indexBytes);
	}

	// Publish the host copies through the chunk.
	sched->outputSmallChunk.keys = h_keys;
	sched->outputSmallChunk.vals = h_vals;
	sched->outputSmallChunk.index = h_index;

	// Release the buffers the chunk held on entry.
	D_FREE(d_keys, keyBytes);
	D_FREE(d_vals, valBytes);
	D_FREE(d_index, indexBytes);
}

/*
 * gpuReduceCount -- counting pass of the reduce stage.
 *
 * Each thread visits the distinct keys numbered index, index + taskNum,
 * index + 2*taskNum, ... (index being the flat 1-D thread id) and calls
 * gpu_reduce_count once per key. The loop runs at most recordsPerTask + 1
 * times and a thread stops as soon as its key number reaches recordNum.
 *
 *   interKeys / interVals   intermediate key and value buffers
 *   interOffsetSizes        per-record int4 descriptor; .y of a key's first
 *                           record is used as the key size
 *   interKeyListRange       per distinct key: .x = first record index,
 *                           .y = one past the last (count is .y - .x)
 *   output*PerTask          per-thread accumulators handed to gpu_reduce_count
 *   recordNum               number of entries in interKeyListRange to process
 *   recordsPerTask          per-thread iteration bound (inclusive)
 *   taskNum                 stride between consecutive keys of one thread
 *   keyOffset / valOffset   forwarded unchanged to gpuGetRecordFromBuf
 *
 * Indexing uses only the x dimension of the launch configuration.
 */
__global__	void gpuReduceCount(char*		interKeys,
							  char*	    interVals,
							  int4*	    interOffsetSizes,
							  int2*		interKeyListRange,
							  size_t*   outputKeysSizePerTask,
							  size_t*   outputValsSizePerTask,
							  size_t*   outputCountPerTask,
							  size_t    recordNum, 
							  size_t    recordsPerTask,
							  size_t	taskNum,
							  size_t	keyOffset,
							  size_t	valOffset)
{
	size_t index = (blockIdx.x * blockDim.x + threadIdx.x);

	// Loop counter and key number stay in size_t: i*taskNum+index is computed
	// in size_t and is compared against the size_t recordNum, so narrowing it
	// to int would mix signed/unsigned values and truncate large indices.
	for (size_t i = 0; i <= recordsPerTask; i++)
	{
		const size_t cindex = i*taskNum+index;
		if (cindex >= recordNum) return;

		// Read the [first, end) record range of this key once.
		const int2 range = interKeyListRange[cindex];
		int valStartIndex = range.x;
		int valCount = range.y - range.x;

		size_t keySize = interOffsetSizes[valStartIndex].y;

		// The 4th argument differs between the two calls (0 for the key
		// buffer, 1 for the value buffer) -- presumably a key/value selector;
		// confirm against gpuGetRecordFromBuf.
		char *key = gpuGetRecordFromBuf(interKeys, 
			interOffsetSizes, valStartIndex, 0, keyOffset, valOffset);
		char *vals = gpuGetRecordFromBuf(interVals, 
			interOffsetSizes, valStartIndex, 1, keyOffset, valOffset);

		gpu_reduce_count(key,
		             vals,
				     keySize,
					 valCount,
					 interOffsetSizes,
				     outputKeysSizePerTask,
				     outputValsSizePerTask,
				     outputCountPerTask,
					 valStartIndex);
	}
}

/*
 * gpuReduce -- emitting pass of the reduce stage.
 *
 * Each thread visits the distinct keys numbered index, index + taskNum,
 * index + 2*taskNum, ... (index being the flat 1-D thread id) and calls
 * gpu_reduce once per key. Before the loop the thread seeds the key and
 * value offsets (.x and .z) of its first output descriptor from its
 * prefix-sum entries.
 *
 *   interKeys / interVals   intermediate key and value buffers
 *   interOffsetSizes        per-record int4 descriptor; .y of a key's first
 *                           record is used as the key size
 *   interKeyListRange       per distinct key: .x = first record index,
 *                           .y = one past the last (count is .y - .x)
 *   psKeySizes / psValSizes / psCounts
 *                           per-thread prefix sums of the counting pass
 *   outputKeys / outputVals / outputOffsetSizes
 *                           output buffers forwarded to gpu_reduce
 *   keyValOffsets / curIndex
 *                           per-thread cursors forwarded to gpu_reduce
 *   recordNum               number of entries in interKeyListRange to process
 *   recordsPerTask          per-thread iteration bound (inclusive)
 *   taskNum                 stride between consecutive keys of one thread
 *   keyOffset / valOffset   forwarded unchanged to gpuGetRecordFromBuf
 *
 * Indexing uses only the x dimension of the launch configuration.
 */
__global__ void gpuReduce(char*		interKeys,
						char*		interVals,
						int4*		interOffsetSizes,
						int2*		interKeyListRange,
					    size_t*		psKeySizes,
					    size_t*		psValSizes,
					    size_t*		psCounts,
						char*		outputKeys,
						char*		outputVals,
						int4*		outputOffsetSizes,
						int2*		keyValOffsets,
						size_t*		curIndex,
						size_t		recordNum, 
						size_t		recordsPerTask,
						size_t		taskNum,
						size_t		keyOffset,
						size_t		valOffset)
{
	size_t index = (blockIdx.x * blockDim.x + threadIdx.x);

	// A thread whose very first key number is already out of range processes
	// nothing. It must leave before the seed writes below: such a thread has
	// emitted no records, so its psCounts entry does not name a slot it owns
	// (with an exclusive prefix sum it equals the total record count, i.e. one
	// past the end of outputOffsetSizes).
	if (index >= recordNum) return;

	// Seed the key/value start offsets of this thread's first output record.
	outputOffsetSizes[psCounts[index]].x = psKeySizes[index];
	outputOffsetSizes[psCounts[index]].z = psValSizes[index];

	// Loop counter and key number stay in size_t: i*taskNum+index is computed
	// in size_t and is compared against the size_t recordNum, so narrowing it
	// to int would mix signed/unsigned values and truncate large indices.
	for (size_t i = 0; i <= recordsPerTask; i++)
	{
		const size_t cindex = i*taskNum+index;
		if (cindex >= recordNum) return;

		// Read the [first, end) record range of this key once.
		const int2 range = interKeyListRange[cindex];
		int valStartIndex = range.x;
		int valCount = range.y - range.x;

		size_t keySize = interOffsetSizes[valStartIndex].y;

		// The 4th argument differs between the two calls (0 for the key
		// buffer, 1 for the value buffer) -- presumably a key/value selector;
		// confirm against gpuGetRecordFromBuf.
		char *key = gpuGetRecordFromBuf(interKeys, 
			interOffsetSizes, valStartIndex, 0, keyOffset, valOffset);
		char *vals = gpuGetRecordFromBuf(interVals, 
			interOffsetSizes, valStartIndex, 1, keyOffset, valOffset);

		gpu_reduce(key,
			   vals,
			   keySize,
			   valCount,
			   psKeySizes,
			   psValSizes,
			   psCounts,
			   keyValOffsets,
			   interOffsetSizes,
			   outputKeys,
			   outputVals,
			   outputOffsetSizes,
			   curIndex,
			   valStartIndex);
	}
}

/*
 * StartGPUReduce
 *
 * Runs the reduce stage for the records in sched->inputSmallChunk and stores
 * the result, as newly allocated host buffers, in sched->outputSmallChunk.
 *
 * Steps:
 *   1. copy keys, values, record index and key-list ranges to the device;
 *   2. launch gpuReduceCount to collect per-thread output sizes/counts;
 *   3. prefix-sum those per-thread figures to obtain output totals;
 *   4. allocate the output buffers and launch gpuReduce;
 *   5. copy the output back to host buffers, publish them and their sizes in
 *      sched->outputSmallChunk, and release every device buffer.
 *
 *   sched  scheduler state (must not be NULL); supplies the input chunk and
 *          the launch configuration (gpuReduceGridDim, gpuReduceBlockDim,
 *          gpuReduceSharedMemSize).
 *   mode   not used by this function.
 *
 * Returns without touching sched->outputSmallChunk when the input chunk has
 * no records or when the configured launch would contain zero threads.
 */
void StartGPUReduce(Schedule_t *sched, char mode)
{
	//D_ENTER_FUNC("StartGPUReduce");
	BEN_ASSERT(sched != NULL);

	//-------------------------------------------------------
	//get reduce input data
	//-------------------------------------------------------
	size_t	interRecCount = sched->inputSmallChunk.recCount;
	size_t	interDiffKeyCount = sched->inputSmallChunk.diffKeyCount;
	size_t	interKeySize = sched->inputSmallChunk.keySize;
	size_t	interValSize = sched->inputSmallChunk.valSize;
	size_t	interKeyOffset = sched->inputSmallChunk.keyOffset;
	size_t	interValOffset = sched->inputSmallChunk.valOffset;

	// Nothing to reduce (the count is unsigned, so "empty" means exactly 0).
	if (0 == interRecCount) return;

	char *interKeys = sched->inputSmallChunk.keys;
	char *interVals = sched->inputSmallChunk.vals;
	int4 *interIndex = sched->inputSmallChunk.index;
	int2 *interKeyListRange = sched->inputSmallChunk.keyListRange;

	//----------------------------------------------
	//determine the number of threads to run
	//----------------------------------------------
	// Named so they do not shadow the CUDA built-ins gridDim / blockDim.
	size_t numBlocks = sched->gpuReduceGridDim;
	size_t threadsPerBlock = sched->gpuReduceBlockDim;
	size_t sharedMemSize = sched->gpuReduceSharedMemSize;
	size_t threadNum = numBlocks*threadsPerBlock;

	// A zero-sized launch configuration cannot do any work and would make
	// the division below divide by zero.
	if (0 == threadNum) return;

	size_t recPerThread = interRecCount / threadNum;
	if (0 == recPerThread)
		recPerThread = 1;

	//---------------------------------------------------
	//transfer data to gpu device memory
	//---------------------------------------------------
	char*	d_interKeys = D_MALLOC(interKeySize);
	D_MEMCPY_H2D(d_interKeys, interKeys, interKeySize);

	char*	d_interVals = D_MALLOC(interValSize);
	D_MEMCPY_H2D(d_interVals, interVals, interValSize);

	int4*	d_interIndex = (int4*)D_MALLOC(sizeof(int4)*interRecCount);
	D_MEMCPY_H2D(d_interIndex, interIndex, sizeof(int4)*interRecCount);

	int2*	d_interKeyListRange = (int2*)D_MALLOC(sizeof(int2)*interDiffKeyCount);
	D_MEMCPY_H2D(d_interKeyListRange, interKeyListRange, sizeof(int2)*interDiffKeyCount);

	//----------------------------------------------
	//calculate output data keys'buf size 
	//	 and values' buf size
	//----------------------------------------------
	// NOTE(review): gpuEmitCount accumulates into these arrays with += / ++
	// and nothing here clears them, so presumably D_MALLOC returns
	// zero-filled memory -- confirm.
	size_t*	d_outputKeySizePerThread = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);

	size_t*	d_outputValSizePerThread = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);

	size_t*	d_outputCountPerThread = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);

	// The kernels iterate over distinct keys, hence interDiffKeyCount as the
	// record bound; recPerThread (derived from the record count) only caps
	// the per-thread loop.
	gpuReduceCount<<<numBlocks, threadsPerBlock, sharedMemSize>>>(d_interKeys,
										    d_interVals,
										    d_interIndex,
											d_interKeyListRange,
										    d_outputKeySizePerThread,
										    d_outputValSizePerThread,
										    d_outputCountPerThread,
										    interDiffKeyCount, 
										    recPerThread,
											threadNum,
											interKeyOffset,
											interValOffset);

	//-------------------------------------------
	//do prefix sum
	//-------------------------------------------
	// NOTE(review): the buffers are allocated and indexed as size_t but are
	// reinterpreted as int* for prefexSum. If prefexSum works on int elements
	// this is only correct where sizeof(size_t) == sizeof(int) -- confirm.
	size_t *d_psKeySizes = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);
	size_t allKeySize = prefexSum((int*)d_outputKeySizePerThread, (int*)d_psKeySizes, threadNum);

	size_t *d_psValSizes = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);
	size_t allValSize = prefexSum((int*)d_outputValSizePerThread, (int*)d_psValSizes, threadNum);

	size_t *d_psCounts = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);
	size_t allCounts = prefexSum((int*)d_outputCountPerThread, (int*)d_psCounts, threadNum);

	//----------------------------------------------------
	//allocate output buffer
	//----------------------------------------------------
	char*	d_outputKeys = D_MALLOC(allKeySize);
	char*	d_outputVals = D_MALLOC(allValSize);
	int4*	d_outputIndex = (int4*)D_MALLOC(sizeof(int4)*allCounts);

	//----------------------------------------------------
	//start reduce
	//----------------------------------------------------
	// NOTE(review): gpuEmit reads keyValOffsets[] and curIndex[] before it
	// ever writes them, so both buffers must start at zero; this again
	// relies on D_MALLOC zero-filling -- confirm.
	int2*	d_keyValOffsets = (int2*)D_MALLOC(sizeof(int2)*threadNum);

	size_t*	d_curIndex = (size_t*)D_MALLOC(sizeof(size_t)*threadNum);
	
	gpuReduce<<<numBlocks, threadsPerBlock, sharedMemSize>>>(d_interKeys,
									   d_interVals,
									   d_interIndex,
									   d_interKeyListRange,
									   d_psKeySizes,
									   d_psValSizes,
									   d_psCounts,
									   d_outputKeys,
									   d_outputVals,
									   d_outputIndex,
									   d_keyValOffsets,
									   d_curIndex,
									   interDiffKeyCount, 
									   recPerThread,
									   threadNum,
									   interKeyOffset,
									   interValOffset);

	//----------------------------------------------------
	//output
	//----------------------------------------------------
	char *outputKeys = (char*)BenMalloc(allKeySize);
	char *outputVals = (char*)BenMalloc(allValSize);
	int4 *outputIndex = (int4*)BenMalloc(sizeof(int4)*allCounts);

	D_MEMCPY_D2H(outputKeys, d_outputKeys, allKeySize);
	D_MEMCPY_D2H(outputVals, d_outputVals, allValSize);
	D_MEMCPY_D2H(outputIndex, d_outputIndex, sizeof(int4)*allCounts);

	sched->outputSmallChunk.keys = outputKeys;
	sched->outputSmallChunk.vals = outputVals;
	sched->outputSmallChunk.index = outputIndex;
	sched->outputSmallChunk.keySize = allKeySize;
	sched->outputSmallChunk.valSize = allValSize;
	sched->outputSmallChunk.indexSize = allCounts*sizeof(int4);
	sched->outputSmallChunk.recCount = allCounts;
	// rangeSize is derived from the output chunk's existing diffKeyCount;
	// this function does not assign that field.
	sched->outputSmallChunk.rangeSize = sched->outputSmallChunk.diffKeyCount*sizeof(int2);

	//----------------------------------------------------
	//clean
	//----------------------------------------------------
	D_FREE(d_interKeys, interKeySize);
	D_FREE(d_interVals, interValSize);
	D_FREE(d_interIndex, interRecCount * sizeof(int4));
	D_FREE(d_interKeyListRange, sizeof(int2)*interDiffKeyCount);

	D_FREE(d_outputKeySizePerThread, sizeof(size_t)*threadNum);
	D_FREE(d_outputValSizePerThread, sizeof(size_t)*threadNum);
	D_FREE(d_outputCountPerThread, sizeof(size_t)*threadNum);

	D_FREE(d_psKeySizes, sizeof(size_t)*threadNum);
	D_FREE(d_psValSizes, sizeof(size_t)*threadNum);
	D_FREE(d_psCounts, sizeof(size_t)*threadNum);

	D_FREE(d_outputKeys, allKeySize);
	D_FREE(d_outputVals, allValSize);
	D_FREE(d_outputIndex, allCounts*sizeof(int4));

	D_FREE(d_keyValOffsets, sizeof(int2)*threadNum);
	D_FREE(d_curIndex, sizeof(size_t)*threadNum);

	//D_LEAVE_FUNC("StartGPUReduce");
}

/*
 * gpuEmitCount -- record the size of one would-be output pair.
 *
 * Adds keySize and valSize to the calling thread's entries of
 * outputKeysSizePerTask / outputValsSizePerTask and bumps its entry of
 * outputCountPerTask by one. The entry is selected by the flat 1-D thread id
 * (x dimension only). The entries are accumulated into, not assigned, so the
 * caller is responsible for their initial values.
 */
__device__ void gpuEmitCount(size_t		keySize,
						  size_t		valSize,
						  size_t*		outputKeysSizePerTask,
						  size_t*		outputValsSizePerTask,
						  size_t*		outputCountPerTask)
{
	const size_t tid = (blockIdx.x * blockDim.x + threadIdx.x);

	// Work through pointers to this thread's three accumulator slots.
	size_t *keyBytesSlot = outputKeysSizePerTask + tid;
	*keyBytesSlot += keySize;

	size_t *valBytesSlot = outputValsSizePerTask + tid;
	*valBytesSlot += valSize;

	size_t *countSlot = outputCountPerTask + tid;
	*countSlot += 1;
}

/*
 * gpuEmit -- append one key/value pair to the calling thread's output region.
 *
 * The thread is identified by its flat 1-D id (x dimension only). Its region
 * starts at psKeySizes[id] / psValSizes[id] bytes into outputKeys /
 * outputVals and at descriptor psCounts[id] in outputOffsetSizes;
 * keyValOffsets[id] (.x keys, .y values) and curIndex[id] are the thread's
 * running byte and record cursors inside that region.
 *
 * For every record the descriptor's .y and .w receive keySize and valSize.
 * For every record but the thread's first, .x and .z are derived from the
 * preceding descriptor (previous offset + previous size); the first
 * descriptor's .x and .z are not written here.
 *
 * key, val and the destinations are handed to copyData as char4 pointers --
 * NOTE(review): that presumably requires suitably aligned buffers/sizes;
 * confirm against copyData.
 */
__device__ void gpuEmit  (char*		key, 
					   char*		val, 
					   size_t		keySize, 
					   size_t		valSize,
					   size_t*		psKeySizes, 
					   size_t*		psValSizes, 
					   size_t*		psCounts, 
					   int2*		keyValOffsets, 
					   char*		outputKeys,
					   char*		outputVals,
					   int4*		outputOffsetSizes,
					   size_t*		curIndex)
{
	const size_t tid = (blockIdx.x * blockDim.x + threadIdx.x);

	// Byte cursors of this thread inside its key / value regions.
	int2 *cursor = &keyValOffsets[tid];

	char4 *keyDst = (char4*)(outputKeys + psKeySizes[tid] + cursor->x);
	char4 *valDst = (char4*)(outputVals + psValSizes[tid] + cursor->y);

	copyData(keyDst, (char4*)key, keySize);
	copyData(valDst, (char4*)val, valSize);

	cursor->x += keySize;
	cursor->y += valSize;

	// Descriptor slot for the record being emitted now.
	const size_t emitted = curIndex[tid];
	int4 *slot = &outputOffsetSizes[psCounts[tid] + emitted];

	if (emitted != 0)
	{
		// Not the first record of this thread: chain the offsets from the
		// previous descriptor.
		const int4 *prev = slot - 1;
		slot->x = (prev->x + prev->y);
		slot->z = (prev->z + prev->w);
	}

	slot->y = keySize;

	slot->w = valSize;

	curIndex[tid] = emitted + 1;
}

?? 快捷鍵說明

複製代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
日韩在线一区二区三区| 136国产福利精品导航| 成人三级伦理片| 亚洲一区中文在线| 国产无一区二区| 欧美视频在线观看一区| 国产激情一区二区三区四区| 亚洲午夜激情网站| 中文av一区二区| 精品国产免费人成在线观看| 欧美视频第二页| 成人手机电影网| 久久国产精品露脸对白| 五月婷婷激情综合| 亚洲免费在线播放| 国产精品三级视频| 久久久久国产精品免费免费搜索| 欧美电影影音先锋| 日本高清成人免费播放| 成人av在线播放网站| 国产一区二区三区免费| 肉色丝袜一区二区| 一区二区三区不卡视频在线观看| 中文字幕欧美国产| 久久久久综合网| 久久久久久亚洲综合| 日韩久久精品一区| 欧美一级日韩不卡播放免费| 欧美影院一区二区三区| 色综合天天综合狠狠| 成人午夜视频免费看| 国产精品乡下勾搭老头1| 久久国内精品视频| 美日韩一区二区| 久久精品免费观看| 久久国产乱子精品免费女| 麻豆久久久久久久| 久久国产精品一区二区| 美女视频黄 久久| 精品一区二区成人精品| 久久99精品国产麻豆不卡| 男人的j进女人的j一区| 天堂久久久久va久久久久| 亚洲国产中文字幕在线视频综合| 国产清纯美女被跳蛋高潮一区二区久久w| 精品国产一区久久| 欧美一级搡bbbb搡bbbb| 精品国产伦一区二区三区观看方式 | 美腿丝袜亚洲三区| 亚洲国产精品影院| 日韩激情av在线| 天天色综合天天| 蜜臀久久久99精品久久久久久| 亚洲一区成人在线| 日韩成人一级片| 日一区二区三区| 一区二区三区在线视频免费 | 国产欧美一区视频| 欧美激情资源网| 中文无字幕一区二区三区| 国产精品久久影院| 国产精品久久久久久久久免费相片| 国产精品久久久久久妇女6080 | 视频一区二区国产| 水野朝阳av一区二区三区| 美女视频黄a大片欧美| 麻豆精品久久精品色综合| 亚洲欧美在线另类| 亚洲成人福利片| 日韩黄色免费电影| 国产+成+人+亚洲欧洲自线| 国产成人在线视频播放| av成人免费在线| 色噜噜夜夜夜综合网| 欧美日韩成人一区| 日韩免费一区二区三区在线播放| 久久午夜色播影院免费高清| 国产日韩欧美高清在线| 欧美激情一区二区三区不卡 | 精品写真视频在线观看 | 成人永久免费视频| 色老汉一区二区三区| 欧美日韩视频在线一区二区| 日韩午夜小视频| 国产日本亚洲高清| 国产精品久久久久久久久免费樱桃 | 一区二区三区成人在线视频| 天天爽夜夜爽夜夜爽精品视频| 捆绑调教美女网站视频一区| 国产精品亚洲第一| 色八戒一区二区三区| 9191国产精品| 国产色产综合产在线视频| 天天影视网天天综合色在线播放| 国产一区二区在线视频| 欧美在线小视频| 日韩免费视频一区| 亚洲综合在线电影| 麻豆精品国产传媒mv男同| 国产精品小仙女| 91精品国产91久久久久久一区二区| 久久久久久久久99精品| 亚洲一区二区三区中文字幕在线| 午夜视频久久久久久| 国产成人自拍高清视频在线免费播放| 成人h动漫精品一区二区| 欧美一级电影网站| 国产精品国产三级国产aⅴ入口| 美国精品在线观看| 一本到一区二区三区| 欧美韩日一区二区三区四区| 午夜精品久久久久久久99水蜜桃| 成人avav影音| 日韩欧美高清dvd碟片| 亚洲高清视频在线| 白白色 亚洲乱淫| 久久看人人爽人人| 日日骚欧美日韩| 欧洲视频一区二区| 久久久www免费人成精品| 久久99精品国产麻豆不卡| 日本精品一区二区三区高清| 欧美电影免费观看高清完整版在| 一区二区在线观看不卡| 国产成人三级在线观看| 欧美一区二区三区在线看| 尤物av一区二区| 在线免费视频一区二区| 欧美国产一区二区| 高清成人在线观看| 日韩午夜电影av| 久久99精品国产麻豆婷婷| 欧美日韩久久不卡| 亚洲成av人**亚洲成av**| 不卡一区二区在线| 中文字幕中文字幕在线一区| 精品一区二区av| 久久久精品免费免费| 免费xxxx性欧美18vr| 欧美一区二区人人喊爽| 日韩影院精彩在线| 日韩视频在线你懂得| 日韩精品每日更新| 91麻豆精品91久久久久同性| 亚洲一区二区综合| 欧美电影一区二区三区| 亚洲成在人线在线播放| 欧美精品一二三| 亚洲午夜三级在线| 欧美一区日韩一区| 日本少妇一区二区| 精品国产乱码久久久久久1区2区| 日本女人一区二区三区| 欧美大胆人体bbbb| 久久av老司机精品网站导航| 久久毛片高清国产| 国产精品一区二区91| 中文字幕在线免费不卡| 成人av在线电影| 亚洲一区二区三区不卡国产欧美| 国产一区91精品张津瑜| 中文欧美字幕免费| 99久久伊人精品| 日韩黄色片在线观看| 欧美精品tushy高清| 国产在线不卡一区| 
欧美一区二区日韩| 国产一区二区三区在线观看精品| 精品久久久久久久一区二区蜜臀| 精品一区二区三区免费观看| 国产欧美日本一区二区三区| 波多野结衣中文字幕一区| 亚洲黄色在线视频| 制服丝袜亚洲精品中文字幕| 国产一区二区久久| 国产精品高潮呻吟| 在线播放91灌醉迷j高跟美女 | 日韩一区精品视频| 久久综合久久久久88| 粉嫩高潮美女一区二区三区| 亚洲黄色小说网站| 91麻豆精品91久久久久同性| 久久9热精品视频| 中文字幕在线观看不卡视频| 在线精品国精品国产尤物884a| 亚洲欧美综合网| 日韩一区二区精品在线观看| 国产激情一区二区三区桃花岛亚洲| 国产精品成人网| 欧美高清性hdvideosex| 国产精品1024| 亚洲123区在线观看| 精品国产乱码91久久久久久网站| 在线观看日产精品| 狠狠久久亚洲欧美| 天天亚洲美女在线视频| 国产欧美日韩久久| 日韩欧美中文字幕制服| 波多野结衣一区二区三区| 久久草av在线|