亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關於我們
? 蟲蟲下載站

?? marsgpulib.cu

?? GPU實現的MapReduce framework,對於學習並行編程和CUDA平臺的編程方面有著極好的參考價值
?? CU
?? 第 1 頁 / 共 2 頁
字號:
/*
 * StartGPUSort
 *
 * Optionally sorts the records described by sched->outputSmallChunk with
 * GPUBitonicSortMem, then copies keys, values and the int4 index out of the
 * chunk's current buffers into freshly BenMalloc'ed buffers, stores those new
 * pointers back into the chunk, and releases the old buffers with D_FREE.
 *
 * sched : schedule whose outputSmallChunk is read and updated in place.
 * mode  : bit flags; the sort runs when MAP_SORT or MAP_SORT_REDUCE is set.
 *
 * When the sort runs, the chunk's diffKeyCount and keyListRange are also
 * updated from the sort's results.
 *
 * NOTE(review): the D_MALLOC / D_MEMCPY_D2H / D_FREE naming suggests the
 * incoming chunk pointers are device buffers and the new ones host buffers;
 * confirm against the macro definitions.
 */
void StartGPUSort(Schedule_t *sched, char mode)
{
	const bool sortRequested = (mode & MAP_SORT) || (mode & MAP_SORT_REDUCE);

	// Buffers and sizes as currently recorded in the output chunk.
	char *d_srcKeys = sched->outputSmallChunk.keys;
	char *d_srcVals = sched->outputSmallChunk.vals;
	int4 *d_srcIndex = (int4*)sched->outputSmallChunk.index;

	size_t keyBytes = sched->outputSmallChunk.keySize;
	size_t valBytes = sched->outputSmallChunk.valSize;
	size_t recCount = sched->outputSmallChunk.recCount;

	// The sort is handed the source key/value buffers as its destination as
	// well; only the index gets a separate destination buffer.
	char *d_sortedKeys = d_srcKeys;
	char *d_sortedVals = d_srcVals;
	int4 *d_sortedIndex = NULL;
	int2 **keyRangeSlot;	// only assigned and read when sortRequested
	size_t distinctKeys = 0;

	if (sortRequested)
	{
		d_sortedIndex = (int4*)D_MALLOC(sizeof(int4)*recCount);
		keyRangeSlot = (int2**)BenMalloc(sizeof(int2*));

		distinctKeys = GPUBitonicSortMem(d_srcKeys, keyBytes,
						 d_srcVals, valBytes,
						 d_srcIndex, recCount,
						 d_sortedKeys,
						 d_sortedVals,
						 d_sortedIndex,
						 keyRangeSlot);
	}

	// Destination buffers that will replace the chunk's current ones.
	char *h_keys = (char*)BenMalloc(keyBytes);
	char *h_vals = (char*)BenMalloc(valBytes);
	int4 *h_index = (int4*)BenMalloc(sizeof(int4)*recCount);

	if (!sortRequested)
	{
		// No sort: copy the chunk contents out unchanged.
		D_MEMCPY_D2H(h_keys, d_srcKeys, keyBytes);
		D_MEMCPY_D2H(h_vals, d_srcVals, valBytes);
		D_MEMCPY_D2H(h_index, (void*)d_srcIndex, sizeof(int4)*recCount);
	}
	else
	{
		D_MEMCPY_D2H(h_keys, d_sortedKeys, keyBytes);
		D_MEMCPY_D2H(h_vals, d_sortedVals, valBytes);
		D_MEMCPY_D2H(h_index, d_sortedIndex, sizeof(int4)*recCount);

		// Publish the sort's results, then drop the one-pointer holder
		// and the temporary sorted index.
		sched->outputSmallChunk.diffKeyCount = distinctKeys;
		sched->outputSmallChunk.keyListRange = *keyRangeSlot;
		BenFree((char**)&keyRangeSlot, sizeof(int2**));
		D_FREE(d_sortedIndex, sizeof(int4)*recCount);
	}

	sched->outputSmallChunk.keys = h_keys;
	sched->outputSmallChunk.vals = h_vals;
	sched->outputSmallChunk.index = h_index;

	// The buffers the chunk pointed to on entry are no longer referenced.
	D_FREE(d_srcKeys, keyBytes);
	D_FREE(d_srcVals, valBytes);
	D_FREE(d_srcIndex, sizeof(int4)*recCount);
}

/*
 * gpuReduceCount -- counting pass that runs before gpuReduce.
 *
 * Each thread visits the key groups at positions
 *     index, index + taskNum, index + 2*taskNum, ...
 * (at most recordsPerTask + 1 of them) and stops at the first position that
 * is >= recordNum. For every visited group it looks up the group's key and
 * value list and hands them to gpu_reduce_count together with the
 * per-thread counter arrays.
 *
 * interKeys, interVals  : key / value byte buffers.
 * interOffsetSizes      : int4 per record; this kernel reads .y of a group's
 *                         first record as that group's key size.
 * interKeyListRange     : int2 per key group; .x is the first record of the
 *                         group and (.y - .x) is passed on as the value count.
 * outputKeysSizePerTask,
 * outputValsSizePerTask,
 * outputCountPerTask    : per-thread counters forwarded to gpu_reduce_count.
 * recordNum             : number of entries in interKeyListRange to visit.
 * recordsPerTask        : loop bound; the loop runs recordsPerTask + 1 times
 *                         at most.
 * taskNum               : stride between the groups handled by one thread.
 *                         NOTE(review): presumably the total number of
 *                         launched threads -- confirm at the launch site.
 * keyOffset, valOffset  : forwarded unchanged to gpuGetRecordFromBuf.
 *
 * Expects a 1-D launch: only the .x components of blockIdx, blockDim and
 * threadIdx are used to form the thread id.
 */
__global__	void gpuReduceCount(char*		interKeys,
							  char*	    interVals,
							  int4*	    interOffsetSizes,
							  int2*		interKeyListRange,
							  size_t*   outputKeysSizePerTask,
							  size_t*   outputValsSizePerTask,
							  size_t*   outputCountPerTask,
							  size_t    recordNum, 
							  size_t    recordsPerTask,
							  size_t	taskNum,
							  size_t	keyOffset,
							  size_t	valOffset)
{
	// Widen before multiplying so the thread id is not computed in 32 bits.
	const size_t index = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

	// The loop counter and the strided position stay in size_t: the bounds
	// they are compared against are size_t, and narrowing the position to
	// int would corrupt it for large inputs.
	for (size_t i = 0; i <= recordsPerTask; i++)
	{
		const size_t cindex = i*taskNum + index;
		if (cindex >= recordNum) return;

		// Load the group's range once instead of re-reading it per field.
		const int2 range = interKeyListRange[cindex];
		const int valStartIndex = range.x;
		const int valCount = range.y - range.x;

		const size_t keySize = interOffsetSizes[valStartIndex].y;

		// Selector 0 fetches from the key buffer, selector 1 from the
		// value buffer.
		char *key = gpuGetRecordFromBuf(interKeys, 
			interOffsetSizes, valStartIndex, 0, keyOffset, valOffset);
		char *vals = gpuGetRecordFromBuf(interVals, 
			interOffsetSizes, valStartIndex, 1, keyOffset, valOffset);

		gpu_reduce_count(key,
		             vals,
				     keySize,
					 valCount,
					 interOffsetSizes,
				     outputKeysSizePerTask,
				     outputValsSizePerTask,
				     outputCountPerTask,
					 valStartIndex);
	}
}

/*
 * gpuReduce -- reduce pass that produces the output records.
 *
 * Each thread visits the key groups at positions
 *     index, index + taskNum, index + 2*taskNum, ...
 * (at most recordsPerTask + 1 of them), stops at the first position that is
 * >= recordNum, and calls gpu_reduce for every visited group.
 *
 * interKeys, interVals  : key / value byte buffers.
 * interOffsetSizes      : int4 per record; .y of a group's first record is
 *                         read as that group's key size.
 * interKeyListRange     : int2 per key group; .x is the first record of the
 *                         group and (.y - .x) is passed on as the value count.
 * psKeySizes, psValSizes,
 * psCounts              : per-thread values indexed by thread id.
 *                         NOTE(review): presumably exclusive prefix sums of
 *                         the counters filled by gpuReduceCount -- confirm
 *                         against prefexSum.
 * outputKeys, outputVals,
 * outputOffsetSizes     : output buffers, forwarded to gpu_reduce.
 * keyValOffsets,
 * curIndex              : per-thread state, forwarded to gpu_reduce.
 * recordNum             : number of entries in interKeyListRange to visit.
 * recordsPerTask        : loop bound; the loop runs recordsPerTask + 1 times
 *                         at most.
 * taskNum               : stride between the groups handled by one thread.
 * keyOffset, valOffset  : forwarded unchanged to gpuGetRecordFromBuf.
 *
 * Expects a 1-D launch: only the .x components of blockIdx, blockDim and
 * threadIdx are used to form the thread id.
 */
__global__ void gpuReduce(char*		interKeys,
						char*		interVals,
						int4*		interOffsetSizes,
						int2*		interKeyListRange,
					    size_t*		psKeySizes,
					    size_t*		psValSizes,
					    size_t*		psCounts,
						char*		outputKeys,
						char*		outputVals,
						int4*		outputOffsetSizes,
						int2*		keyValOffsets,
						size_t*		curIndex,
						size_t		recordNum, 
						size_t		recordsPerTask,
						size_t		taskNum,
						size_t		keyOffset,
						size_t		valOffset)
{
	// Widen before multiplying so the thread id is not computed in 32 bits.
	const size_t index = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

	// A thread whose first position is already past recordNum has no key
	// group to process. Return before the header write below: such a thread
	// has no output slot of its own, so psCounts[index] would point at
	// another thread's slot or (with an exclusive scan) one past the end of
	// outputOffsetSizes.
	if (index >= recordNum) return;

	// Seed the first index entry of this thread's output slice with its
	// starting key / value offsets; gpuEmit derives the .x/.z of every later
	// entry from the entry before it.
	outputOffsetSizes[psCounts[index]].x = psKeySizes[index];
	outputOffsetSizes[psCounts[index]].z = psValSizes[index];

	// The loop counter and the strided position stay in size_t: the bounds
	// they are compared against are size_t, and narrowing the position to
	// int would corrupt it for large inputs.
	for (size_t i = 0; i <= recordsPerTask; i++)
	{
		const size_t cindex = i*taskNum + index;
		if (cindex >= recordNum) return;

		// Load the group's range once instead of re-reading it per field.
		const int2 range = interKeyListRange[cindex];
		const int valStartIndex = range.x;
		const int valCount = range.y - range.x;

		const size_t keySize = interOffsetSizes[valStartIndex].y;

		// Selector 0 fetches from the key buffer, selector 1 from the
		// value buffer.
		char *key = gpuGetRecordFromBuf(interKeys, 
			interOffsetSizes, valStartIndex, 0, keyOffset, valOffset);
		char *vals = gpuGetRecordFromBuf(interVals, 
			interOffsetSizes, valStartIndex, 1, keyOffset, valOffset);

		gpu_reduce(key,
			   vals,
			   keySize,
			   valCount,
			   psKeySizes,
			   psValSizes,
			   psCounts,
			   keyValOffsets,
			   interOffsetSizes,
			   outputKeys,
			   outputVals,
			   outputOffsetSizes,
			   curIndex,
			   valStartIndex);
	}
}

/*
 * StartGPUReduce
 *
 * Runs the two-kernel reduce over sched->inputSmallChunk and stores the
 * result in sched->outputSmallChunk:
 *   1. copy keys, values, record index and key-list ranges into D_MALLOC'ed
 *      buffers;
 *   2. launch gpuReduceCount to fill per-thread key-size / value-size /
 *      record-count arrays;
 *   3. run prefexSum over each of those arrays, taking its return value as
 *      the total output size / count;
 *   4. allocate the output buffers and launch gpuReduce;
 *   5. copy the output into BenMalloc'ed buffers, publish them and their
 *      sizes in outputSmallChunk, and D_FREE every D_MALLOC'ed buffer.
 *
 * Returns immediately, leaving outputSmallChunk untouched, when the input
 * chunk has no records.
 *
 * sched : schedule providing the input chunk, the launch configuration
 *         (gpuReduceGridDim, gpuReduceBlockDim, gpuReduceSharedMemSize) and
 *         receiving the output chunk. Must not be NULL.
 * mode  : not used by this function.
 *
 * NOTE(review): the size_t per-thread arrays are handed to prefexSum through
 * (int*) casts; if size_t is wider than int on the target, the element
 * layout prefexSum sees will not match -- confirm prefexSum's element type.
 * NOTE(review): none of the per-thread arrays is explicitly zeroed here
 * although gpuEmitCount accumulates with += and gpuEmit reads curIndex /
 * keyValOffsets before writing them; presumably D_MALLOC zero-fills --
 * confirm.
 * NOTE(review): rangeSize is derived from outputSmallChunk.diffKeyCount,
 * which this function does not assign.
 */
void StartGPUReduce(Schedule_t *sched, char mode)
{
	BEN_ASSERT(sched != NULL);

	// ---- input chunk description ------------------------------------
	size_t	recCount = sched->inputSmallChunk.recCount;
	size_t	groupCount = sched->inputSmallChunk.diffKeyCount;
	size_t	keyBytes = sched->inputSmallChunk.keySize;
	size_t	valBytes = sched->inputSmallChunk.valSize;
	size_t	keyOffset = sched->inputSmallChunk.keyOffset;
	size_t	valOffset = sched->inputSmallChunk.valOffset;

	if (recCount == 0) return;

	char *h_keys = sched->inputSmallChunk.keys;
	char *h_vals = sched->inputSmallChunk.vals;
	int4 *h_index = sched->inputSmallChunk.index;
	int2 *h_ranges = sched->inputSmallChunk.keyListRange;

	// ---- launch configuration ----------------------------------------
	size_t numBlocks = sched->gpuReduceGridDim;
	size_t threadsPerBlock = sched->gpuReduceBlockDim;
	size_t smemBytes = sched->gpuReduceSharedMemSize;
	size_t totalThreads = numBlocks*threadsPerBlock;
	size_t perThread = recCount / totalThreads;
	if (perThread == 0)
		perThread = 1;

	// ---- stage the input ----------------------------------------------
	char*	d_keys = D_MALLOC(keyBytes);
	D_MEMCPY_H2D(d_keys, h_keys, keyBytes);

	char*	d_vals = D_MALLOC(valBytes);
	D_MEMCPY_H2D(d_vals, h_vals, valBytes);

	int4*	d_index = (int4*)D_MALLOC(sizeof(int4)*recCount);
	D_MEMCPY_H2D(d_index, h_index, sizeof(int4)*recCount);

	int2*	d_ranges = (int2*)D_MALLOC(sizeof(int2)*groupCount);
	D_MEMCPY_H2D(d_ranges, h_ranges, sizeof(int2)*groupCount);

	// ---- pass 1: per-thread output sizes and counts --------------------
	size_t*	d_keyBytesPerThread = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);
	size_t*	d_valBytesPerThread = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);
	size_t*	d_countPerThread = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);

	// The kernels iterate over key groups, so groupCount is their bound.
	gpuReduceCount<<<numBlocks, threadsPerBlock, smemBytes>>>(d_keys,
								  d_vals,
								  d_index,
								  d_ranges,
								  d_keyBytesPerThread,
								  d_valBytesPerThread,
								  d_countPerThread,
								  groupCount,
								  perThread,
								  totalThreads,
								  keyOffset,
								  valOffset);

	// ---- prefix sums: per-thread start offsets and grand totals --------
	size_t *d_keyStart = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);
	size_t outKeyBytes = prefexSum((int*)d_keyBytesPerThread, (int*)d_keyStart, totalThreads);

	size_t *d_valStart = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);
	size_t outValBytes = prefexSum((int*)d_valBytesPerThread, (int*)d_valStart, totalThreads);

	size_t *d_countStart = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);
	size_t outCount = prefexSum((int*)d_countPerThread, (int*)d_countStart, totalThreads);

	// ---- output buffers --------------------------------------------------
	char*	d_outKeys = D_MALLOC(outKeyBytes);
	char*	d_outVals = D_MALLOC(outValBytes);
	int4*	d_outIndex = (int4*)D_MALLOC(sizeof(int4)*outCount);

	// ---- pass 2: the reduce itself ---------------------------------------
	int2*	d_writeOffsets = (int2*)D_MALLOC(sizeof(int2)*totalThreads);
	size_t*	d_emitted = (size_t*)D_MALLOC(sizeof(size_t)*totalThreads);

	gpuReduce<<<numBlocks, threadsPerBlock, smemBytes>>>(d_keys,
							     d_vals,
							     d_index,
							     d_ranges,
							     d_keyStart,
							     d_valStart,
							     d_countStart,
							     d_outKeys,
							     d_outVals,
							     d_outIndex,
							     d_writeOffsets,
							     d_emitted,
							     groupCount,
							     perThread,
							     totalThreads,
							     keyOffset,
							     valOffset);

	// ---- fetch and publish the result ------------------------------------
	char *h_outKeys = (char*)BenMalloc(outKeyBytes);
	char *h_outVals = (char*)BenMalloc(outValBytes);
	int4 *h_outIndex = (int4*)BenMalloc(sizeof(int4)*outCount);

	D_MEMCPY_D2H(h_outKeys, d_outKeys, outKeyBytes);
	D_MEMCPY_D2H(h_outVals, d_outVals, outValBytes);
	D_MEMCPY_D2H(h_outIndex, d_outIndex, sizeof(int4)*outCount);

	sched->outputSmallChunk.keys = h_outKeys;
	sched->outputSmallChunk.vals = h_outVals;
	sched->outputSmallChunk.index = h_outIndex;
	sched->outputSmallChunk.keySize = outKeyBytes;
	sched->outputSmallChunk.valSize = outValBytes;
	sched->outputSmallChunk.indexSize = outCount*sizeof(int4);
	sched->outputSmallChunk.recCount = outCount;
	sched->outputSmallChunk.rangeSize = sched->outputSmallChunk.diffKeyCount*sizeof(int2);

	// ---- release everything obtained from D_MALLOC -----------------------
	D_FREE(d_keys, keyBytes);
	D_FREE(d_vals, valBytes);
	D_FREE(d_index, recCount * sizeof(int4));
	D_FREE(d_ranges, sizeof(int2)*groupCount);

	D_FREE(d_keyBytesPerThread, sizeof(size_t)*totalThreads);
	D_FREE(d_valBytesPerThread, sizeof(size_t)*totalThreads);
	D_FREE(d_countPerThread, sizeof(size_t)*totalThreads);

	D_FREE(d_keyStart, sizeof(size_t)*totalThreads);
	D_FREE(d_valStart, sizeof(size_t)*totalThreads);
	D_FREE(d_countStart, sizeof(size_t)*totalThreads);

	D_FREE(d_outKeys, outKeyBytes);
	D_FREE(d_outVals, outValBytes);
	D_FREE(d_outIndex, outCount*sizeof(int4));

	D_FREE(d_writeOffsets, sizeof(int2)*totalThreads);
	D_FREE(d_emitted, sizeof(size_t)*totalThreads);
}

/*
 * gpuEmitCount -- record the size of one would-be output record.
 *
 * Adds keySize and valSize to the calling thread's entries in
 * outputKeysSizePerTask / outputValsSizePerTask and increments its entry in
 * outputCountPerTask. The entry is selected by the thread id formed from
 * the .x components of blockIdx, blockDim and threadIdx.
 *
 * The arrays are accumulated into, never reset, so the caller is
 * responsible for their initial contents.
 */
__device__ void gpuEmitCount(size_t		keySize,
						  size_t		valSize,
						  size_t*		outputKeysSizePerTask,
						  size_t*		outputValsSizePerTask,
						  size_t*		outputCountPerTask)
{
	const size_t slot = (blockIdx.x * blockDim.x + threadIdx.x);

	size_t *keyBytes = outputKeysSizePerTask + slot;
	size_t *valBytes = outputValsSizePerTask + slot;
	size_t *records = outputCountPerTask + slot;

	*keyBytes = *keyBytes + keySize;
	*valBytes = *valBytes + valSize;
	*records = *records + 1;
}

/*
 * gpuEmit -- append one key/value record to the calling thread's output.
 *
 * All per-thread arrays are indexed by the thread id formed from the .x
 * components of blockIdx, blockDim and threadIdx.
 *
 * key, val          : record data; copied with copyData through char4
 *                     pointers.
 * keySize, valSize  : sizes passed to copyData and stored in the index entry.
 * psKeySizes,
 * psValSizes        : per-thread base offsets into outputKeys / outputVals.
 * psCounts          : per-thread base position in outputOffsetSizes.
 * keyValOffsets     : per-thread running offsets (.x for keys, .y for
 *                     values) past the base; advanced by this call.
 * outputKeys,
 * outputVals        : destination byte buffers.
 * outputOffsetSizes : int4 index; the entry written here gets .y = keySize
 *                     and .w = valSize. For every entry after the thread's
 *                     first, .x and .z are set to the previous entry's
 *                     (.x + .y) and (.z + .w). The first entry's .x/.z are
 *                     not written here (gpuReduce sets them).
 * curIndex          : per-thread count of records already emitted;
 *                     incremented by this call.
 */
__device__ void gpuEmit  (char*		key, 
					   char*		val, 
					   size_t		keySize, 
					   size_t		valSize,
					   size_t*		psKeySizes, 
					   size_t*		psValSizes, 
					   size_t*		psCounts, 
					   int2*		keyValOffsets, 
					   char*		outputKeys,
					   char*		outputVals,
					   int4*		outputOffsetSizes,
					   size_t*		curIndex)
{
	const size_t tid = (blockIdx.x * blockDim.x + threadIdx.x);

	// Where this thread's next key / value bytes go.
	int2 *cursor = &keyValOffsets[tid];
	char4 *keyDst = (char4*)(outputKeys + psKeySizes[tid] + cursor->x);
	char4 *valDst = (char4*)(outputVals + psValSizes[tid] + cursor->y);

	copyData(keyDst, (char4*)key, keySize);
	copyData(valDst, (char4*)val, valSize);

	cursor->x += keySize;
	cursor->y += valSize;

	// Index entry for this record within the thread's output slice.
	const size_t emitted = curIndex[tid];
	int4 *entry = &outputOffsetSizes[psCounts[tid] + emitted];

	if (emitted != 0)
	{
		// Offsets continue where the previous entry ended.
		const int4 *prev = entry - 1;
		entry->x = (prev->x + prev->y);
		entry->z = (prev->z + prev->w);
	}

	entry->y = keySize;
	entry->w = valSize;

	curIndex[tid] = emitted + 1;
}

?? 快捷鍵說明

複製代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
国产精品美女久久久久久久网站| 欧美日韩亚州综合| 国产精品全国免费观看高清| 国产成人自拍网| 国产精品黄色在线观看| 99久久精品一区| 一区二区三区四区不卡在线| 欧美亚洲动漫制服丝袜| 免费成人美女在线观看.| 日韩精品一区二| 国产精品一区二区三区99| 中文字幕一区二区不卡| 色婷婷狠狠综合| 五月婷婷综合在线| 精品毛片乱码1区2区3区| 成人性色生活片免费看爆迷你毛片| 中文无字幕一区二区三区| 色综合激情五月| 久久精品国产一区二区| 国产欧美一区二区精品秋霞影院| 97精品视频在线观看自产线路二| 亚洲国产成人av网| 久久蜜桃av一区二区天堂| 色综合久久中文综合久久牛| 亚洲成人高清在线| 久久久亚洲精品一区二区三区| 成a人片亚洲日本久久| 天天操天天干天天综合网| 精品久久久久一区| 色妹子一区二区| 精品一区二区av| 亚洲电影你懂得| 久久综合成人精品亚洲另类欧美| 色婷婷综合激情| 国内成人免费视频| 午夜婷婷国产麻豆精品| 中文一区在线播放| 678五月天丁香亚洲综合网| 成人av综合一区| 欧美aaa在线| 亚洲一区二区三区四区五区黄| 久久久亚洲精品石原莉奈| 欧美日韩在线播放三区| 成人免费视频一区二区| 人人超碰91尤物精品国产| 亚洲精品久久7777| 中文字幕精品三区| 精品国产乱子伦一区| 欧美午夜片在线看| 97精品电影院| 另类专区欧美蜜桃臀第一页| 一区二区三区日韩欧美精品| 欧美国产一区二区| 精品成人佐山爱一区二区| 欧美日韩亚洲不卡| 在线视频一区二区三| 成人黄色a**站在线观看| 麻豆精品新av中文字幕| 天堂一区二区在线| 亚洲精品欧美专区| 最近日韩中文字幕| 国产精品热久久久久夜色精品三区 | 91精品国产综合久久精品app | 自拍偷拍亚洲欧美日韩| 久久人人爽人人爽| 精品国产伦一区二区三区观看方式 | 久久91精品久久久久久秒播| 亚洲第一福利视频在线| 亚洲黄色录像片| 亚洲色图清纯唯美| 国产视频不卡一区| 久久精品水蜜桃av综合天堂| 久久综合一区二区| 久久免费偷拍视频| 国产亚洲欧美色| 国产欧美日韩亚州综合| 国产清纯白嫩初高生在线观看91 | 国产午夜精品福利| 久久久精品中文字幕麻豆发布| 日韩一区二区精品| 91精品国产综合久久久久久漫画 | 亚洲国产精品精华液网站| 亚洲精品免费播放| 亚洲国产视频在线| 视频一区在线播放| 日韩av电影免费观看高清完整版在线观看| 一区二区三区在线视频免费| 一区二区三区四区不卡在线| 亚洲一区二区三区四区在线| 视频一区欧美日韩| 九九精品一区二区| 成人精品视频一区二区三区| eeuss鲁片一区二区三区| 99九九99九九九视频精品| 色菇凉天天综合网| 91麻豆精品久久久久蜜臀 | 在线观看视频91| 91精品国产综合久久小美女| 欧美精品一区二区久久久| 欧美国产在线观看| 亚洲国产成人tv| 久久99国产精品尤物| 粉嫩欧美一区二区三区高清影视| caoporm超碰国产精品| 日本电影亚洲天堂一区| 5月丁香婷婷综合| 久久久精品国产免大香伊| 国产精品护士白丝一区av| 午夜精品久久久久| 国内精品伊人久久久久av影院| 高清不卡一区二区| 欧美综合亚洲图片综合区| 日韩三级精品电影久久久| 国产免费观看久久| 亚洲激情成人在线| 精品一区二区三区免费视频| 91在线国内视频| 7777精品伊人久久久大香线蕉| 久久久久久99精品| 亚洲综合视频网| 激情小说亚洲一区| 色综合视频一区二区三区高清| 日韩欧美一级片| 亚洲素人一区二区| 蜜桃视频一区二区| 91免费观看视频在线| 日韩亚洲欧美在线| 亚洲欧洲制服丝袜| 国产伦精品一区二区三区免费迷 | 成人午夜激情影院| 欧美日韩极品在线观看一区| 久久久久久久一区| 天天综合天天综合色| 成人性视频网站| 宅男在线国产精品| 亚洲人成亚洲人成在线观看图片 | 成人国产精品免费网站| 欧美日韩一本到| 国产精品久久午夜夜伦鲁鲁| 日日夜夜免费精品| 91在线你懂得| 欧美国产欧美综合| 久久福利资源站| 欧美日韩一区中文字幕| 亚洲视频狠狠干| 福利电影一区二区| 精品捆绑美女sm三区| 视频一区二区不卡| 色狠狠综合天天综合综合| 国产精品毛片大码女人| 韩国av一区二区三区| 91精品欧美综合在线观看最新| 欧美不卡一二三| 欧美日韩国产免费| 国产精品理伦片| 国产一区二区福利| 欧美一卡在线观看| 五月婷婷综合激情| 欧美日韩在线播放三区| 亚洲人成在线观看一区二区| 成人黄色小视频| 国产欧美日韩卡一| 国产精品一二三四五| 久久久久久久久久久久久久久99| 日本伊人精品一区二区三区观看方式| 在线精品视频免费播放| 
亚洲另类在线视频| 91福利小视频| 一区二区三区四区国产精品| 色综合一区二区| 综合久久久久久久| 91亚洲国产成人精品一区二区三 | 午夜久久久久久电影| 91欧美一区二区| 国产欧美一区二区在线| 国产精品一二三| 国产亚洲成年网址在线观看| 国产乱码精品一区二区三| 久久这里只有精品视频网| 激情深爱一区二区| 亚洲精品在线观| 懂色中文一区二区在线播放| 国产精品情趣视频| 9色porny自拍视频一区二区| 自拍偷拍亚洲综合| 91官网在线免费观看| 爽好多水快深点欧美视频| 日韩精品一区二区三区三区免费| 国产在线日韩欧美| 国产精品麻豆欧美日韩ww| 色综合久久99| 日本视频一区二区| 久久久亚洲精品石原莉奈| kk眼镜猥琐国模调教系列一区二区| 亚洲天天做日日做天天谢日日欢| 色吊一区二区三区| 久久er精品视频| 国产精品久久久久久久久快鸭 | 欧美精品123区| 精品一二三四在线|