亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? marssort.cu

?? GPU實現的MapReduce framework,對于學習并行編程和cuda平臺的編程方面有著極好的參考價值
?? CU
?? 第 1 頁 / 共 4 頁
字號:
	const int bid=bx+by*gridDim.x;
	const int numThread=blockDim.x;
	const int resultID=(bid)*numThread+tid;
	int pos=startPos+resultID;
	if(pos<rLen)
	{
		cmp_type_t value=d_input[pos];
		value.z=d_value[pos].x;
		value.w=d_value[pos].y;
		d_input[pos]=value;
	}
}

// Host-side driver for getIntYArray_kernel.
// Covers the rLen records in slabs of 512 blocks x 512 threads, issuing one
// kernel launch per slab, then waits for the device to finish.
//   d_data   : forwarded unchanged to the kernel (presumably device memory
//              holding the int2 records -- the kernel body is not visible here)
//   rLen     : number of records; also forwarded to the kernel as its bound
//   d_output : forwarded unchanged to the kernel (presumably receives one int
//              per record -- confirm against getIntYArray_kernel)
void getIntYArray(int2 *d_data, int rLen, int* d_output)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	// number of records a single launch can address
	const int perLaunch=blocksPerLaunch*threadsPerBlock;
	// full slabs, plus one more when rLen is not a multiple of perLaunch
	const int launches=rLen/perLaunch+((rLen%perLaunch!=0)?1:0);

	dim3  blockShape( threadsPerBlock, 1, 1);
	dim3  gridShape( blocksPerLaunch, 1 , 1);
	for(int k=0;k<launches;k++)
	{
		// index of the first record handled by this launch
		const int base=k*perLaunch;
		getIntYArray_kernel<<<gridShape,blockShape>>>(d_data, base, rLen, d_output);
	}
	cudaThreadSynchronize();
}

// Host-side driver for getXYArray_kernel.
// Issues one launch of 512 blocks x 512 threads per slab of records until all
// rLen records are covered, then waits for the device to finish.
//   d_data   : forwarded unchanged to the kernel as its cmp_type_t array
//   rLen     : number of records; also forwarded to the kernel as its bound
//   d_output : forwarded unchanged to the kernel (presumably receives the x/y
//              pair of each record -- confirm against getXYArray_kernel)
void getXYArray(cmp_type_t *d_data, int rLen, int2* d_output)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	// number of records a single launch can address
	const int perLaunch=blocksPerLaunch*threadsPerBlock;
	// full slabs, plus one more when rLen is not a multiple of perLaunch
	const int launches=rLen/perLaunch+((rLen%perLaunch!=0)?1:0);

	dim3  blockShape( threadsPerBlock, 1, 1);
	dim3  gridShape( blocksPerLaunch, 1 , 1);
	for(int k=0;k<launches;k++)
	{
		// index of the first record handled by this launch
		const int base=k*perLaunch;
		getXYArray_kernel<<<gridShape,blockShape>>>(d_data, base, rLen, d_output);
	}
	cudaThreadSynchronize();
}

// Host-side driver for getZWArray_kernel.
// Issues one launch of 512 blocks x 512 threads per slab of records until all
// rLen records are covered, then waits for the device to finish.
//   d_data   : forwarded unchanged to the kernel as its cmp_type_t array
//   rLen     : number of records; also forwarded to the kernel as its bound
//   d_output : forwarded unchanged to the kernel (presumably receives the z/w
//              pair of each record -- confirm against getZWArray_kernel)
void getZWArray(cmp_type_t *d_data, int rLen, int2* d_output)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	// number of records a single launch can address
	const int perLaunch=blocksPerLaunch*threadsPerBlock;
	// full slabs, plus one more when rLen is not a multiple of perLaunch
	const int launches=rLen/perLaunch+((rLen%perLaunch!=0)?1:0);

	dim3  blockShape( threadsPerBlock, 1, 1);
	dim3  gridShape( blocksPerLaunch, 1 , 1);
	for(int k=0;k<launches;k++)
	{
		// index of the first record handled by this launch
		const int base=k*perLaunch;
		getZWArray_kernel<<<gridShape,blockShape>>>(d_data, base, rLen, d_output);
	}
	cudaThreadSynchronize();
}

// Host-side driver for setXYArray_kernel.
// Issues one launch of 512 blocks x 512 threads per slab of records until all
// rLen records are covered, then waits for the device to finish.
//   d_data  : forwarded unchanged to the kernel as its cmp_type_t array
//             (presumably updated in place -- confirm against the kernel)
//   rLen    : number of records; also forwarded to the kernel as its bound
//   d_value : forwarded unchanged to the kernel (presumably the new x/y pair
//             for each record -- confirm against setXYArray_kernel)
void setXYArray(cmp_type_t *d_data, int rLen, int2* d_value)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	// number of records a single launch can address
	const int perLaunch=blocksPerLaunch*threadsPerBlock;
	// full slabs, plus one more when rLen is not a multiple of perLaunch
	const int launches=rLen/perLaunch+((rLen%perLaunch!=0)?1:0);

	dim3  blockShape( threadsPerBlock, 1, 1);
	dim3  gridShape( blocksPerLaunch, 1 , 1);
	for(int k=0;k<launches;k++)
	{
		// index of the first record handled by this launch
		const int base=k*perLaunch;
		setXYArray_kernel<<<gridShape,blockShape>>>(d_data, base, rLen, d_value);
	}
	cudaThreadSynchronize();
}

// Host-side driver for setZWArray_kernel.
// Issues one launch of 512 blocks x 512 threads per slab of records until all
// rLen records are covered, then waits for the device to finish.
//   d_data  : forwarded unchanged to the kernel as its cmp_type_t array
//             (presumably updated in place -- confirm against the kernel)
//   rLen    : number of records; also forwarded to the kernel as its bound
//   d_value : forwarded unchanged to the kernel (presumably the new z/w pair
//             for each record -- confirm against setZWArray_kernel)
void setZWArray(cmp_type_t *d_data, int rLen, int2* d_value)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	// number of records a single launch can address
	const int perLaunch=blocksPerLaunch*threadsPerBlock;
	// full slabs, plus one more when rLen is not a multiple of perLaunch
	const int launches=rLen/perLaunch+((rLen%perLaunch!=0)?1:0);

	dim3  blockShape( threadsPerBlock, 1, 1);
	dim3  gridShape( blocksPerLaunch, 1 , 1);
	for(int k=0;k<launches;k++)
	{
		// index of the first record handled by this launch
		const int base=k*perLaunch;
		setZWArray_kernel<<<gridShape,blockShape>>>(d_data, base, rLen, d_value);
	}
	cudaThreadSynchronize();
}
// One thread per record: copies the record's bytes from d_source to d_dest
// and then rewrites the record's x field to the destination offset.
//   d_source : byte buffer, read at [d_Rin[pos].x, d_Rin[pos].x + d_Rin[pos].y)
//   startPos : record index assigned to the first thread of this launch
//   d_Rin    : per-record pair (x = source byte offset, y = byte count);
//              x is overwritten with the destination offset
//   rLen     : threads whose record index is >= rLen do nothing
//   d_sum    : per-record destination byte offset
//   d_dest   : byte buffer, written at [d_sum[pos], d_sum[pos] + d_Rin[pos].y)
// NOTE(review): the flat index uses blockDim.x as the threads-per-block count,
// so record indices are only unique when blockDim.y == 1.
__global__ void copyChunks_kernel(void *d_source, int startPos, int2* d_Rin, int rLen, int *d_sum, void *d_dest)
{
	const int threadsX=blockDim.x;
	const int localIdx=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockFlat=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+blockFlat*threadsX+localIdx;

	// threads past the end of the record list have nothing to copy
	if(pos>=rLen)
		return;

	int2 rec=d_Rin[pos];
	const int readAt=rec.x;
	const int nBytes=rec.y;
	const int writeAt=d_sum[pos];
	char *src=(char*)d_source;
	char *dst=(char*)d_dest;
	// byte-by-byte copy, lowest address first
	int b=0;
	while(b<nBytes)
	{
		dst[writeAt+b]=src[readAt+b];
		b++;
	}
	// the record now points at its new location in d_dest
	rec.x=writeAt;
	d_Rin[pos]=rec;
}

// One thread per record: writes 1 to d_startArray[pos] when record pos starts
// a new chunk, otherwise 0. Record 0 always starts a chunk; any other record
// starts one when getCompareValue(d_rawData, cur, left) is non-zero for it
// and its left neighbour.
//   d_rawData    : forwarded unchanged to getCompareValue
//   startPos     : record index assigned to the first thread of this launch
//   d_Rin        : record array; entries pos and pos-1 are read
//   rLen         : threads whose record index is >= rLen do nothing
//   d_startArray : output flags, one int per record
// NOTE(review): the flat index uses blockDim.x as the threads-per-block count,
// so record indices are only unique when blockDim.y == 1.
__global__ void getChunkBoundary_kernel(void* d_rawData, int startPos, cmp_type_t *d_Rin, 
										int rLen, int* d_startArray)
{
	const int threadsX=blockDim.x;
	const int localIdx=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockFlat=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+blockFlat*threadsX+localIdx;

	if(pos>=rLen)
		return;

	// the very first record opens a chunk by definition
	int isStart=1;
	if(pos!=0)
	{
		cmp_type_t cur=d_Rin[pos];
		cmp_type_t left=d_Rin[pos-1];
		// a non-zero comparison against the left neighbour marks a boundary
		isStart=(getCompareValue(d_rawData, cur, left)!=0)?1:0;
	}
	d_startArray[pos]=isStart;
}

// One thread per entry of d_boundary: builds the pair
// (d_boundary[pos], d_boundary[pos+1]) for entry pos, using rLen as the
// second component for the last entry.
//   d_boundary      : numKey ints
//   startPos        : entry index assigned to the first thread of this launch
//   numKey          : number of entries; threads with index >= numKey do nothing
//   rLen            : value stored in .y of the last pair
//   d_boundaryRange : output, one int2 per entry
// NOTE(review): the flat index uses blockDim.x as the threads-per-block count,
// so entry indices are only unique when blockDim.y == 1.
__global__ void setBoundaryInt2_kernel(int* d_boundary, int startPos, int numKey, int rLen,
										  int2* d_boundaryRange)
{
	const int threadsX=blockDim.x;
	const int localIdx=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockFlat=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+blockFlat*threadsX+localIdx;

	if(pos>=numKey)
		return;

	int2 range;
	range.x=d_boundary[pos];
	// the last entry has no successor, so it is closed with rLen
	range.y=((pos+1)==numKey)?rLen:d_boundary[pos+1];
	d_boundaryRange[pos]=range;
}

// One thread per record: when d_startArray[pos] is 1, stores pos into
// d_bounary at the slot given by d_startSumArray[pos].
//   startPos        : record index assigned to the first thread of this launch
//   rLen            : threads whose record index is >= rLen do nothing
//   d_startArray    : per-record flag; only the value 1 triggers a write
//   d_startSumArray : per-record output slot
//   d_bounary       : output array of record indices
// NOTE(review): the flat index uses blockDim.x as the threads-per-block count,
// so record indices are only unique when blockDim.y == 1.
__global__ void writeBoundary_kernel(int startPos, int rLen, int* d_startArray,
									int* d_startSumArray, int* d_bounary)
{
	const int threadsX=blockDim.x;
	const int localIdx=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockFlat=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+blockFlat*threadsX+localIdx;

	if(pos>=rLen)
		return;

	const int isStart=d_startArray[pos];
	const int slot=d_startSumArray[pos];
	// only flagged records contribute an entry
	if(isStart!=1)
		return;
	d_bounary[slot]=pos;
}

// Copies the byte ranges described by d_Rin from d_source into d_dest.
// Steps:
//   1. getIntYArray fills a temporary int array from d_Rin (presumably the
//      y field, i.e. each record's byte count -- confirm against its kernel).
//   2. prescanArray turns that array into per-record write offsets
//      (presumably a prefix sum -- confirm against prescanArray).
//   3. copyChunks_kernel copies each record's bytes to its write offset and
//      stores that offset back into the record's x field.
//   d_source : source byte buffer, forwarded to the kernel
//   d_Rin    : rLen records (x = source offset, y = byte count); x is rewritten
//   rLen     : number of records
//   d_dest   : destination byte buffer, forwarded to the kernel
void copyChunks(void *d_source, int2* d_Rin, int rLen, void *d_dest)
{
	// step 1: per-record values extracted from d_Rin
	int* d_bytes;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_bytes), sizeof(int)*rLen) );
	getIntYArray(d_Rin, rLen, d_bytes);

	// step 2: per-record write offsets
	int* d_writeAt;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_writeAt), sizeof(int)*rLen) );
	saven_initialPrefixSum(rLen);
	prescanArray(d_writeAt,d_bytes,rLen);
	cudaFree(d_bytes);

	// step 3: one launch of 512 blocks x 128 threads per slab of records
	const int threadsPerBlock=128;
	const int blocksPerLaunch=512;
	const int perLaunch=blocksPerLaunch*threadsPerBlock;
	// full slabs, plus one more when rLen is not a multiple of perLaunch
	const int launches=rLen/perLaunch+((rLen%perLaunch!=0)?1:0);

	dim3  blockShape( threadsPerBlock, 1, 1);
	dim3  gridShape( blocksPerLaunch, 1 , 1);
	for(int k=0;k<launches;k++)
	{
		// index of the first record handled by this launch
		const int base=k*perLaunch;
		copyChunks_kernel<<<gridShape,blockShape>>>(d_source, base, d_Rin, rLen, d_writeAt, d_dest);
	}
	cudaThreadSynchronize();

	cudaFree(d_writeAt);

}
// Finds the chunk boundaries in d_Rin and returns the number of chunks.
// A record starts a new chunk when getChunkBoundary_kernel flags it (record 0
// always does; others when getCompareValue against the left neighbour is
// non-zero).
//   d_source             : forwarded to getChunkBoundary_kernel as raw data
//   d_Rin                : rLen records to scan
//   rLen                 : number of records
//   h_outputKeyListRange : out; receives a BenMalloc'ed host array with one
//                          int2 per chunk, (x = first record index,
//                          y = first record index of the next chunk, or rLen
//                          for the last chunk). Set to NULL when rLen <= 0.
// Returns the number of chunks (0 when rLen <= 0).
int getChunkBoundary(void *d_source, cmp_type_t* d_Rin, int rLen, int2 ** h_outputKeyListRange)
{
	// Empty input: nothing to scan. Without this guard the code below would
	// read element (rLen-1) == -1 of zero-sized device allocations.
	if(rLen<=0)
	{
		*h_outputKeyListRange=NULL;
		return 0;
	}

	int resultNumChunks=0;
	//get the chunk boundary[start of chunk0, start of chunk 1, ...]
	int* d_startArray;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startArray), sizeof(int)*rLen) );	
	
	int numThreadsPerBlock_x=512;
	int numThreadsPerBlock_y=1;
	int numBlock_x=512;
	int numBlock_y=1;
	// number of records a single launch can address
	int chunkSize=numBlock_x*numThreadsPerBlock_x;
	int numChunk=rLen/chunkSize;
	if(rLen%chunkSize!=0)
		numChunk++;

	dim3  thread( numThreadsPerBlock_x, numThreadsPerBlock_y, 1);
	dim3  grid( numBlock_x, numBlock_y , 1);
	int i=0;
	int start=0;
	// flag every record that starts a chunk (1) or not (0)
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		getChunkBoundary_kernel<<<grid,thread>>>(d_source, start, d_Rin, rLen, d_startArray);
	} 
	cudaThreadSynchronize();
	//prefix sum for write positions.
	int* d_startSumArray;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startSumArray), sizeof(int)*rLen) );
	saven_initialPrefixSum(rLen);
	prescanArray(d_startSumArray,d_startArray,rLen);

	//gpuPrint(d_startSumArray, rLen, "d_startSumArray");

	// Total chunk count = last scan entry + last flag.
	// NOTE(review): this presumes prescanArray is an exclusive scan -- confirm.
	int lastValue=0;
	int partialSum=0;
	CUDA_SAFE_CALL( cudaMemcpy( &lastValue, d_startArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
	//gpuPrint(d_startArray, rLen, "d_startArray");
	CUDA_SAFE_CALL( cudaMemcpy( &partialSum, d_startSumArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
	//gpuPrint(d_startSumArray, rLen, "d_startSumArray");
	resultNumChunks=lastValue+partialSum;

	int* d_boundary;//[start of chunk0, start of chunk 1, ...]
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_boundary), sizeof(int)*resultNumChunks) );

	// compact the flagged record indices into d_boundary
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		writeBoundary_kernel<<<grid,thread>>>(start, rLen, d_startArray,
									d_startSumArray, d_boundary);
	} 
	cudaFree(d_startArray);
	cudaFree(d_startSumArray);	

	//set the int2 boundary. 
	int2 *d_outputKeyListRange;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_outputKeyListRange), sizeof(int2)*resultNumChunks) );
	// this pass iterates over chunks rather than records
	numChunk=resultNumChunks/chunkSize;
	if(resultNumChunks%chunkSize!=0)
		numChunk++;
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		setBoundaryInt2_kernel<<<grid,thread>>>(d_boundary, start, resultNumChunks, rLen, d_outputKeyListRange);
	} 
	cudaThreadSynchronize();

	// hand the ranges back to the caller in host memory
	*h_outputKeyListRange=(int2*)BenMalloc(sizeof(int2)*resultNumChunks);
	CUDA_SAFE_CALL( cudaMemcpy( *h_outputKeyListRange, d_outputKeyListRange, sizeof(int2)*resultNumChunks, cudaMemcpyDeviceToHost) );
	
	cudaFree(d_boundary);
	cudaFree(d_outputKeyListRange);
	return resultNumChunks;

}

// Sorts the records with bitonicSortGPU, scatters values and keys into the
// output buffers in the sorted order, and computes the per-key record ranges.
//   d_inputKeyArray      : input key bytes; keys are scattered only when non-NULL
//   totalKeySize         : not used by this function
//   d_inputValArray      : input value bytes; values are scattered only when non-NULL
//   totalValueSize       : not used by this function
//   d_inputPointerArray  : rLen input records, passed to bitonicSortGPU
//   rLen                 : number of records
//   d_outputKeyArray     : destination for the scattered keys
//   d_outputValArray     : destination for the scattered values
//   d_outputPointerArray : rLen sorted records; their x/y and z/w pairs are
//                          rewritten after each scatter
//   h_outputKeyListRange : out; host array of per-key ranges allocated by
//                          getChunkBoundary
// Returns the number of distinct keys as reported by getChunkBoundary.
int GPUBitonicSortMem (void * d_inputKeyArray, int totalKeySize, void * d_inputValArray, int totalValueSize, 
		  cmp_type_t * d_inputPointerArray, int rLen, 
		  void * d_outputKeyArray, void * d_outputValArray, 
		  cmp_type_t * d_outputPointerArray, int2 ** h_outputKeyListRange
		  )
{
	saven_initialPrefixSum(rLen);
	//array_startTime(1);
	int numDistinctKey=0;
	// NOTE(review): -1 is passed as the total length; its meaning is defined
	// by bitonicSortGPU, which is not visible here.
	int totalLenInBytes=-1;
	bitonicSortGPU(d_inputKeyArray, totalLenInBytes, d_inputPointerArray, rLen, d_outputPointerArray);
	//array_endTime("sort", 1);
	//!we first scatter the values and then the keys. so that we can reuse d_PA. 
	int2 *d_PA;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_PA), sizeof(int2)*rLen) );	
	//scatter the values.
	if(d_inputValArray!=NULL)
	{
		getZWArray(d_outputPointerArray, rLen, d_PA);
		copyChunks(d_inputValArray, d_PA, rLen, d_outputValArray);
		setZWArray(d_outputPointerArray, rLen, d_PA);
	}
	
	//scatter the keys.
	if(d_inputKeyArray!=NULL)
	{
		getXYArray(d_outputPointerArray, rLen, d_PA);
		copyChunks(d_inputKeyArray, d_PA, rLen, d_outputKeyArray);	
		setXYArray(d_outputPointerArray, rLen, d_PA);
	}
	// The scratch pair buffer is no longer needed. It was previously never
	// released, leaking sizeof(int2)*rLen bytes of device memory per call.
	cudaFree(d_PA);

	//find the boundary for each key.

	numDistinctKey=getChunkBoundary(d_outputKeyArray, d_outputPointerArray, rLen, h_outputKeyListRange);

	return numDistinctKey;

}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
奇米亚洲午夜久久精品| 91在线视频免费91| 99久久er热在这里只有精品15| 欧美视频中文字幕| 国产欧美久久久精品影院| 亚洲18色成人| 91农村精品一区二区在线| 日韩欧美一级精品久久| 又紧又大又爽精品一区二区| 国产美女视频一区| 欧美放荡的少妇| 亚洲一区二区影院| 成人av片在线观看| 久久精品一区四区| 久久精品国产亚洲a| 欧美精品一卡两卡| 一区二区三区视频在线看| 成人午夜看片网址| 久久婷婷国产综合国色天香| 喷水一区二区三区| 91精品国产色综合久久久蜜香臀| 亚洲三级免费观看| 97精品久久久午夜一区二区三区| 久久久不卡网国产精品二区| 久草这里只有精品视频| 91精品国产91热久久久做人人| 亚洲一区二区在线免费观看视频 | 91麻豆精品在线观看| 久久久久99精品国产片| 久久99久久久欧美国产| 欧美精品在欧美一区二区少妇 | 国产精品三级av在线播放| 激情图片小说一区| 精品国产制服丝袜高跟| 免费成人av在线播放| 日韩欧美国产wwwww| 另类小说视频一区二区| 日韩一级完整毛片| 国产自产v一区二区三区c| 久久综合九色综合97_久久久| 国产一区二区三区黄视频 | 国产农村妇女精品| 成人影视亚洲图片在线| 亚洲人xxxx| 在线一区二区视频| 日韩—二三区免费观看av| 91精品国产乱码久久蜜臀| 久久国产视频网| 国产网站一区二区三区| 972aa.com艺术欧美| 亚洲激情在线播放| 欧美丰满美乳xxx高潮www| 久久er精品视频| 国产精品毛片大码女人| 欧洲一区在线观看| 免费在线看一区| 欧美激情在线观看视频免费| 99视频一区二区| 午夜欧美电影在线观看| 日韩欧美一级在线播放| 99视频在线观看一区三区| 五月天中文字幕一区二区| 精品成人佐山爱一区二区| 成人国产一区二区三区精品| 午夜视频久久久久久| 久久久久久久网| 欧美色涩在线第一页| 国产精品一区二区三区乱码| 亚洲欧美偷拍卡通变态| 精品美女在线观看| 成人一区二区三区在线观看| 午夜天堂影视香蕉久久| 亚洲国产精品成人综合| 91精品国产一区二区三区 | 亚洲精品久久嫩草网站秘色| 日韩欧美第一区| 日本国产一区二区| 国产一区二区在线看| 亚洲午夜影视影院在线观看| 精品不卡在线视频| 欧美色男人天堂| 丁香啪啪综合成人亚洲小说| 亚洲电影你懂得| 国产欧美日韩卡一| 精品黑人一区二区三区久久| 日本黄色一区二区| 国产激情视频一区二区在线观看| 天天色 色综合| 一区二区三区在线观看视频| 久久亚洲精品国产精品紫薇 | 91九色02白丝porn| 国产精品影音先锋| 欧美aaaaa成人免费观看视频| 亚洲天天做日日做天天谢日日欢| 26uuu精品一区二区| 在线不卡免费欧美| 欧美专区日韩专区| 91蜜桃视频在线| 成人av电影观看| 国产一区二区在线看| 日本不卡高清视频| 午夜久久久久久电影| 一个色综合网站| 亚洲精品网站在线观看| 1000精品久久久久久久久| 久久精品亚洲精品国产欧美| 日韩欧美一区二区视频| 欧美剧情片在线观看| 欧美最猛性xxxxx直播| 色www精品视频在线观看| 91美女片黄在线观看| 色香蕉久久蜜桃| 色吊一区二区三区| 欧美日韩精品一区二区三区蜜桃 | 日韩中文字幕1| 丝袜亚洲精品中文字幕一区| 婷婷夜色潮精品综合在线| 亚洲成av人影院在线观看网| 亚洲一线二线三线视频| 亚洲国产另类av| 蜜桃一区二区三区在线| 老司机午夜精品| 国产一区亚洲一区| 成人黄页在线观看| av电影一区二区| 91福利资源站| 欧美日本一区二区在线观看| 欧美一二三区在线| 精品处破学生在线二十三| 欧美韩日一区二区三区四区| 国产精品久久久久9999吃药| 自拍偷拍国产精品| 一区二区免费在线| 日韩不卡手机在线v区| 久久99国产乱子伦精品免费| 国产精品1区2区| 99国产精品视频免费观看| 欧美艳星brazzers| 日韩欧美一区二区在线视频| 欧美极品aⅴ影院| 一区二区三区四区高清精品免费观看 | 一区二区三区91| 日本免费在线视频不卡一不卡二| 久久精品国产**网站演员| 国产精品1区2区| 91久久精品网| 精品久久久久久久久久久久包黑料| 久久女同互慰一区二区三区| 亚洲图片另类小说| 伦理电影国产精品| 99久久综合精品| 91精品久久久久久久久99蜜臂 | 欧美一区午夜视频在线观看| 久久这里只有精品6| 亚洲私人影院在线观看| 视频在线观看91| 成人国产视频在线观看| 欧美人妖巨大在线| 国产亚洲一区二区三区在线观看| 亚洲精品国产精品乱码不99| 精品一区二区成人精品| 在线免费观看视频一区| 久久亚区不卡日本| 
天天亚洲美女在线视频| 91丨国产丨九色丨pron| 久久亚洲欧美国产精品乐播| 午夜电影久久久| 91在线视频播放地址| 日韩欧美一级精品久久| 亚洲一区二区不卡免费| 粉嫩在线一区二区三区视频| 91麻豆精品国产91久久久久久久久 | 99久久精品免费| xnxx国产精品| 日韩福利电影在线观看| 91久久精品一区二区三区| 亚洲国产成人私人影院tom| 理论片日本一区| 欧美另类一区二区三区| 自拍偷拍欧美精品| 粉嫩欧美一区二区三区高清影视| 91精品国产欧美一区二区成人| 亚洲国产日日夜夜| 91麻豆免费看| 亚洲乱码精品一二三四区日韩在线| 国产一区二区三区观看| 欧美变态凌虐bdsm| 青青草一区二区三区| 欧美日韩一级二级| 亚洲高清视频中文字幕| 在线视频亚洲一区| 一区二区三区不卡在线观看 | 日韩精品一区第一页| 欧美午夜一区二区三区免费大片| 综合精品久久久| 91国在线观看| 一区二区三区色| 欧美视频一区二| 日本v片在线高清不卡在线观看| 欧美精品久久久久久久久老牛影院|