亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? marssort.cu

?? GPU實現的MapReduce framework,對于學習并行編程和cuda平臺的編程方面有著極好的參考價值
?? CU
?? 第 1 頁 / 共 4 頁
字號:
	const int bid=bx+by*gridDim.x;
	const int numThread=blockDim.x;
	const int resultID=(bid)*numThread+tid;
	int pos=startPos+resultID;
	if(pos<rLen)
	{
		cmp_type_t value=d_input[pos];
		value.z=d_value[pos].x;
		value.w=d_value[pos].y;
		d_input[pos]=value;
	}
}

/*
 * Host wrapper around getIntYArray_kernel.
 *
 * Covers the index range [0, rLen) with repeated launches of a fixed
 * 512-block x 512-thread grid; every launch receives the first index it is
 * responsible for, and the kernel receives rLen as its upper bound.
 * Returns after cudaThreadSynchronize().
 *
 * d_data   : int2 array handed to the kernel (device pointer by naming
 *            convention).
 * rLen     : number of elements to process.
 * d_output : int array handed to the kernel.
 *
 * NOTE(review): the kernel body is not visible here; judging by the name it
 * presumably writes the .y component of each d_data element to d_output --
 * confirm against the kernel definition.
 */
void getIntYArray(int2 *d_data, int rLen, int* d_output)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	//number of elements one launch can cover
	const int elemsPerLaunch=blocksPerLaunch*threadsPerBlock;

	//one launch per full slice, plus one more for a non-zero remainder
	int numLaunch=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		numLaunch++;

	dim3 block(threadsPerBlock, 1, 1);
	dim3 grid(blocksPerLaunch, 1, 1);
	for(int launch=0;launch<numLaunch;launch++)
	{
		const int firstIndex=launch*elemsPerLaunch;
		getIntYArray_kernel<<<grid,block>>>(d_data, firstIndex, rLen, d_output);
	}
	cudaThreadSynchronize();
}

/*
 * Host wrapper around getXYArray_kernel.
 *
 * Covers the index range [0, rLen) with repeated launches of a fixed
 * 512-block x 512-thread grid; every launch receives the first index it is
 * responsible for, and the kernel receives rLen as its upper bound.
 * Returns after cudaThreadSynchronize().
 *
 * d_data   : cmp_type_t array handed to the kernel.
 * rLen     : number of elements to process.
 * d_output : int2 array handed to the kernel.
 *
 * NOTE(review): the kernel body is not visible here; judging by the name it
 * presumably copies the (x, y) components of each d_data element into
 * d_output -- confirm against the kernel definition.
 */
void getXYArray(cmp_type_t *d_data, int rLen, int2* d_output)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	//number of elements one launch can cover
	const int elemsPerLaunch=blocksPerLaunch*threadsPerBlock;

	//one launch per full slice, plus one more for a non-zero remainder
	int numLaunch=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		numLaunch++;

	dim3 block(threadsPerBlock, 1, 1);
	dim3 grid(blocksPerLaunch, 1, 1);
	for(int launch=0;launch<numLaunch;launch++)
	{
		const int firstIndex=launch*elemsPerLaunch;
		getXYArray_kernel<<<grid,block>>>(d_data, firstIndex, rLen, d_output);
	}
	cudaThreadSynchronize();
}

/*
 * Host wrapper around getZWArray_kernel.
 *
 * Covers the index range [0, rLen) with repeated launches of a fixed
 * 512-block x 512-thread grid; every launch receives the first index it is
 * responsible for, and the kernel receives rLen as its upper bound.
 * Returns after cudaThreadSynchronize().
 *
 * d_data   : cmp_type_t array handed to the kernel.
 * rLen     : number of elements to process.
 * d_output : int2 array handed to the kernel.
 *
 * NOTE(review): the kernel body is not visible here; judging by the name it
 * presumably copies the (z, w) components of each d_data element into
 * d_output -- confirm against the kernel definition.
 */
void getZWArray(cmp_type_t *d_data, int rLen, int2* d_output)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	//number of elements one launch can cover
	const int elemsPerLaunch=blocksPerLaunch*threadsPerBlock;

	//one launch per full slice, plus one more for a non-zero remainder
	int numLaunch=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		numLaunch++;

	dim3 block(threadsPerBlock, 1, 1);
	dim3 grid(blocksPerLaunch, 1, 1);
	for(int launch=0;launch<numLaunch;launch++)
	{
		const int firstIndex=launch*elemsPerLaunch;
		getZWArray_kernel<<<grid,block>>>(d_data, firstIndex, rLen, d_output);
	}
	cudaThreadSynchronize();
}

/*
 * Host wrapper around setXYArray_kernel.
 *
 * Covers the index range [0, rLen) with repeated launches of a fixed
 * 512-block x 512-thread grid; every launch receives the first index it is
 * responsible for, and the kernel receives rLen as its upper bound.
 * Returns after cudaThreadSynchronize().
 *
 * d_data  : cmp_type_t array handed to the kernel.
 * rLen    : number of elements to process.
 * d_value : int2 array handed to the kernel.
 *
 * NOTE(review): the kernel body is not visible here; judging by the name it
 * presumably stores each d_value element into the (x, y) components of the
 * matching d_data element -- confirm against the kernel definition.
 */
void setXYArray(cmp_type_t *d_data, int rLen, int2* d_value)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	//number of elements one launch can cover
	const int elemsPerLaunch=blocksPerLaunch*threadsPerBlock;

	//one launch per full slice, plus one more for a non-zero remainder
	int numLaunch=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		numLaunch++;

	dim3 block(threadsPerBlock, 1, 1);
	dim3 grid(blocksPerLaunch, 1, 1);
	for(int launch=0;launch<numLaunch;launch++)
	{
		const int firstIndex=launch*elemsPerLaunch;
		setXYArray_kernel<<<grid,block>>>(d_data, firstIndex, rLen, d_value);
	}
	cudaThreadSynchronize();
}

/*
 * Host wrapper around setZWArray_kernel.
 *
 * Covers the index range [0, rLen) with repeated launches of a fixed
 * 512-block x 512-thread grid; every launch receives the first index it is
 * responsible for, and the kernel receives rLen as its upper bound.
 * Returns after cudaThreadSynchronize().
 *
 * d_data  : cmp_type_t array handed to the kernel.
 * rLen    : number of elements to process.
 * d_value : int2 array handed to the kernel.
 *
 * NOTE(review): the kernel header is not visible here; judging by the name
 * it presumably stores each d_value element into the (z, w) components of
 * the matching d_data element -- confirm against the kernel definition.
 */
void setZWArray(cmp_type_t *d_data, int rLen, int2* d_value)
{
	const int threadsPerBlock=512;
	const int blocksPerLaunch=512;
	//number of elements one launch can cover
	const int elemsPerLaunch=blocksPerLaunch*threadsPerBlock;

	//one launch per full slice, plus one more for a non-zero remainder
	int numLaunch=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		numLaunch++;

	dim3 block(threadsPerBlock, 1, 1);
	dim3 grid(blocksPerLaunch, 1, 1);
	for(int launch=0;launch<numLaunch;launch++)
	{
		const int firstIndex=launch*elemsPerLaunch;
		setZWArray_kernel<<<grid,block>>>(d_data, firstIndex, rLen, d_value);
	}
	cudaThreadSynchronize();
}
/*
 * Copies one variable-length byte chunk per thread from d_source to d_dest.
 *
 * The thread handling element pos reads d_Rin[pos] as (x = byte offset into
 * d_source, y = chunk size in bytes), copies that many bytes to d_dest
 * starting at byte d_sum[pos], and then overwrites d_Rin[pos].x with the new
 * start offset (the .y component is written back unchanged).
 *
 * startPos : index of the first element covered by this launch.
 * rLen     : exclusive upper bound on the element index; threads whose
 *            index is >= rLen do nothing.
 *
 * The per-block stride uses blockDim.x only, so element indices are unique
 * only for launches with blockDim.y == 1.
 */
__global__ void copyChunks_kernel(void *d_source, int startPos, int2* d_Rin, int rLen, int *d_sum, void *d_dest)
{
	const int threadsX=blockDim.x;
	const int localId=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockId=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+(blockId*threadsX+localId);

	//threads past the end of the input have nothing to copy
	if(pos>=rLen)
		return;

	int2 chunk=d_Rin[pos];
	const int readBase=chunk.x;
	const int numBytes=chunk.y;
	const int writeBase=d_sum[pos];

	char *inBytes=(char*)d_source;
	char *outBytes=(char*)d_dest;
	//byte-by-byte copy of this thread's chunk
	for(int b=0;b<numBytes;b++)
		outBytes[b+writeBase]=inBytes[b+readBase];

	//record where the chunk now lives in the destination buffer
	chunk.x=writeBase;
	d_Rin[pos]=chunk;
}

/*
 * Flags the positions at which a new chunk begins.
 *
 * For element pos, writes 1 to d_startArray[pos] when pos is 0 or when
 * getCompareValue(d_rawData, d_Rin[pos], d_Rin[pos-1]) is non-zero, and 0
 * otherwise.
 *
 * startPos : index of the first element covered by this launch.
 * rLen     : exclusive upper bound on the element index; threads whose
 *            index is >= rLen do nothing.
 *
 * The per-block stride uses blockDim.x only, so element indices are unique
 * only for launches with blockDim.y == 1.
 */
__global__ void getChunkBoundary_kernel(void* d_rawData, int startPos, cmp_type_t *d_Rin, 
										int rLen, int* d_startArray)
{
	const int threadsX=blockDim.x;
	const int localId=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockId=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+(blockId*threadsX+localId);

	if(pos>=rLen)
		return;

	//element 0 always opens a chunk; any other element opens one only
	//when it compares as different from its left neighbour
	int isBoundary=1;
	if(pos!=0)
	{
		cmp_type_t cur=d_Rin[pos];
		cmp_type_t left=d_Rin[pos-1];
		isBoundary=(getCompareValue(d_rawData, cur, left)!=0)?1:0;
	}
	d_startArray[pos]=isBoundary;
}

/*
 * Turns a list of chunk start positions into (start, end) pairs.
 *
 * For key index pos, writes d_boundaryRange[pos] = (d_boundary[pos],
 * d_boundary[pos+1]); the last key (pos == numKey-1) uses rLen as its end.
 *
 * startPos : index of the first key covered by this launch.
 * numKey   : number of entries in d_boundary / d_boundaryRange; threads
 *            whose index is >= numKey do nothing.
 * rLen     : end value stored for the last key.
 *
 * The per-block stride uses blockDim.x only, so key indices are unique only
 * for launches with blockDim.y == 1.
 */
__global__ void setBoundaryInt2_kernel(int* d_boundary, int startPos, int numKey, int rLen,
										  int2* d_boundaryRange)
{
	const int threadsX=blockDim.x;
	const int localId=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockId=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+(blockId*threadsX+localId);

	if(pos>=numKey)
		return;

	int2 range;
	range.x=d_boundary[pos];
	//the final range has no successor entry, so it is closed with rLen
	range.y=((pos+1)==numKey)?rLen:d_boundary[pos+1];
	d_boundaryRange[pos]=range;
}

/*
 * Compacts the flagged positions into a dense list.
 *
 * For element pos, when d_startArray[pos] equals 1 the value pos is stored
 * at d_bounary[d_startSumArray[pos]]; elements with any other flag value
 * write nothing.
 *
 * startPos : index of the first element covered by this launch.
 * rLen     : exclusive upper bound on the element index; threads whose
 *            index is >= rLen do nothing.
 *
 * The per-block stride uses blockDim.x only, so element indices are unique
 * only for launches with blockDim.y == 1.
 */
__global__ void writeBoundary_kernel(int startPos, int rLen, int* d_startArray,
									int* d_startSumArray, int* d_bounary)
{
	const int threadsX=blockDim.x;
	const int localId=(int)threadIdx.x+(int)threadIdx.y*threadsX;
	const int blockId=(int)blockIdx.x+(int)blockIdx.y*(int)gridDim.x;
	const int pos=startPos+(blockId*threadsX+localId);

	if(pos>=rLen)
		return;

	const int isStart=d_startArray[pos];
	const int slot=d_startSumArray[pos];
	//only chunk-start positions are emitted, each at its own slot
	if(isStart==1)
		d_bounary[slot]=pos;
}

/*
 * Packs the chunks described by d_Rin from d_source into d_dest.
 *
 * Steps:
 *   1. getIntYArray() fills a temporary int array from d_Rin (the chunk
 *      sizes, i.e. the .y components consumed by copyChunks_kernel).
 *   2. prescanArray() turns those sizes into per-chunk write offsets.
 *      NOTE(review): assumed to be an exclusive prefix sum -- confirm
 *      against prescanArray's definition.
 *   3. copyChunks_kernel copies every chunk to its offset in d_dest and
 *      rewrites d_Rin[i].x to that offset.
 *
 * d_source : buffer the chunks are read from.
 * d_Rin    : rLen (offset, size) pairs; the .x components are updated.
 * rLen     : number of chunks.
 * d_dest   : buffer the chunks are written to.
 *
 * Both temporary device arrays are released before returning.
 */
void copyChunks(void *d_source, int2* d_Rin, int rLen, void *d_dest)
{
	//size of every chunk
	int* d_chunkSize;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_chunkSize), sizeof(int)*rLen) );
	getIntYArray(d_Rin, rLen, d_chunkSize);

	//prefix sum of the sizes gives the output position of every chunk
	int* d_writePos;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_writePos), sizeof(int)*rLen) );
	saven_initialPrefixSum(rLen);
	prescanArray(d_writePos,d_chunkSize,rLen);
	cudaFree(d_chunkSize);

	//copy phase: 512 blocks of 128 threads per launch
	const int threadsPerBlock=128;
	const int blocksPerLaunch=512;
	const int elemsPerLaunch=blocksPerLaunch*threadsPerBlock;

	//one launch per full slice, plus one more for a non-zero remainder
	int numLaunch=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		numLaunch++;

	dim3 block(threadsPerBlock, 1, 1);
	dim3 grid(blocksPerLaunch, 1, 1);
	for(int launch=0;launch<numLaunch;launch++)
	{
		const int firstIndex=launch*elemsPerLaunch;
		copyChunks_kernel<<<grid,block>>>(d_source, firstIndex, d_Rin, rLen, d_writePos, d_dest);
	}
	cudaThreadSynchronize();

	cudaFree(d_writePos);
}
//return the number of chunks.
/*
 * Finds the runs of equal keys in d_Rin and returns them as index ranges.
 *
 * Steps:
 *   1. getChunkBoundary_kernel flags every position that starts a new chunk.
 *   2. prescanArray() scans the flags to give each start a write slot.
 *      NOTE(review): assumed to be an exclusive prefix sum, which is what the
 *      "last flag + last scan value" count below relies on -- confirm.
 *   3. writeBoundary_kernel compacts the start positions into d_boundary.
 *   4. setBoundaryInt2_kernel converts them into (start, end) pairs, the last
 *      pair ending at rLen.
 *   5. The pairs are copied into a host array obtained from BenMalloc().
 *
 * d_source             : raw data forwarded to getChunkBoundary_kernel.
 * d_Rin                : rLen records to compare.
 * rLen                 : number of records.
 * h_outputKeyListRange : receives the host array of ranges; set to NULL when
 *                        rLen <= 0. NOTE(review): the array comes from
 *                        BenMalloc and is not released here -- presumably the
 *                        caller owns it; confirm.
 *
 * Returns the number of chunks (0 when rLen <= 0).
 */
int getChunkBoundary(void *d_source, cmp_type_t* d_Rin, int rLen, int2 ** h_outputKeyListRange)
{
	//Empty input: the read-backs of element rLen-1 below would address
	//memory before the start of the arrays, so report "no chunks" instead.
	if(rLen<=0)
	{
		*h_outputKeyListRange=NULL;
		return 0;
	}

	int resultNumChunks=0;
	//get the chunk boundary[start of chunk0, start of chunk 1, ...]
	int* d_startArray;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startArray), sizeof(int)*rLen) );	
	
	//every launch covers numBlock_x*numThreadsPerBlock_x elements
	int numThreadsPerBlock_x=512;
	int numThreadsPerBlock_y=1;
	int numBlock_x=512;
	int numBlock_y=1;
	int chunkSize=numBlock_x*numThreadsPerBlock_x;
	int numChunk=rLen/chunkSize;
	if(rLen%chunkSize!=0)
		numChunk++;

	dim3  thread( numThreadsPerBlock_x, numThreadsPerBlock_y, 1);
	dim3  grid( numBlock_x, numBlock_y , 1);
	int i=0;
	int start=0;
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		getChunkBoundary_kernel<<<grid,thread>>>(d_source, start, d_Rin, rLen, d_startArray);
	} 
	cudaThreadSynchronize();
	//prefix sum for write positions.
	int* d_startSumArray;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startSumArray), sizeof(int)*rLen) );
	saven_initialPrefixSum(rLen);
	prescanArray(d_startSumArray,d_startArray,rLen);

	//gpuPrint(d_startSumArray, rLen, "d_startSumArray");

	//total number of chunks = flag of the last element + scan value of the last element
	int lastValue=0;
	int partialSum=0;
	CUDA_SAFE_CALL( cudaMemcpy( &lastValue, d_startArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
	//gpuPrint(d_startArray, rLen, "d_startArray");
	CUDA_SAFE_CALL( cudaMemcpy( &partialSum, d_startSumArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
	//gpuPrint(d_startSumArray, rLen, "d_startSumArray");
	resultNumChunks=lastValue+partialSum;

	int* d_boundary;//[start of chunk0, start of chunk 1, ...]
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_boundary), sizeof(int)*resultNumChunks) );

	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		writeBoundary_kernel<<<grid,thread>>>(start, rLen, d_startArray,
									d_startSumArray, d_boundary);
	} 
	cudaFree(d_startArray);
	cudaFree(d_startSumArray);	

	//set the int2 boundary. 
	int2 *d_outputKeyListRange;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_outputKeyListRange), sizeof(int2)*resultNumChunks) );
	//the launch count is now derived from the number of chunks, not rLen
	numChunk=resultNumChunks/chunkSize;
	if(resultNumChunks%chunkSize!=0)
		numChunk++;
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		setBoundaryInt2_kernel<<<grid,thread>>>(d_boundary, start, resultNumChunks, rLen, d_outputKeyListRange);
	} 
	cudaThreadSynchronize();

	//hand the ranges back to the host
	*h_outputKeyListRange=(int2*)BenMalloc(sizeof(int2)*resultNumChunks);
	CUDA_SAFE_CALL( cudaMemcpy( *h_outputKeyListRange, d_outputKeyListRange, sizeof(int2)*resultNumChunks, cudaMemcpyDeviceToHost) );
	
	cudaFree(d_boundary);
	cudaFree(d_outputKeyListRange);
	return resultNumChunks;

}

/*
 * Sorts the records, regroups their keys and values in sorted order and
 * reports the range of records belonging to each distinct key.
 *
 * Steps:
 *   1. bitonicSortGPU() sorts d_inputPointerArray into d_outputPointerArray.
 *   2. If d_inputValArray is non-NULL, the (z, w) fields of the sorted
 *      pointers are used to copy the values into d_outputValArray via
 *      copyChunks(), and the updated pairs are written back.
 *   3. If d_inputKeyArray is non-NULL, the same is done for the (x, y)
 *      fields and d_outputKeyArray.
 *   4. getChunkBoundary() computes the per-key ranges.
 *
 * totalKeySize and totalValueSize are accepted but not used in this
 * function. *h_outputKeyListRange receives the host array produced by
 * getChunkBoundary().
 *
 * Returns the number of distinct keys as reported by getChunkBoundary().
 */
int GPUBitonicSortMem (void * d_inputKeyArray, int totalKeySize, void * d_inputValArray, int totalValueSize, 
		  cmp_type_t * d_inputPointerArray, int rLen, 
		  void * d_outputKeyArray, void * d_outputValArray, 
		  cmp_type_t * d_outputPointerArray, int2 ** h_outputKeyListRange
		  )
{
	saven_initialPrefixSum(rLen);
	//array_startTime(1);
	int numDistinctKey=0;
	int totalLenInBytes=-1;
	bitonicSortGPU(d_inputKeyArray, totalLenInBytes, d_inputPointerArray, rLen, d_outputPointerArray);
	//array_endTime("sort", 1);
	//!we first scatter the values and then the keys. so that we can reuse d_PA. 
	int2 *d_PA;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_PA), sizeof(int2)*rLen) );	
	//scatter the values.
	if(d_inputValArray!=NULL)
	{
		getZWArray(d_outputPointerArray, rLen, d_PA);
		copyChunks(d_inputValArray, d_PA, rLen, d_outputValArray);
		setZWArray(d_outputPointerArray, rLen, d_PA);
	}
	
	//scatter the keys.
	if(d_inputKeyArray!=NULL)
	{
		getXYArray(d_outputPointerArray, rLen, d_PA);
		copyChunks(d_inputKeyArray, d_PA, rLen, d_outputKeyArray);	
		setXYArray(d_outputPointerArray, rLen, d_PA);
	}
	//the scratch (offset, size) array is not needed past this point;
	//release it so repeated calls do not leak device memory.
	cudaFree(d_PA);

	//find the boundary for each key.

	numDistinctKey=getChunkBoundary(d_outputKeyArray, d_outputPointerArray, rLen, h_outputKeyListRange);

	return numDistinctKey;

}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
欧美日韩不卡一区| 欧美国产成人精品| 26uuu亚洲婷婷狠狠天堂| 精品成人在线观看| 国产视频不卡一区| 亚洲美女淫视频| 久久99蜜桃精品| 9久草视频在线视频精品| 欧美午夜精品一区二区三区| 91精品国产一区二区| 久久免费偷拍视频| 亚洲一区二区五区| 国产剧情一区二区三区| 91福利在线导航| 26uuu色噜噜精品一区| 亚洲精品videosex极品| 久久aⅴ国产欧美74aaa| 日韩欧美高清一区| 中文字幕一区二区三区蜜月| 美女脱光内衣内裤视频久久影院| 国产乱对白刺激视频不卡| 56国语精品自产拍在线观看| 中文字幕不卡在线| 蜜臀久久99精品久久久久宅男 | 久久精品亚洲国产奇米99| 亚洲日韩欧美一区二区在线| 黄页视频在线91| 欧美自拍偷拍一区| 亚洲三级在线看| 成人性生交大片免费| 久久久久久影视| 韩国成人精品a∨在线观看| 91精品国产黑色紧身裤美女| 一区二区成人在线视频| 色噜噜夜夜夜综合网| 国产精品夫妻自拍| 成人一级片网址| 国产精品久久精品日日| 国产成人午夜99999| 日本一区二区综合亚洲| 国产成人综合在线| 国产精品热久久久久夜色精品三区| 精品无人区卡一卡二卡三乱码免费卡| 欧美日韩你懂的| 欧美怡红院视频| 天堂成人国产精品一区| 制服.丝袜.亚洲.中文.综合| 日本欧美在线看| 久久丝袜美腿综合| av一区二区久久| 亚洲一区在线播放| 欧美成人性战久久| 成人免费毛片高清视频| 国产精品久久久久久久久免费丝袜| 97久久超碰国产精品| 亚洲国产裸拍裸体视频在线观看乱了 | 天天综合色天天综合| 欧美一区二区成人| 大尺度一区二区| 亚洲愉拍自拍另类高清精品| 4438x成人网最大色成网站| 国产一区啦啦啦在线观看| 亚洲三级久久久| 欧美成人女星排行榜| 91在线观看污| 麻豆成人91精品二区三区| 国产精品人妖ts系列视频| 777a∨成人精品桃花网| 成人福利视频网站| 日韩精品亚洲一区二区三区免费| 久久综合99re88久久爱| 欧美在线综合视频| 粉嫩aⅴ一区二区三区四区| 一区二区国产盗摄色噜噜| 国产拍欧美日韩视频二区| 欧美日韩视频在线观看一区二区三区 | 蜜臀av一级做a爰片久久| 国产精品久久久久精k8| 精品电影一区二区三区| 欧美日高清视频| 欧美性受xxxx黑人xyx性爽| 国产乱码精品一区二区三区av| 亚洲国产一区视频| 久久久久国色av免费看影院| 国产成人亚洲精品狼色在线| 亚洲欧美偷拍三级| 久久精品欧美一区二区三区麻豆 | 性久久久久久久久久久久| 国产精品久久久久桃色tv| 精品电影一区二区| 精品国产一区二区三区久久影院| 欧美写真视频网站| 日本韩国欧美一区二区三区| 成人激情av网| 日本高清不卡在线观看| 国产伦精品一区二区三区视频青涩| 日本在线不卡视频一二三区| 午夜电影一区二区| 日本伊人色综合网| 老司机午夜精品99久久| 日韩电影一区二区三区四区| 国产精品国产三级国产a | 一区二区三区不卡视频在线观看 | 欧美日韩国产精品成人| 欧美性大战久久久久久久蜜臀| 96av麻豆蜜桃一区二区| 成人午夜免费av| 91一区二区在线| 成人午夜激情影院| 欧美日韩在线综合| 91麻豆精品国产91久久久久久久久| 欧美三级欧美一级| 日韩一级欧美一级| 国产精品国产三级国产普通话99| 亚洲女子a中天字幕| 日韩av电影免费观看高清完整版 | 亚洲国产精品久久不卡毛片| 日韩影院精彩在线| 国产一区二区不卡| 91亚洲精品久久久蜜桃网站| 欧美精品粉嫩高潮一区二区| 国产亚洲欧美中文| 午夜精品在线看| 99久久精品免费| 欧美成人三级电影在线| 亚洲男人的天堂在线观看| 久久精品国产亚洲一区二区三区| 九色综合狠狠综合久久| 欧美日韩在线电影| 国产蜜臀av在线一区二区三区| 亚洲国产wwwccc36天堂| 成人av资源在线| 久久理论电影网| 七七婷婷婷婷精品国产| 色av一区二区| 亚洲素人一区二区| 成人免费视频免费观看| 国产日韩欧美亚洲| 极品少妇一区二区| 欧美电影免费观看完整版| 亚洲国产精品久久不卡毛片| 色拍拍在线精品视频8848| 欧美韩国日本一区| 国产传媒久久文化传媒| 久久久影视传媒| 国产精品自拍在线| 久久综合色鬼综合色| 国内精品伊人久久久久av影院| 日韩一级二级三级| 久久电影国产免费久久电影| 日韩免费福利电影在线观看| 久久疯狂做爰流白浆xx| 欧美电影免费观看高清完整版在 | 国产精品一区二区久久精品爱涩 | 亚洲在线成人精品| 在线观看视频一区| 三级久久三级久久| 日韩欧美中文字幕一区| 精品亚洲国内自在自线福利| 久久久久一区二区三区四区| 不卡视频一二三| 
亚洲18色成人| 久久天堂av综合合色蜜桃网| 99久久777色| 日韩影院精彩在线| 国产亚洲一本大道中文在线| 成人免费视频视频| 国产馆精品极品| 国产精品丝袜久久久久久app| 91香蕉视频在线| 寂寞少妇一区二区三区| 国产精品青草综合久久久久99| 欧美色倩网站大全免费| 国产一区二区三区精品视频| 亚洲欧美日韩国产手机在线| 欧美三级日韩三级国产三级| 成人综合婷婷国产精品久久蜜臀| 亚洲视频狠狠干| 一本到不卡精品视频在线观看| 亚洲亚洲人成综合网络| 欧美精品一区男女天堂| 色综合久久中文综合久久牛| 久久aⅴ国产欧美74aaa| 亚洲精品视频在线观看免费| 精品国产sm最大网站免费看| 色综合激情久久| 国产成人激情av| 日韩精品亚洲专区| 亚洲一区二区视频在线观看| 中文字幕+乱码+中文字幕一区| 91精品国产美女浴室洗澡无遮挡| av动漫一区二区| 国产一区激情在线| 久久国产生活片100| 亚洲第一搞黄网站| 一区二区三区中文在线| 国产精品另类一区| 中文在线资源观看网站视频免费不卡 | 91成人看片片| 成人性生交大片免费看中文|