亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? marssort.cu

?? GPU實現的MapReduce framework,對于學習并行編程和cuda平臺的編程方面有著極好的參考價值
?? CU
?? 第 1 頁 / 共 4 頁
字號:
	const int bid=bx+by*gridDim.x;
	const int numThread=blockDim.x;
	const int resultID=(bid)*numThread+tid;
	int pos=startPos+resultID;
	if(pos<rLen)
	{
		cmp_type_t value=d_input[pos];
		value.z=d_value[pos].x;
		value.w=d_value[pos].y;
		d_input[pos]=value;
	}
}

//Host launcher for getIntYArray_kernel.
//  d_data   : int2 array handed to the kernel (device pointer).
//  rLen     : number of elements to cover.
//  d_output : int array handed to the kernel (device pointer).
//The kernel is defined outside this block; judging by its name it presumably
//writes the .y member of each int2 into d_output -- confirm against the kernel.
//The work is issued as a series of fixed-size launches, each told its first
//element index, and the function blocks until they have all finished.
void getIntYArray(int2 *d_data, int rLen, int* d_output)
{
	//Fixed one-dimensional launch shape: 512 blocks of 512 threads.
	const int threadsPerBlock=512;
	const int blocksPerGrid=512;
	const int elemsPerLaunch=blocksPerGrid*threadsPerBlock;

	//One launch per full slice, plus one more for a partial tail.
	int launchCount=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		launchCount++;

	dim3 blockDims(threadsPerBlock, 1, 1);
	dim3 gridDims(blocksPerGrid, 1, 1);
	for(int launch=0; launch<launchCount; launch++)
	{
		//The kernel receives the slice start and rLen; the tail launch is
		//not shortened on the host side.
		const int firstElem=launch*elemsPerLaunch;
		getIntYArray_kernel<<<gridDims,blockDims>>>(d_data, firstElem, rLen, d_output);
	}
	cudaThreadSynchronize();
}

//Host launcher for getXYArray_kernel.
//  d_data   : cmp_type_t array handed to the kernel (device pointer).
//  rLen     : number of elements to cover.
//  d_output : int2 array handed to the kernel (device pointer).
//The kernel is defined outside this block; by its name it presumably copies
//the (x, y) members of each record into d_output -- confirm against the kernel.
//Issues fixed-size launches until rLen elements are covered, then waits for
//the device to finish.
void getXYArray(cmp_type_t *d_data, int rLen, int2* d_output)
{
	//Fixed one-dimensional launch shape: 512 blocks of 512 threads.
	const int threadsPerBlock=512;
	const int blocksPerGrid=512;
	const int elemsPerLaunch=blocksPerGrid*threadsPerBlock;

	//One launch per full slice, plus one more for a partial tail.
	int launchCount=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		launchCount++;

	dim3 blockDims(threadsPerBlock, 1, 1);
	dim3 gridDims(blocksPerGrid, 1, 1);
	for(int launch=0; launch<launchCount; launch++)
	{
		//Each launch is given the index of the first element of its slice.
		const int firstElem=launch*elemsPerLaunch;
		getXYArray_kernel<<<gridDims,blockDims>>>(d_data, firstElem, rLen, d_output);
	}
	cudaThreadSynchronize();
}

//Host launcher for getZWArray_kernel.
//  d_data   : cmp_type_t array handed to the kernel (device pointer).
//  rLen     : number of elements to cover.
//  d_output : int2 array handed to the kernel (device pointer).
//The kernel is defined outside this block; by its name it presumably copies
//the (z, w) members of each record into d_output -- confirm against the kernel.
//Issues fixed-size launches until rLen elements are covered, then waits for
//the device to finish.
void getZWArray(cmp_type_t *d_data, int rLen, int2* d_output)
{
	//Fixed one-dimensional launch shape: 512 blocks of 512 threads.
	const int threadsPerBlock=512;
	const int blocksPerGrid=512;
	const int elemsPerLaunch=blocksPerGrid*threadsPerBlock;

	//One launch per full slice, plus one more for a partial tail.
	int launchCount=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		launchCount++;

	dim3 blockDims(threadsPerBlock, 1, 1);
	dim3 gridDims(blocksPerGrid, 1, 1);
	for(int launch=0; launch<launchCount; launch++)
	{
		//Each launch is given the index of the first element of its slice.
		const int firstElem=launch*elemsPerLaunch;
		getZWArray_kernel<<<gridDims,blockDims>>>(d_data, firstElem, rLen, d_output);
	}
	cudaThreadSynchronize();
}

//Host launcher for setXYArray_kernel.
//  d_data  : cmp_type_t array handed to the kernel (device pointer).
//  rLen    : number of elements to cover.
//  d_value : int2 array handed to the kernel (device pointer).
//The kernel is defined outside this block; by its name it presumably stores
//d_value[i] into the (x, y) members of d_data[i] -- confirm against the kernel.
//Issues fixed-size launches until rLen elements are covered, then waits for
//the device to finish.
void setXYArray(cmp_type_t *d_data, int rLen, int2* d_value)
{
	//Fixed one-dimensional launch shape: 512 blocks of 512 threads.
	const int threadsPerBlock=512;
	const int blocksPerGrid=512;
	const int elemsPerLaunch=blocksPerGrid*threadsPerBlock;

	//One launch per full slice, plus one more for a partial tail.
	int launchCount=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		launchCount++;

	dim3 blockDims(threadsPerBlock, 1, 1);
	dim3 gridDims(blocksPerGrid, 1, 1);
	for(int launch=0; launch<launchCount; launch++)
	{
		//Each launch is given the index of the first element of its slice.
		const int firstElem=launch*elemsPerLaunch;
		setXYArray_kernel<<<gridDims,blockDims>>>(d_data, firstElem, rLen, d_value);
	}
	cudaThreadSynchronize();
}

//Host launcher for setZWArray_kernel.
//  d_data  : cmp_type_t array handed to the kernel (device pointer).
//  rLen    : number of elements to cover.
//  d_value : int2 array handed to the kernel (device pointer).
//The kernel is defined outside this block; by its name it presumably stores
//d_value[i] into the (z, w) members of d_data[i] -- confirm against the kernel.
//Issues fixed-size launches until rLen elements are covered, then waits for
//the device to finish.
void setZWArray(cmp_type_t *d_data, int rLen, int2* d_value)
{
	//Fixed one-dimensional launch shape: 512 blocks of 512 threads.
	const int threadsPerBlock=512;
	const int blocksPerGrid=512;
	const int elemsPerLaunch=blocksPerGrid*threadsPerBlock;

	//One launch per full slice, plus one more for a partial tail.
	int launchCount=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		launchCount++;

	dim3 blockDims(threadsPerBlock, 1, 1);
	dim3 gridDims(blocksPerGrid, 1, 1);
	for(int launch=0; launch<launchCount; launch++)
	{
		//Each launch is given the index of the first element of its slice.
		const int firstElem=launch*elemsPerLaunch;
		setZWArray_kernel<<<gridDims,blockDims>>>(d_data, firstElem, rLen, d_value);
	}
	cudaThreadSynchronize();
}
//One thread per record: copies that record's bytes from d_source to d_dest
//and rewrites the record's offset to its new location.
//  d_source : byte buffer read at [d_Rin[pos].x, d_Rin[pos].x + d_Rin[pos].y).
//  startPos : index of the first record handled by this launch.
//  d_Rin    : per-record (x = byte offset, y = byte count); x is overwritten
//             with the record's write position in d_dest.
//  rLen     : total record count; threads with pos >= rLen do nothing.
//  d_sum    : per-record write position inside d_dest.
//  d_dest   : destination byte buffer.
//The per-block stride is blockDim.x only, so positions are unique per thread
//only when blockDim.y is 1.
__global__ void copyChunks_kernel(void *d_source, int startPos, int2* d_Rin, int rLen, int *d_sum, void *d_dest)
{
	//Flatten block and thread coordinates into one record index.
	const int threadInBlock=threadIdx.x+threadIdx.y*blockDim.x;
	const int blockInGrid=blockIdx.x+blockIdx.y*gridDim.x;
	const int pos=startPos+blockInGrid*blockDim.x+threadInBlock;

	if(pos>=rLen)
		return;

	int2 record=d_Rin[pos];
	const int srcOffset=record.x;
	const int byteCount=record.y;
	const int dstOffset=d_sum[pos];

	const char *src=(const char*)d_source;
	char *dst=(char*)d_dest;
	//Byte-by-byte copy of this record's payload.
	for(int b=0; b<byteCount; b++)
		dst[b+dstOffset]=src[b+srcOffset];

	//Publish the new offset; the byte count (y) is left as it was.
	record.x=dstOffset;
	d_Rin[pos]=record;
}

//One thread per record: writes 1 to d_startArray[pos] when record pos starts
//a new chunk and 0 otherwise.
//Record 0 always starts a chunk. Any other record starts one when
//getCompareValue(d_rawData, current, previous) returns non-zero.
//  d_rawData    : passed through unchanged to getCompareValue.
//  startPos     : index of the first record handled by this launch.
//  d_Rin        : record array; element pos and element pos-1 are compared.
//  rLen         : total record count; threads with pos >= rLen do nothing.
//  d_startArray : output flags, one int per record.
//The per-block stride is blockDim.x only, so positions are unique per thread
//only when blockDim.y is 1.
__global__ void getChunkBoundary_kernel(void* d_rawData, int startPos, cmp_type_t *d_Rin,
										int rLen, int* d_startArray)
{
	//Flatten block and thread coordinates into one record index.
	const int threadInBlock=threadIdx.x+threadIdx.y*blockDim.x;
	const int blockInGrid=blockIdx.x+blockIdx.y*gridDim.x;
	const int pos=startPos+blockInGrid*blockDim.x+threadInBlock;

	if(pos>=rLen)
		return;

	//Default covers pos==0, which has no left neighbour to compare against.
	int isChunkStart=1;
	if(pos!=0)
	{
		cmp_type_t cur=d_Rin[pos];
		cmp_type_t left=d_Rin[pos-1];
		isChunkStart=(getCompareValue(d_rawData, cur, left)!=0)?1:0;
	}
	d_startArray[pos]=isChunkStart;
}

//One thread per key: turns a list of start indices into (start, end) pairs.
//d_boundaryRange[pos].x is d_boundary[pos]; .y is d_boundary[pos+1], except
//for the last key (pos+1 == numKey), whose .y is rLen.
//  d_boundary      : numKey start indices.
//  startPos        : index of the first key handled by this launch.
//  numKey          : number of keys; threads with pos >= numKey do nothing.
//  rLen            : value used as the end of the last range.
//  d_boundaryRange : output, numKey int2 pairs.
//The per-block stride is blockDim.x only, so positions are unique per thread
//only when blockDim.y is 1.
__global__ void setBoundaryInt2_kernel(int* d_boundary, int startPos, int numKey, int rLen,
										  int2* d_boundaryRange)
{
	//Flatten block and thread coordinates into one key index.
	const int threadInBlock=threadIdx.x+threadIdx.y*blockDim.x;
	const int blockInGrid=blockIdx.x+blockIdx.y*gridDim.x;
	const int pos=startPos+blockInGrid*blockDim.x+threadInBlock;

	if(pos>=numKey)
		return;

	int2 range;
	range.x=d_boundary[pos];
	//The last key has no successor entry, so its range is closed with rLen.
	range.y=((pos+1)==numKey)?rLen:d_boundary[pos+1];
	d_boundaryRange[pos]=range;
}

//One thread per record: for every record whose flag is 1, stores the record's
//index at d_bounary[d_startSumArray[pos]]. Records with any other flag write
//nothing.
//  startPos        : index of the first record handled by this launch.
//  rLen            : total record count; threads with pos >= rLen do nothing.
//  d_startArray    : per-record flag (1 marks a chunk start).
//  d_startSumArray : per-record output slot.
//  d_bounary       : output list of chunk-start indices.
//The per-block stride is blockDim.x only, so positions are unique per thread
//only when blockDim.y is 1.
__global__ void writeBoundary_kernel(int startPos, int rLen, int* d_startArray,
									int* d_startSumArray, int* d_bounary)
{
	//Flatten block and thread coordinates into one record index.
	const int threadInBlock=threadIdx.x+threadIdx.y*blockDim.x;
	const int blockInGrid=blockIdx.x+blockIdx.y*gridDim.x;
	const int pos=startPos+blockInGrid*blockDim.x+threadInBlock;

	if(pos>=rLen)
		return;

	const int isChunkStart=d_startArray[pos];
	const int slot=d_startSumArray[pos];
	if(isChunkStart!=1)
		return;
	d_bounary[slot]=pos;
}

//Copies the byte ranges described by d_Rin from d_source to d_dest and
//updates each d_Rin[i].x to the range's new offset.
//  d_source : source byte buffer (device pointer).
//  d_Rin    : rLen (offset, size) pairs (device pointer); .x is rewritten by
//             copyChunks_kernel.
//  rLen     : number of ranges.
//  d_dest   : destination byte buffer (device pointer); the caller must size it.
//Write positions come from prescanArray over the per-range sizes.
//NOTE(review): prescanArray and saven_initialPrefixSum are defined elsewhere;
//this presumably yields a prefix sum that packs the ranges back to back --
//confirm.
void copyChunks(void *d_source, int2* d_Rin, int rLen, void *d_dest)
{
	//Pull the size of every range into its own array.
	int* d_chunkSizes;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_chunkSizes), sizeof(int)*rLen) );
	getIntYArray(d_Rin, rLen, d_chunkSizes);

	//Scan the sizes to obtain each range's write position.
	int* d_writePos;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_writePos), sizeof(int)*rLen) );
	saven_initialPrefixSum(rLen);
	prescanArray(d_writePos,d_chunkSizes,rLen);
	cudaFree(d_chunkSizes);

	//Copy pass: 512 blocks of 128 threads per launch, one thread per range.
	const int threadsPerBlock=128;
	const int blocksPerGrid=512;
	const int elemsPerLaunch=blocksPerGrid*threadsPerBlock;
	int launchCount=rLen/elemsPerLaunch;
	if(rLen%elemsPerLaunch!=0)
		launchCount++;

	dim3 blockDims(threadsPerBlock, 1, 1);
	dim3 gridDims(blocksPerGrid, 1, 1);
	for(int launch=0; launch<launchCount; launch++)
	{
		const int firstElem=launch*elemsPerLaunch;
		copyChunks_kernel<<<gridDims,blockDims>>>(d_source, firstElem, d_Rin, rLen, d_writePos, d_dest);
	}
	cudaThreadSynchronize();

	cudaFree(d_writePos);
}
//Splits the record array into chunks of consecutive equal records and returns
//the number of chunks.
//  d_source             : raw data passed to getChunkBoundary_kernel for the
//                         record comparison (device pointer).
//  d_Rin                : rLen records (device pointer); adjacent records are
//                         compared, so equal records are expected to already
//                         be adjacent.
//  rLen                 : number of records.
//  h_outputKeyListRange : out; receives a host array (allocated with
//                         BenMalloc) of one int2 per chunk, x = index of the
//                         chunk's first record, y = index one past its last.
//                         Set to NULL when rLen <= 0.
//Returns the number of chunks, or 0 when rLen <= 0.
int getChunkBoundary(void *d_source, cmp_type_t* d_Rin, int rLen, int2 ** h_outputKeyListRange)
{
	//Empty input: the code below reads element rLen-1 of the device arrays,
	//which would be index -1 here, so report "no chunks" up front.
	if(rLen<=0)
	{
		*h_outputKeyListRange=NULL;
		return 0;
	}

	int resultNumChunks=0;
	//get the chunk boundary[start of chunk0, start of chunk 1, ...]
	int* d_startArray;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startArray), sizeof(int)*rLen) );	
	
	//Launch shape shared by every kernel in this function.
	int numThreadsPerBlock_x=512;
	int numThreadsPerBlock_y=1;
	int numBlock_x=512;
	int numBlock_y=1;
	int chunkSize=numBlock_x*numThreadsPerBlock_x;
	int numChunk=rLen/chunkSize;
	if(rLen%chunkSize!=0)
		numChunk++;

	dim3  thread( numThreadsPerBlock_x, numThreadsPerBlock_y, 1);
	dim3  grid( numBlock_x, numBlock_y , 1);
	int i=0;
	int start=0;
	//Step 1: flag every record that starts a new chunk (1) or not (0).
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		getChunkBoundary_kernel<<<grid,thread>>>(d_source, start, d_Rin, rLen, d_startArray);
	} 
	cudaThreadSynchronize();
	//Step 2: prefix sum of the flags gives each chunk start its output slot.
	int* d_startSumArray;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_startSumArray), sizeof(int)*rLen) );
	saven_initialPrefixSum(rLen);
	prescanArray(d_startSumArray,d_startArray,rLen);

	//gpuPrint(d_startSumArray, rLen, "d_startSumArray");

	//Chunk count = last flag + last scan value.
	//NOTE(review): this is consistent with prescanArray being an exclusive
	//scan -- confirm against its definition.
	int lastValue=0;
	int partialSum=0;
	CUDA_SAFE_CALL( cudaMemcpy( &lastValue, d_startArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
	//gpuPrint(d_startArray, rLen, "d_startArray");
	CUDA_SAFE_CALL( cudaMemcpy( &partialSum, d_startSumArray+(rLen-1), sizeof(int), cudaMemcpyDeviceToHost) );
	//gpuPrint(d_startSumArray, rLen, "d_startSumArray");
	resultNumChunks=lastValue+partialSum;

	//Step 3: compact the indices of the flagged records.
	int* d_boundary;//[start of chunk0, start of chunk 1, ...]
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_boundary), sizeof(int)*resultNumChunks) );

	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		writeBoundary_kernel<<<grid,thread>>>(start, rLen, d_startArray,
									d_startSumArray, d_boundary);
	} 
	//No explicit synchronize here; every launch in this function uses the
	//default stream, so later kernels are ordered after these.
	cudaFree(d_startArray);
	cudaFree(d_startSumArray);	

	//Step 4: turn the start list into (start, end) pairs, one per chunk.
	int2 *d_outputKeyListRange;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_outputKeyListRange), sizeof(int2)*resultNumChunks) );
	numChunk=resultNumChunks/chunkSize;
	if(resultNumChunks%chunkSize!=0)
		numChunk++;
	for(i=0;i<numChunk;i++)
	{
		start=i*chunkSize;
		setBoundaryInt2_kernel<<<grid,thread>>>(d_boundary, start, resultNumChunks, rLen, d_outputKeyListRange);
	} 
	cudaThreadSynchronize();

	//Step 5: hand the ranges back to the host.
	*h_outputKeyListRange=(int2*)BenMalloc(sizeof(int2)*resultNumChunks);
	CUDA_SAFE_CALL( cudaMemcpy( *h_outputKeyListRange, d_outputKeyListRange, sizeof(int2)*resultNumChunks, cudaMemcpyDeviceToHost) );
	
	cudaFree(d_boundary);
	cudaFree(d_outputKeyListRange);
	return resultNumChunks;

}

//Sorts the records with bitonicSortGPU, rewrites the value and key bytes in
//sorted order, and reports the range of records belonging to each key.
//  d_inputKeyArray      : key bytes (device pointer); key scatter is skipped
//                         when NULL.
//  totalKeySize         : not used by this function.
//  d_inputValArray      : value bytes (device pointer); value scatter is
//                         skipped when NULL.
//  totalValueSize       : not used by this function.
//  d_inputPointerArray  : rLen input records (device pointer).
//  rLen                 : number of records.
//  d_outputKeyArray     : receives the re-packed key bytes (device pointer).
//  d_outputValArray     : receives the re-packed value bytes (device pointer).
//  d_outputPointerArray : receives the sorted records; their offsets are
//                         updated to point into the output arrays.
//  h_outputKeyListRange : out; host array of per-key record ranges, filled by
//                         getChunkBoundary.
//Returns the value returned by getChunkBoundary (the number of chunks, i.e.
//distinct keys).
//NOTE(review): the (x, y) pair of a record presumably describes its key and
//the (z, w) pair its value, each as (offset, size) -- inferred from which
//getter/setter is paired with which array below; confirm against cmp_type_t.
int GPUBitonicSortMem (void * d_inputKeyArray, int totalKeySize, void * d_inputValArray, int totalValueSize, 
		  cmp_type_t * d_inputPointerArray, int rLen, 
		  void * d_outputKeyArray, void * d_outputValArray, 
		  cmp_type_t * d_outputPointerArray, int2 ** h_outputKeyListRange
		  )
{
	saven_initialPrefixSum(rLen);
	//array_startTime(1);
	int numDistinctKey=0;
	//bitonicSortGPU is handed -1 as the byte length.
	int totalLenInBytes=-1;
	bitonicSortGPU(d_inputKeyArray, totalLenInBytes, d_inputPointerArray, rLen, d_outputPointerArray);
	//array_endTime("sort", 1);
	//!we first scatter the values and then the keys. so that we can reuse d_PA. 
	int2 *d_PA;
	CUDA_SAFE_CALL( cudaMalloc( (void**) (&d_PA), sizeof(int2)*rLen) );	
	//scatter the values.
	if(d_inputValArray!=NULL)
	{
		getZWArray(d_outputPointerArray, rLen, d_PA);
		copyChunks(d_inputValArray, d_PA, rLen, d_outputValArray);
		setZWArray(d_outputPointerArray, rLen, d_PA);
	}
	
	//scatter the keys.
	if(d_inputKeyArray!=NULL)
	{
		getXYArray(d_outputPointerArray, rLen, d_PA);
		copyChunks(d_inputKeyArray, d_PA, rLen, d_outputKeyArray);	
		setXYArray(d_outputPointerArray, rLen, d_PA);
	}
	//The scratch buffer is not needed past this point. It was previously
	//never released, leaking sizeof(int2)*rLen bytes of device memory on
	//every call.
	cudaFree(d_PA);

	//find the boundary for each key.
	numDistinctKey=getChunkBoundary(d_outputKeyArray, d_outputPointerArray, rLen, h_outputKeyListRange);

	return numDistinctKey;

}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
日韩欧美不卡一区| 亚洲情趣在线观看| 曰韩精品一区二区| 国产一区二区网址| 午夜精品久久久久影视| 国产乱对白刺激视频不卡| 欧美日本在线观看| 亚洲国产乱码最新视频| 国产福利一区在线观看| 欧美成人猛片aaaaaaa| 亚洲图片有声小说| 91丨porny丨国产| 欧美成人伊人久久综合网| 亚洲免费观看在线视频| 成人爱爱电影网址| 国产日韩欧美激情| 丰满少妇在线播放bd日韩电影| 日韩一区二区电影| 精品在线播放午夜| 欧美成人激情免费网| 亚洲成人自拍网| 欧美日韩精品是欧美日韩精品| 亚洲精品视频在线观看免费| 成人的网站免费观看| 国产精品丝袜久久久久久app| 国产精品一区在线观看乱码| 精品久久一区二区三区| 国产成人在线视频播放| 亚洲色图第一区| 欧美视频精品在线| 波多野结衣视频一区| 久久尤物电影视频在线观看| 一本色道久久综合狠狠躁的推荐 | 玉米视频成人免费看| 日韩网站在线看片你懂的| 视频一区二区三区入口| 欧美视频一区二区三区在线观看| 日产国产高清一区二区三区| 亚洲精品v日韩精品| 日本福利一区二区| 欧美大胆人体bbbb| 青青草91视频| 555www色欧美视频| 奇米777欧美一区二区| 日韩一区二区三区三四区视频在线观看 | 精品国产伦一区二区三区观看方式 | 色婷婷久久久综合中文字幕| 一区二区三区丝袜| 欧美日本一道本| 亚洲mv在线观看| 欧美v日韩v国产v| 色综合久久九月婷婷色综合| 美女视频黄免费的久久| 国产精品久久久久久福利一牛影视| 日本韩国欧美国产| 激情五月播播久久久精品| 亚洲精选视频免费看| 久久综合九色综合欧美亚洲| 91国产成人在线| 国产成人一区二区精品非洲| 亚洲高清三级视频| 国产精品另类一区| 精品少妇一区二区三区日产乱码 | 久久久久久毛片| 在线观看一区二区视频| 欧美日韩精品一区二区天天拍小说| 99久久久久免费精品国产| av电影一区二区| www.亚洲人| 91免费国产在线| 欧美日韩高清影院| 激情文学综合插| 亚洲欧洲99久久| 久久久影视传媒| 欧美色综合网站| 欧洲另类一二三四区| 国产精品女同互慰在线看| 丁香激情综合国产| 欧美v国产在线一区二区三区| 国产毛片精品一区| 亚洲欧美电影一区二区| 欧美日韩一卡二卡三卡| 久久er99精品| 综合久久久久久久| 日韩欧美中文字幕制服| 懂色av噜噜一区二区三区av| 夜夜嗨av一区二区三区网页| 这里是久久伊人| 777午夜精品免费视频| 91麻豆精品国产91久久久资源速度| 欧美午夜在线一二页| 欧美无人高清视频在线观看| 97se亚洲国产综合自在线| 在线观看视频一区| 欧美日韩国产精品成人| 日韩一区二区三区在线视频| 精品处破学生在线二十三| 日韩一区二区三区三四区视频在线观看 | 成人av在线电影| 91女神在线视频| 欧美剧在线免费观看网站| 日韩一级免费一区| 欧美国产禁国产网站cc| 欧美韩国日本综合| 一区二区三区四区av| 日本成人超碰在线观看| 国产精品一二三区在线| 国产精品一区久久久久| 色综合久久久久综合体桃花网| 欧美另类高清zo欧美| 欧美成人精品二区三区99精品| 亚洲欧洲日产国码二区| 久久99国产精品麻豆| 久久亚洲精品小早川怜子| 国产馆精品极品| 欧美一区日本一区韩国一区| 婷婷中文字幕一区三区| 91在线观看美女| 亚洲欧洲一区二区三区| 懂色av一区二区夜夜嗨| 99久久精品免费观看| 欧美三级欧美一级| 国产欧美日本一区视频| 亚洲成a人片在线不卡一二三区 | 欧美日韩一级二级三级| 国产91高潮流白浆在线麻豆| 蜜桃视频在线观看一区二区| 亚洲综合激情另类小说区| 欧美电影免费观看完整版| 精品一区在线看| 午夜精品aaa| 亚洲制服丝袜一区| 成人欧美一区二区三区视频网页| 精品国产一二三区| 日韩亚洲欧美中文三级| 亚洲精品欧美综合四区| 国产精品99久久久久久有的能看 | 精品一区二区三区不卡| 国产成人免费在线观看| 欧美精品在线观看播放| 亚洲精品国久久99热| 99久久国产综合精品女不卡| 国产无遮挡一区二区三区毛片日本| 亚洲国产你懂的| 日本高清不卡aⅴ免费网站| 亚洲欧洲制服丝袜| 一本到三区不卡视频| 国产日韩欧美高清| 岛国av在线一区| 国产精品成人在线观看| 岛国精品在线播放| 国产精品免费久久久久| 成人动漫一区二区| 亚洲美女淫视频| 91蜜桃传媒精品久久久一区二区| 国产清纯白嫩初高生在线观看91| 国产综合久久久久久久久久久久| 欧美成人精品3d动漫h| 免费看日韩精品| 日本一区二区动态图| 在线视频中文字幕一区二区| 国产91精品一区二区麻豆网站| 日本视频免费一区| 
久久婷婷国产综合国色天香| 91在线视频播放| 精品一区二区三区久久久| 亚洲一区二区三区激情| 国产亚洲成年网址在线观看| 欧美在线观看视频一区二区| 中文字幕一区二区三中文字幕| 国内精品国产三级国产a久久| 亚洲综合色丁香婷婷六月图片| 亚洲一二三四区| 日本最新不卡在线| 欧美手机在线视频| 在线电影院国产精品| 欧美一区二区成人6969| 亚洲精品在线三区| 国产嫩草影院久久久久| 国产精品福利一区二区三区| 亚洲乱码国产乱码精品精小说 | 国产乱码精品一区二区三区忘忧草| 另类欧美日韩国产在线| 久久久影视传媒| 欧美色男人天堂| 成人美女视频在线看| 婷婷成人综合网| 中文字幕在线不卡国产视频| 日韩一二在线观看| 欧美人伦禁忌dvd放荡欲情| 精品一区二区免费在线观看| 亚洲男同性视频| 国产精品久久毛片av大全日韩| 欧美在线综合视频| 国产一区二区成人久久免费影院| 亚洲一区二区av在线| 欧美高清在线一区| 337p粉嫩大胆噜噜噜噜噜91av| 日本高清免费不卡视频| 成人免费观看男女羞羞视频|