亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? backup_simplestreams.cu

?? md5_cuda編程
?? CU
?? 第 1 頁 / 共 2 頁
字號:
	*ob++ = *ib++;
	ib = inblock; fb = fret;	/* R[i]=L[i] XOR f(R[i-1],key)  */
	*ob++ = *ib++ ^ *fb++;
	*ob++ = *ib++ ^ *fb++;
	*ob++ = *ib++ ^ *fb++;
	*ob++ = *ib++ ^ *fb++;
}

/*
 * Encrypt one 64-bit (8-byte) block.
 *
 * inblock  : 8 input bytes.
 * outblock : receives the 8 output bytes.
 *
 * Steps: initial permutation (iperm), 16 calls to iter() with round
 * numbers 0..15, exchange of the two 4-byte halves, final permutation
 * (fperm).
 */
__device__ void endes(unsigned char *inblock, unsigned char *outblock)
{
	unsigned char stage[17][8];		/* stage[r] holds the block before round r */
	unsigned char halves_swapped[8];	/* last stage with its halves exchanged    */

	permute(inblock, iperm, stage[0]);

	/* Every round writes into a fresh row, so nothing is copied back. */
	for (int round = 0; round < 16; ++round)
		iter(round, stage[round], stage[round + 1]);

	/* Bytes 4..7 of the last stage go first, bytes 0..3 go second. */
	for (int k = 0; k < 4; ++k) {
		halves_swapped[k]     = stage[16][k + 4];
		halves_swapped[k + 4] = stage[16][k];
	}

	permute(halves_swapped, fperm, outblock);
}

/*
 * Decrypt one 64-bit (8-byte) block.
 *
 * inblock  : 8 input bytes.
 * outblock : receives the 8 output bytes.
 *
 * Same structure as encryption, except that iter() is called with the
 * round numbers in reverse order (15 down to 0).
 */
__device__ void dedes(unsigned char *inblock, unsigned char *outblock)
{
	unsigned char stage[17][8];		/* stage[r] holds the block before step r */
	unsigned char halves_swapped[8];	/* last stage with its halves exchanged   */

	permute(inblock, iperm, stage[0]);

	/* Step n uses round number 15-n. */
	for (int step = 0; step < 16; ++step)
		iter(15 - step, stage[step], stage[step + 1]);

	/* Bytes 4..7 of the last stage go first, bytes 0..3 go second. */
	for (int k = 0; k < 4; ++k) {
		halves_swapped[k]     = stage[16][k + 4];
		halves_swapped[k + 4] = stage[16][k];
	}

	permute(halves_swapped, fperm, outblock);
}

/*
 * Encrypt a buffer of 8-byte blocks with endes().
 *
 * in   : device pointer to the input bytes.
 * out  : device pointer to the output bytes (same layout as `in`).
 * size : number of bytes to process. The launches in main() pass a byte
 *        count. Only whole 8-byte blocks lying entirely inside `size`
 *        are processed; a trailing partial block is left untouched.
 *
 * Launch layout: 1-D grid of 1-D thread blocks. Thread number
 * t = blockIdx.x * blockDim.x + threadIdx.x starts at block t and then
 * advances by the total number of threads in the grid (grid-stride loop),
 * so every block is handled by exactly one thread for any launch size.
 *
 * Changes from the previous version:
 *  - the start offset is t*8 bytes, not t bytes; before, neighbouring
 *    threads worked on overlapping 8-byte windows and raced on `out`;
 *  - the loop is guarded up front, so a thread with no work does nothing
 *    instead of processing one out-of-range block;
 *  - the __syncthreads() calls are gone: nothing in this kernel is
 *    exchanged between threads, and a barrier inside a loop whose trip
 *    count can differ between threads of a block is undefined behaviour.
 */
__global__ void kernel_endes(unsigned char *in, unsigned char *out, int size)
{
	if (size < 8)
		return;					/* not even one whole block */

	/* Highest byte offset at which a whole block still fits. */
	const size_t last_offset = (size_t)size - 8;
	/* Bytes covered by one pass of the whole grid. */
	const size_t stride = (size_t)gridDim.x * blockDim.x * 8;

	for (size_t offset = ((size_t)blockIdx.x * blockDim.x + threadIdx.x) * 8;
	     offset <= last_offset;
	     offset += stride)
		endes(in + offset, out + offset);
}

/*
 * Decrypt a buffer of 8-byte blocks with dedes().
 *
 * in   : device pointer to the input bytes.
 * out  : device pointer to the output bytes (same layout as `in`).
 * size : number of bytes to process. The launches in main() pass a byte
 *        count. Only whole 8-byte blocks lying entirely inside `size`
 *        are processed; a trailing partial block is left untouched.
 *
 * Launch layout: 1-D grid of 1-D thread blocks. Thread number
 * t = blockIdx.x * blockDim.x + threadIdx.x starts at block t and then
 * advances by the total number of threads in the grid (grid-stride loop),
 * so every block is handled by exactly one thread for any launch size.
 *
 * Changes from the previous version:
 *  - the start offset is t*8 bytes, not t bytes; before, neighbouring
 *    threads worked on overlapping 8-byte windows and raced on `out`;
 *  - the loop is guarded up front, so a thread with no work does nothing
 *    instead of processing one out-of-range block;
 *  - the __syncthreads() calls are gone: nothing in this kernel is
 *    exchanged between threads, and a barrier inside a loop whose trip
 *    count can differ between threads of a block is undefined behaviour.
 */
__global__ void kernel_dedes(unsigned char *in, unsigned char *out, int size)
{
	if (size < 8)
		return;					/* not even one whole block */

	/* Highest byte offset at which a whole block still fits. */
	const size_t last_offset = (size_t)size - 8;
	/* Bytes covered by one pass of the whole grid. */
	const size_t stride = (size_t)gridDim.x * blockDim.x * 8;

	for (size_t offset = ((size_t)blockIdx.x * blockDim.x + threadIdx.x) * 8;
	     offset <= last_offset;
	     offset += stride)
		dedes(in + offset, out + offset);
}

/*
 * Copy the host-side tables (iperm_host, fperm_host, s_host, p32_host,
 * kn_host) into the device symbols of the same base name.
 *
 * Each copy is sized by the device symbol, exactly as before. The return
 * code of every copy is now checked with CUDA_SAFE_CALL, the same macro
 * main() uses, so a failed upload is reported here instead of surfacing
 * later as wrong kernel output.
 */
__host__ void cpy_const_data_from_host_to_device()
{
	CUDA_SAFE_CALL( cudaMemcpyToSymbol(iperm, iperm_host, sizeof(iperm)) );
	CUDA_SAFE_CALL( cudaMemcpyToSymbol(fperm, fperm_host, sizeof(fperm)) );
	CUDA_SAFE_CALL( cudaMemcpyToSymbol(s, s_host, sizeof(s)) );
	CUDA_SAFE_CALL( cudaMemcpyToSymbol(p32, p32_host, sizeof(p32)) );
	CUDA_SAFE_CALL( cudaMemcpyToSymbol(kn, kn_host, sizeof(kn)) );
}

/*
 * Fill input[0..num) with the lowercase alphabet repeated over and over:
 * 'a', 'b', ..., 'z', 'a', 'b', ...
 * Nothing is written when num <= 0.
 */
void generate_input(unsigned char* input, int num)
{
	int letter = 0;				/* position inside the alphabet, 0..25 */
	int pos = 0;

	while (pos < num) {
		input[pos] = (unsigned char)('a' + letter);
		++pos;
		letter = (letter == 25) ? 0 : letter + 1;	/* wrap after 'z' */
	}
}

/*
 * Check that input[0..n) holds the repeating lowercase alphabet written by
 * generate_input().
 *
 * input : bytes to verify.
 * out   : not examined; kept so existing callers keep compiling.
 * n     : number of bytes to check.
 *
 * Returns 1 when every byte matches (also when n <= 0), 0 at the first
 * mismatch.
 */
int correct_data(unsigned char *input,  unsigned char *out, const int n)
{
	(void)out;

	int letter = 0;				/* expected position in the alphabet */
	int pos = 0;

	/* Walk forward for as long as the bytes agree with the pattern. */
	while (pos < n && input[pos] == 'a' + letter) {
		letter = (letter == 25) ? 0 : letter + 1;
		++pos;
	}

	return (pos >= n) ? 1 : 0;
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
__device__ int get_value(const int c);
unsigned int get_value_host(const int c);


__global__ void init_array(int *values, int *out, int size)
{ 
	int i= (THREAD_NUM*blockIdx.x) + threadIdx.x;

      do {
	    // 1) Copy input to .
    	int v = values[i];
		int o =0;

		// 2) Synchronization
	    __syncthreads();

		// 3) Process

		 o = get_value(v);


		 // 5) Write the result to device memory.
		 //values[i] = o; 
		 out[i] =o;

		// 4) Synchronization again
         __syncthreads();


		 i+= (THREAD_NUM<<THREAD_BLK_NUM_LOG2);//(THREAD_NUM)*THREAD_BLK_NUM; 

		 } while (i < size ); 

}

__device__ int get_value(const int in)
{
	unsigned int out=in;

	 for (int loop=0; loop<REPEAT_TIME; loop++)
	 {
		 out=(out*out) & 0xFFFF;
	 }

	return out;
}

unsigned int get_value_host(const int in)
{
	unsigned int out=in;

	 for (int loop=0; loop<REPEAT_TIME; loop++)
	 {
		 out=(out*out) & 0xFFFF;
	 }
	return out;

}

*/

int test();

/* Block the host until every stream in streams[0..count) has drained. */
static void wait_for_streams(cudaStream_t *streams, int count)
{
    for (int i = 0; i < count; i++)
        CUDA_SAFE_CALL( cudaStreamSynchronize(streams[i]) );
}

/* Convert a clock() tick difference to whole milliseconds. */
static long ticks_to_ms(clock_t ticks)
{
    return (long)((double)ticks * 1000.0 / CLOCKS_PER_SEC);
}

/*
 * Driver: encrypts a generated buffer on the GPU using several streams,
 * decrypts it again, and checks that the round trip reproduces the input.
 *
 * Fixes relative to the previous version:
 *  - the host waits for the streams before it prints `out` and before it
 *    clears `a`; both buffers were previously touched while asynchronous
 *    copies and kernels could still be using them;
 *  - `out`, `d_o` and the stream-handle array are released at the end;
 *  - clock() differences are converted to milliseconds and printed with a
 *    matching format (they were raw ticks printed with %d);
 *  - the speed-up ratio uses floating-point division;
 *  - per-stream offsets are computed as i * (nbytes / nstreams) in size_t,
 *    avoiding int overflow of i * nbytes;
 *  - CUDA calls and kernel launches are checked.
 *
 * Returns 0 on success, 1 when no CUDA device or no memory is available.
 */
int main(int argc, unsigned char *argv[])
{
    int nstreams = 8;               // number of streams for CUDA calls
    int nbytes = NUM * 8;           // number of data bytes
    dim3 threads, blocks;           // kernel launch configuration
    float elapsed_time=0, time_memcpy=0, time_kernel=0;   // timing variables

    test();

    unsigned char keyx[9]= "cxskq";
    desinit(keyx);                  /* set up tables for DES */

    // check the compute capability of the device
    int num_devices=0;
    CUDA_SAFE_CALL( cudaGetDeviceCount(&num_devices) );
    if(0==num_devices)
    {
        printf("your system does not have a CUDA capable device\n");
        return 1;
    }
    cudaDeviceProp device_properties;
    CUDA_SAFE_CALL( cudaGetDeviceProperties(&device_properties, 0) );
    if( (1 == device_properties.major) && (device_properties.minor < 1))
        printf("%s does not have compute capability 1.1 or later\n\n", device_properties.name);

    printf("////////////////////////////////////////////////\n");
    printf("64-bit_array_size=%d(M) (total size=%d (M))thread_blk_num=%d\n", NUM/1024/1024, 2*NUM/1024/1024, THREAD_BLK_NUM);

    // Bytes handled by each stream. Offsets are built from this value so
    // that i * nbytes is never formed in int arithmetic.
    const size_t chunk = (size_t)(nbytes / nstreams);

    // allocate host memory (pinned is required to achieve asynchronicity)
    unsigned char *a = 0;           // plaintext / decrypted data in host memory
    unsigned char *out=0;           // ciphertext in host memory
    CUDA_SAFE_CALL( cudaMallocHost((void**)&a, nbytes) );
    memset(a,0,nbytes);

    CUDA_SAFE_CALL( cudaMallocHost((void**)&out, nbytes) );
    memset(out,0,nbytes);

    // allocate and initialize an array of stream handles
    cudaStream_t *streams = (cudaStream_t*) malloc(nstreams * sizeof(cudaStream_t));
    if (streams == 0)
    {
        printf("out of host memory\n");
        cudaFreeHost(a);
        cudaFreeHost(out);
        return 1;
    }
    for(int i = 0; i < nstreams; i++)
        CUDA_SAFE_CALL( cudaStreamCreate(&(streams[i])) );

    // create CUDA event handles
    cudaEvent_t start_event, stop_event;
    CUDA_SAFE_CALL( cudaEventCreate(&start_event) );
    CUDA_SAFE_CALL( cudaEventCreate(&stop_event) );

    // allocate device memory
    unsigned char *d_a = 0, *d_o = 0;   // device copies of `a` and `out`
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_a, nbytes) );
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_o, nbytes) );

    CUDA_SAFE_CALL( cudaMemset(d_a, 0, nbytes) );
    CUDA_SAFE_CALL( cudaMemset(d_o, 0, nbytes) );

    /*
	// time memcopy from device
    cudaEventRecord(start_event, 0);     // record in stream-0, to ensure that all previous CUDA calls have completed

    CUDA_SAFE_CALL( cudaMemcpy(d_a, a, nbytes, cudaMemcpyHostToDevice) );

    cudaMemcpyAsync(out, d_o, nbytes, cudaMemcpyDeviceToHost, streams[0]);
    cudaEventRecord(stop_event, 0);
    cudaEventSynchronize(stop_event);   // block until the event is actually recorded
    CUDA_SAFE_CALL( cudaEventElapsedTime(&time_memcpy, start_event, stop_event) );
    printf("2-way memcopy :\t%.2f\n", time_memcpy);
    
    // time kernel
    threads=dim3(THREAD_NUM, 1);
	blocks=dim3(THREAD_BLK_NUM, 1);
    cudaEventRecord(start_event, 0);
	cpy_const_data_from_host_to_device();
	kernel_endes<<<blocks, threads, SHARED_MEM_SIZE, streams[0]>>>(d_a, d_o, nbytes/8);
    cudaEventRecord(stop_event, 0);
    cudaEventSynchronize(stop_event);
    CUDA_SAFE_CALL( cudaEventElapsedTime(&time_kernel, start_event, stop_event) );
    printf("kernel computing: %.2f\n", time_kernel);

    //////////////////////////////////////////////////////////////////////
    // time non-streamed execution for reference
    threads=dim3(THREAD_NUM, 1);
	blocks=dim3(1, 1);

	memset(a,0,nbytes);

	cudaEventRecord(start_event, 0);

    CUDA_SAFE_CALL( cudaMemcpy(d_a, a, nbytes, cudaMemcpyHostToDevice) );

	cpy_const_data_from_host_to_device();
	kernel_endes<<<blocks, threads,SHARED_MEM_SIZE>>>(d_a, d_o, nbytes/8);
    cudaMemcpy(out, d_o, nbytes, cudaMemcpyDeviceToHost);
    cudaEventRecord(stop_event, 0);
    cudaEventSynchronize(stop_event);
    CUDA_SAFE_CALL( cudaEventElapsedTime(&elapsed_time, start_event, stop_event) );
	printf("Time to 2-way memcpy & execute:\n");
    printf("non-streamed:\t%.2f (%.2f expected = %.2f + %.2f)\n", elapsed_time,  time_kernel + time_memcpy,
		time_kernel, time_memcpy);
	*/

    //////////////////////////////////////////////////////////////////////
    // time execution with nstreams streams
    threads=dim3(THREAD_NUM,1);
    blocks=dim3(THREAD_BLK_NUM,1);

    generate_input(a,nbytes);

    clock_t start_clock_gpu, end_clock_gpu;
    start_clock_gpu = clock();

    CUDA_SAFE_CALL( cudaMemset(d_a, 0, nbytes) );
    CUDA_SAFE_CALL( cudaMemset(d_o, 0, nbytes) );
    CUDA_SAFE_CALL( cudaEventRecord(start_event, 0) );

    /// Encode
    // Asynchronously queue one host-to-device copy per stream. Work queued
    // in a stream runs in order, so each stream's kernel sees its own data.
    for(int i = 0; i < nstreams; i++)
        CUDA_SAFE_CALL( cudaMemcpyAsync(d_a + i * chunk, a + i * chunk, chunk, cudaMemcpyHostToDevice, streams[i]) );

    cpy_const_data_from_host_to_device();
    // one kernel per stream, each operating on its own portion of data
    for(int i = 0; i < nstreams; i++)
        kernel_endes<<<blocks, threads, SHARED_MEM_SIZE, streams[i]>>>(d_a + i * chunk, d_o + i * chunk, nbytes/nstreams);
    CUDA_SAFE_CALL( cudaGetLastError() );   // launches report errors only this way

    // one device-to-host copy per stream
    for(int i = 0; i < nstreams; i++)
        CUDA_SAFE_CALL( cudaMemcpyAsync(out + i * chunk, d_o + i * chunk, chunk, cudaMemcpyDeviceToHost, streams[i]) );

    // The host is about to read `out` and overwrite `a`; both are still in
    // use by the queued work until the streams have drained.
    wait_for_streams(streams, nstreams);

    printf("Encoding ... \n" );
    for (int i=0; i<80; i++)
        printf("i=%d in=%x out=%x correct=%d\n", i, a[i] & 0xFF, out[i]& 0xFF, 0 );

    memset(a,0,nbytes);

    /// Decode
    for(int i = 0; i < nstreams; i++)
        CUDA_SAFE_CALL( cudaMemcpyAsync(d_o + i * chunk, out + i * chunk, chunk, cudaMemcpyHostToDevice, streams[i]) );

    cpy_const_data_from_host_to_device();
    for(int i = 0; i < nstreams; i++)
        kernel_dedes<<<blocks, threads, SHARED_MEM_SIZE, streams[i]>>>(d_o + i * chunk, d_a + i * chunk, nbytes/nstreams);
    CUDA_SAFE_CALL( cudaGetLastError() );

    for(int i = 0; i < nstreams; i++)
        CUDA_SAFE_CALL( cudaMemcpyAsync(a + i * chunk, d_a + i * chunk, chunk, cudaMemcpyDeviceToHost, streams[i]) );

    CUDA_SAFE_CALL( cudaEventRecord(stop_event, 0) );
    CUDA_SAFE_CALL( cudaEventSynchronize(stop_event) );
    CUDA_SAFE_CALL( cudaEventElapsedTime(&elapsed_time, start_event, stop_event) );

    // Make sure the decoded data has really arrived in `a` before the host
    // reads it, independent of how stream 0 orders against other streams.
    wait_for_streams(streams, nstreams);

    end_clock_gpu = clock();
    printf("%d streams:\t%.2f (%.2f expected = %.2f + %.2f/%d)\n", nstreams, elapsed_time , time_kernel + time_memcpy / nstreams,
        time_kernel, time_memcpy,nstreams );

    const clock_t gpu_ticks = end_clock_gpu - start_clock_gpu;
    printf("Time taken by GPU using clock()= %ld ms\n", ticks_to_ms(gpu_ticks));

    printf("Decoding ... \n" );
    for (int i=0; i<80; i++)
        printf("i=%d in=%x out=%x correct=%d\n", i, a[i] & 0xFF, out[i] & 0xFF, 0 );

    // check whether the output is correct
    printf("------------VERIFY USING CPU-------------------\n");

    clock_t start_clock_cpu, end_clock_cpu;

    start_clock_cpu = clock();
    int verification = correct_data(a, out, nbytes);
    end_clock_cpu = clock();

    if(verification)
        printf("Test PASSED\n");
    else
        printf("Test FAILED\n");

    const clock_t cpu_ticks = end_clock_cpu - start_clock_cpu;
    printf("Time taken by single threaded CPU using clock()= %ld ms\n", ticks_to_ms(cpu_ticks));

    // Ratio of the two measured intervals; 1 when the GPU interval is too
    // short to measure.
    float faster_time = gpu_ticks < 1 ? 1.0f : (float)cpu_ticks / (float)gpu_ticks;

    printf("GPU is %.2f (%ld/%ld) times faster than CPU.\n", faster_time,
        ticks_to_ms(cpu_ticks), ticks_to_ms(gpu_ticks));

    // release resources
    for(int i = 0; i < nstreams; i++)
        cudaStreamDestroy(streams[i]);
    free(streams);
    cudaEventDestroy(start_event);
    cudaEventDestroy(stop_event);
    cudaFreeHost(a);
    cudaFreeHost(out);
    cudaFree(d_a);
    cudaFree(d_o);

    CUT_EXIT(argc, argv);

    return 0;
}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
蜜桃精品视频在线观看| 国产精品久久久久久久久图文区 | 成人动漫精品一区二区| 蜜臀av亚洲一区中文字幕| 日韩理论片一区二区| 国产日韩欧美制服另类| 欧美本精品男人aⅴ天堂| 国产精品自拍三区| 国内久久精品视频| 精久久久久久久久久久| 黄色日韩网站视频| 国产麻豆视频一区二区| 国产福利精品一区| 色88888久久久久久影院野外| 色八戒一区二区三区| 91看片淫黄大片一级在线观看| 色综合久久中文字幕综合网| 91亚洲国产成人精品一区二三| 欧美精品自拍偷拍| 6080国产精品一区二区| 在线观看91av| 国产精品视频第一区| 综合网在线视频| 亚洲私人黄色宅男| 亚洲一卡二卡三卡四卡| 午夜精品成人在线| 91视频在线观看| 日韩一区二区免费电影| 久久综合九色综合久久久精品综合 | 日本一区中文字幕| 97久久超碰精品国产| 99久久er热在这里只有精品15| 本田岬高潮一区二区三区| 欧美一区二区三区白人| 亚洲人吸女人奶水| 国产成人在线电影| 欧美一级日韩一级| 亚洲精品免费电影| 成人毛片视频在线观看| 欧美经典三级视频一区二区三区| 亚洲午夜激情网页| 欧洲中文字幕精品| 国产精品久久久久精k8| 看电视剧不卡顿的网站| 3d成人动漫网站| 亚洲精品亚洲人成人网| 国产一区91精品张津瑜| 欧美裸体bbwbbwbbw| 亚洲电影中文字幕在线观看| 国产成人99久久亚洲综合精品| 日本道在线观看一区二区| 老司机精品视频在线| 国产一区91精品张津瑜| 91精品国产综合久久精品app| 日欧美一区二区| 日韩免费一区二区三区在线播放| 日本成人在线电影网| 精品成人免费观看| 色先锋aa成人| 捆绑调教一区二区三区| 国产精品美女一区二区| 在线观看免费一区| 精品一二三四在线| 亚洲男人的天堂一区二区| 在线精品观看国产| 五月婷婷综合激情| 久久只精品国产| 久久精品99国产精品日本| 久久久久久电影| av不卡免费在线观看| 夜夜爽夜夜爽精品视频| 久久久久国色av免费看影院| 色成人在线视频| 制服丝袜国产精品| 99久久精品久久久久久清纯| 青青草成人在线观看| 久久精品夜色噜噜亚洲aⅴ| 在线观看国产91| 国产丶欧美丶日本不卡视频| 亚洲18女电影在线观看| 欧美国产日本视频| 日韩欧美专区在线| 色狠狠综合天天综合综合| 国产在线播放一区二区三区 | 国产寡妇亲子伦一区二区| 日韩电影在线免费观看| 亚洲成年人网站在线观看| 亚洲影院免费观看| aaa欧美色吧激情视频| 国产黑丝在线一区二区三区| 麻豆精品新av中文字幕| 久久国产精品色婷婷| 久久激五月天综合精品| 国内成人自拍视频| 国产suv精品一区二区6| 韩国女主播成人在线| 日本在线播放一区二区三区| 亚洲一区二区三区小说| 日韩电影在线免费| 国产高清在线精品| 99久久精品国产一区| 色噜噜狠狠色综合欧洲selulu| 色拍拍在线精品视频8848| 欧美日韩国产成人在线91| 欧洲一区在线电影| 日韩欧美视频一区| 国产免费成人在线视频| 午夜影视日本亚洲欧洲精品| 开心九九激情九九欧美日韩精美视频电影 | 美女一区二区三区| 成人在线综合网| 91在线视频观看| 欧美乱熟臀69xxxxxx| 精品美女在线观看| 国产精品久99| 免费成人在线网站| 色综合久久99| 久久久精品国产免费观看同学| 一区二区三区不卡在线观看| 久久99久久99精品免视看婷婷| 91丨porny丨在线| 久久亚洲春色中文字幕久久久| 国产精品久久久久久久第一福利 | 日本成人超碰在线观看| 欧美视频精品在线| 亚洲一区二区三区影院| 欧美性做爰猛烈叫床潮| 日本一区二区三区免费乱视频| 久久精品国产秦先生| 亚洲欧美日韩在线| 色综合久久中文字幕| 国产精品二三区| 成人精品一区二区三区中文字幕| 欧美成人女星排名| 毛片av中文字幕一区二区| 色综合天天狠狠| 亚洲图片欧美视频| 欧美高清视频一二三区| 丝袜亚洲精品中文字幕一区| 91成人在线免费观看| 亚洲欧洲精品一区二区精品久久久 | youjizz久久| 亚洲青青青在线视频| 成人激情校园春色| 亚洲欧洲日韩一区二区三区| 国产精品99久久久久久宅男| 欧美国产一区二区| 亚洲精品在线三区| 99久久精品免费看| 亚洲成在人线在线播放| 精品国产网站在线观看| 国产精品69毛片高清亚洲| 中文字幕免费在线观看视频一区| 成人精品免费看| 亚洲一区二区成人在线观看| 欧美一区二区三区婷婷月色| 国产原创一区二区| 亚洲欧洲另类国产综合| 在线观看日韩高清av| 青青草成人在线观看| 亚洲三级在线看| 久久久一区二区| 在线视频国内自拍亚洲视频| 理论片日本一区| 
丝袜诱惑亚洲看片| 亚洲五码中文字幕| 国产精品婷婷午夜在线观看| 一区二区视频免费在线观看| 欧美xxxxxxxx| 欧美色欧美亚洲另类二区| 国产成人精品影视| 久久狠狠亚洲综合| 欧美aⅴ一区二区三区视频| 一区二区三区精品视频在线| 欧美岛国在线观看| aaa国产一区| 99在线精品免费| 日本韩国欧美在线| 色播五月激情综合网| 9色porny自拍视频一区二区| 福利电影一区二区| 丁香网亚洲国际| 色综合久久久久综合| 色狠狠av一区二区三区| 色噜噜狠狠色综合中国| 色综合久久66| 在线综合视频播放| 日韩三级伦理片妻子的秘密按摩| 欧美一区二区三区免费观看视频| 8v天堂国产在线一区二区| 欧美精品日韩一本| 2021中文字幕一区亚洲| 国产三区在线成人av| 亚洲美女免费视频| 日本欧美大码aⅴ在线播放| 国产精品1区2区| 欧美在线观看视频一区二区| 这里只有精品视频在线观看| 欧美成人精品二区三区99精品| 国产欧美日本一区二区三区|