?? reversearray_singleblock.cu
字號:
/*
* Copyright 1993-2008 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws. Users and possessors of this source code
* are hereby granted a nonexclusive, royalty-free license to use this code
* in individual and commercial software.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
// includes, system
#include <stdio.h>
#include <assert.h>
// Simple utility function to check for CUDA runtime errors
void checkCUDAError(const char* msg);
// Part 1 of 1: implement the kernel
__global__ void reverseArrayBlock(int *d_out, int *d_in)
{
int in = threadIdx.x;
int out = blockDim.x - 1 - threadIdx.x;
d_out[out] = d_in[in];
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main( int argc, char** argv)
{
// pointer for host memory and size
int *h_a;
int dimA = 256;
// pointer for device memory
int *d_b, *d_a;
// define grid and block size
int numBlocks = 1;
int numThreadsPerBlock = dimA;
// allocate host and device memory
size_t memSize = numBlocks * numThreadsPerBlock * sizeof(int);
h_a = (int *) malloc(memSize);
cudaMalloc( (void **) &d_a, memSize );
cudaMalloc( (void **) &d_b, memSize );
// Initialize input array on host
for (int i = 0; i < dimA; ++i)
{
h_a[i] = i;
}
// Copy host array to device array
cudaMemcpy( d_a, h_a, memSize, cudaMemcpyHostToDevice );
// launch kernel
dim3 dimGrid(numBlocks);
dim3 dimBlock(numThreadsPerBlock);
reverseArrayBlock<<< dimGrid, dimBlock >>>( d_b, d_a );
// block until the device has completed
cudaThreadSynchronize();
// check if kernel execution generated an error
// Check for any CUDA errors
checkCUDAError("kernel invocation");
// device to host copy
cudaMemcpy( h_a, d_b, memSize, cudaMemcpyDeviceToHost );
// Check for any CUDA errors
checkCUDAError("memcpy");
// verify the data returned to the host is correct
for (int i = 0; i < dimA; i++)
{
assert(h_a[i] == dimA - 1 - i );
}
// free device memory
cudaFree(d_a);
cudaFree(d_b);
// free host memory
free(h_a);
// If the program makes it this far, then the results are correct and
// there are no run-time errors. Good work!
printf("Correct!\n");
return 0;
}
void checkCUDAError(const char *msg)
{
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );
exit(EXIT_FAILURE);
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -