标签:
原文链接
1、cudaMemcpy()<--> cudaMalloc() //线性内存拷贝
1 //线性内存拷贝 2 cudaMalloc((void**)&dev_A, data_size); 3 cudaMemcpy(dev_A, host_A, data_size, cudaMemcpyHostToDevice);
2、cudaMemcpy2D()<-->cudaMallocPitch() //线性内存拷贝
cudaError_t cudaMemcpy2D(
void * dst,
size_t dpitch,
const void * src,
size_t spitch,
size_t width,
size_t height,
enum cudaMemcpyKind kind
)
例:
1 cudaMallocPitch((void**)&devPtr, &pitch, width * sizeof(float), height); 2 cudaMemcpy2D( void* dst,size_t dpitch,const void* src,size_t spitch,size_t width,size_t height,enum cudaMemcpyKind kind )
3、cudaMemcpy2DToArray()<-->cudaMallocArray() //(二维)线性内存到2维数组的拷贝
1 cudaError_t cudaMemcpy2DToArray ( 2 struct cudaArray * dst, 3 size_t wOffset, 4 size_t hOffset, 5 const void * src, 6 size_t spitch, 7 size_t width, 8 size_t height, 9 enum cudaMemcpyKind kind 10 )
例:
1 void mv(float *y, float *A, float *x, int m, int n)
2 {
3 int blkNum = (m >> 4) + ((m & 15) ? 1 : 0);
4 int height = blkNum << 4;
5 int width = (n & 255) ? (((n >> 8) + 1) << 8) : n;
6 dim3 threads(16, 16);
7 dim3 grid(blkNum, 1);
8 cudaArray *d_A;
9 float *d_x, *d_y;
10
11 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
12 cudaMallocArray(&d_A, &channelDesc, width >> 2, height);
13 cudaMemcpy2DToArray(d_A, 0, 0, A, n * sizeof(float), n * sizeof(float), m, cudaMemcpyHostToDevice);
14 cudaBindTextureToArray(texRefA, d_A);
15 cudaMalloc((void **) &d_x, n * sizeof(float));
16 cudaMalloc((void **) &d_y, m * sizeof(float));
17
18 cudaMemcpy(d_x, x, n * sizeof(float), cudaMemcpyHostToDevice);
19 mv_kernel<<< grid, threads >>>(d_y, d_A, d_x, m, n);
20 cudaMemcpy(y, d_y, m * sizeof(float), cudaMemcpyDeviceToHost);
21
22 cudaFree(d_y);
23 cudaFree(d_x);
24 cudaUnbindTexture(texRefA);
25 cudaFreeArray(d_A);
26 }
4、cudaMemcpyToArray()<-->cudaMallocArray() //(1维)线性内存到2维数组的拷贝
1 cudaError_t cudaMemcpyToArray( 2 struct cudaArray * dst, 3 size_t wOffset, 4 size_t hOffset, 5 const void * src, 6 size_t count, 7 enum cudaMemcpyKind kind 8 )
例:
1 void initCudaTexture(float *h_volume, float2 *velocity)
2 {
3 cudaChannelFormatDesc desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
4
5 cudaMallocArray(&d_volumeArray, &desc, 128, 128);
6
7 cudaMemcpyToArray(d_volumeArray, 0, 0, h_volume, sizeof(float)*128*128, cudaMemcpyDeviceToDevice);
8
9 tex.normalized = true;
10 tex.filterMode = cudaFilterModeLinear;
11 tex.addressMode[0] = cudaAddressModeWrap;
12 tex.addressMode[1] = cudaAddressModeWrap;
13
14 cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray));
15
16 }
5、cudaMemcpy3D()<-->cudaMalloc3DArray() //(1维)线性内存到3维数组的拷贝
1 cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms * p)
2
3 struct cudaExtent {
4 size_t width;
5 size_t height;
6 size_t depth;
7 };
8 struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d);
9
10 struct cudaPos {
11 size_t x;
12 size_t y;
13 size_t z;
14 };
15 struct cudaPos make_cudaPos(size_t x, size_t y, size_t z);
16
17 struct cudaMemcpy3DParms {
18 struct cudaArray *srcArray;
19 struct cudaPos srcPos;
20 struct cudaPitchedPtr srcPtr;
21 struct cudaArray *dstArray;
22 struct cudaPos dstPos;
23 struct cudaPitchedPtr dstPtr;
24 struct cudaExtent extent;
25 enum cudaMemcpyKind kind;
26 };
例:
1 void initCudaTexture(const uchar *h_volume, cudaExtent volumeSize)
2 {
3 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
4
5 cutilSafeCall(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));
6
7 cudaMemcpy3DParms copyParams = {0};
8 copyParams.srcPtr = make_cudaPitchedPtr((void*)h_volume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height);
9 copyParams.dstArray = d_volumeArray;
10 copyParams.extent = volumeSize;
11 copyParams.kind = cudaMemcpyHostToDevice;
12 cutilSafeCall(cudaMemcpy3D(©Params));
13
14 tex.normalized = true;
15 tex.filterMode = cudaFilterModeLinear;
16 tex.addressMode[0] = cudaAddressModeWrap;
17 tex.addressMode[1] = cudaAddressModeWrap;
18 tex.addressMode[2] = cudaAddressModeWrap;
19
20 cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
21 }
6、cudaMemcpyToSymbol() //拷贝到常数存储器
1 __constant__ float constData[256]; 2 float data[256]; 3 cudaMemcpyToSymbol(constData, data, sizeof(data)); 4 cudaMemcpyFromSymbol(data, constData, sizeof(data)); 5 __device__ float devData; float value = 3.14f; 6 cudaMemcpyToSymbol(devData, &value, sizeof(float)); 7 __device__ float* devPointer; float* ptr; 8 cudaMalloc(&ptr, 256 * sizeof(float)); 9 cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr));
标签:
原文地址:http://www.cnblogs.com/liangliangdetianxia/p/4198843.html