码迷,mamicode.com
首页 > 其他好文 > 详细

CUDA Vector Add Test 2048x1024

时间:2017-03-21 22:19:39      阅读:297      评论:0      收藏:0      [点我收藏+]

标签:int   bool   stderr   oca   -o   err   abs   gpu   make   

  #include "cuda_runtime.h"  // CUDAVectorAdd.cu
  #include "device_launch_parameters.h"
  #include "IML_PrecisionTimer.h"

  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
  6 #define MEM_SIZE  (2048*1024)
  7 
  // Element-wise vector addition: c[idx] = a[idx] + b[idx].
  //
  // Launch layout: 1-D grid of 1-D blocks; each thread handles the single
  // element whose index is its flat global thread index. Threads whose index
  // is >= N return without touching memory, so the grid may contain more
  // threads than there are elements.
  //
  //   c  output vector (device pointer), at least N floats
  //   a  first input vector (device pointer), at least N floats
  //   b  second input vector (device pointer), at least N floats
  //   N  number of elements to process
  __global__ void addKernel(float *c, float *a, float *b, int N)
  {
      const int idx = threadIdx.x + blockDim.x * blockIdx.x;
      if (idx >= N)
          return;

      c[idx] = a[idx] + b[idx];
  }
 16 
 // Reports a failed CUDA runtime call on stderr.
 // Returns true when `status` is cudaSuccess, false otherwise.
 static bool cudaOk(cudaError_t status, const char *what)
 {
     if (status == cudaSuccess)
         return true;
     fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
     return false;
 }

 // Vector-add smoke test over MEM_SIZE floats.
 //
 // Fills two host vectors with a[i] = b[i] = i, adds them on the GPU with
 // addKernel, copies the result back and checks c[i] == 2*i. The timed region
 // covers device allocation, both uploads, the kernel and the download.
 //
 // Returns 0 when the result verifies, 1 on any allocation/CUDA failure or on
 // a result mismatch.
 int main()
 {
     PrecisionTimer g_timer;
     float gfFrametime;

     // All locals are declared up front so the `goto cleanup` paths never jump
     // over an initialization (ill-formed in C++).
     float *a = NULL;
     float *b = NULL;
     float *c = NULL;
     float *dev_a = NULL;
     float *dev_b = NULL;
     float *dev_c = NULL;

     const int size = MEM_SIZE;
     const size_t bytes = (size_t)size * sizeof(float);

     // One thread per element; ceil-div so the grid covers `size` even when it
     // is not a multiple of the block size (addKernel guards the tail).
     const int threadsPerBlock = 256;
     const int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;

     // 2*i is exactly representable in float for every i < MEM_SIZE, so the
     // tolerance only needs to be a small positive float.
     const float eps = 1e-6f;
     bool bflag = true;
     int exitCode = 1;
     int i;

     qPrecisionTimer_Init(&g_timer);

     a = (float*)malloc(bytes);
     b = (float*)malloc(bytes);
     c = (float*)malloc(bytes);
     if (a == NULL || b == NULL || c == NULL) {
         fprintf(stderr, "host malloc failed!\n");
         goto cleanup;
     }

     // Start at 0 so every element, including index 0, is initialized before
     // it is uploaded and later verified.
     for (i = 0; i < size; i++)
     {
         a[i] = (float)i;
         b[i] = (float)i;
     }
     g_timer.Start( &g_timer );

     // Allocate GPU buffers for three vectors (two input, one output).
     if (!cudaOk(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)")) goto cleanup;
     if (!cudaOk(cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)")) goto cleanup;
     if (!cudaOk(cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)")) goto cleanup;

     // Copy input vectors from host memory to GPU buffers.
     if (!cudaOk(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_a)")) goto cleanup;
     if (!cudaOk(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_b)")) goto cleanup;

     // Launch a kernel on the GPU with one thread for each element.
     addKernel<<<blocks, threadsPerBlock>>>(dev_c, dev_a, dev_b, size);
     // A launch does not return a status; configuration errors are only
     // visible through cudaGetLastError().
     if (!cudaOk(cudaGetLastError(), "addKernel launch")) goto cleanup;

     // Copy output vector from GPU buffer to host memory. This blocking copy
     // also surfaces any fault raised while the kernel was executing.
     if (!cudaOk(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)")) goto cleanup;

     gfFrametime = g_timer.End( &g_timer );
     printf("Time = %f Sec \n", gfFrametime);

     // Compare against the CPU reference; stop at the first mismatch.
     for (i = 0; i < size; i++)
     {
         // fabsf keeps the comparison in float (plain abs may pick the int overload).
         if (fabsf(c[i] - 2.0f * (float)i) > eps)
         {
             bflag = false;
             break;
         }
     }
     if (bflag)
     {
         printf("Result OK!\n");
         exitCode = 0;
     }
     else
     {
         printf("Result ERROR!\n");
     }
 #if 0
     // Debug aid: dump the first and last five results.
     for (i = 0; i < 5; i++)
     {
         printf("c[%d] = %f\n", i, c[i]);
     }
     for (i = size - 5; i < size; i++)
     {
         printf("c[%d] = %f\n", i, c[i]);
     }
 #endif

 cleanup:
     // cudaFree(NULL) and free(NULL) are no-ops, so this is safe on every path.
     cudaFree(dev_c);
     cudaFree(dev_a);
     cudaFree(dev_b);

     free(a);
     free(b);
     free(c);

     return exitCode;
 }
 # Builds the CUDAVectorAdd test program.
 #
 #   make         compile CUDAVectorAdd.cu, build the timer objects and link
 #   make run     execute the resulting binary
 #   make clean   remove objects, the libcudart symlinks and the binary
 #
 # NOTE(review): the timer objects are produced by the Makefile in
 # ../shareIMLTimer, which is not visible here -- it is assumed to emit
 # IML_PrecisionTimer.o and stimer.o; confirm.
 # NOTE: recipe lines below must start with a TAB character.
 CUDA_PATH ?=/usr/local/cuda-7.0
 NVCC      :=$(CUDA_PATH)/bin/nvcc -ccbin g++
 INCLUDE   :=-I$(CUDA_PATH)/include/ \
             -I/usr/local/cuda/samples/common/inc \
             -I/usr/include/c++ \
             -I../shareIMLTimer \
             -I./
 LINKPATH  :=/usr/lib/
 LIBRARIES :=-L/usr/local/cuda/lib64 -lcudart
 TARGETS   :=CUDAVectorAdd
 OBJECTS   :=$(addsuffix .o, $(TARGETS))
 TIMEOBJECTS :=IML_PrecisionTimer.o \
               stimer.o

 .PHONY: all run clean

 # Compile-only suffix rules; link flags are applied in the `all` link step.
 .SUFFIXES:.o .cu .cpp
 .cu.o:
	$(NVCC) -arch=sm_20 $(INCLUDE) -c -g -o $@ $<
 .cpp.o:
	$(CXX) $(INCLUDE) -c -g -o $@ $<

 # Optional, if the cuFFT runtime is needed system-wide:
 #   sudo cp /usr/local/cuda/lib64/libcufft.so.7.0 /usr/lib
 all:$(OBJECTS) $(TIMEOBJECTS)
	# -f so that re-running the build does not fail on existing links
	ln -sf $(LINKPATH)libcudart.so.7.0  libcudart.so
	ln -sf $(LINKPATH)libcudart.so.7.0  libcudart.so.7
	g++ $(INCLUDE) -o $(TARGETS) $^ $(LIBRARIES)

 # Build the timer objects in their own directory, then copy them next to
 # this Makefile (the copy destination is the directory ../$(TARGETS)).
 $(TIMEOBJECTS):
	cd ../shareIMLTimer && $(MAKE) && cp *.o ../$(TARGETS)

 run:
	./$(TARGETS)

 clean:
	rm -rf *.o kernel libcudart.so libcudart.so.7 $(TARGETS)

$ ./CUDAVectorAdd
Time = 0.089507 Sec
Result OK!

CUDA Vector Add Test 2048x1024

标签:int   bool   stderr   oca   -o   err   abs   gpu   make   

原文地址:http://www.cnblogs.com/ILoveOCT/p/6596949.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!