I got a cuda Segmentation fault after running this code.
The reason for this code: I wanted to know the maximum size ian array can declare in register memory, maximum array for each thread per block:
#include "common.h"
#include <cuda_runtime.h>
#include "stdio.h"
#define N 10
#define Nblock 10
__global__ void add(int *c)
{
int X[N];
int tID = blockIdx.x * blockDim.x threadIdx.x;
for(int o = 0; o < N;o ) {
X[o]=1;
c[tID] =X[o];
}
}
int main(int argc, char **argv)
{
// set up device
int dev = 0;
cudaDeviceProp deviceProp;
CHECK(cudaGetDeviceProperties(&deviceProp, dev));
printf("%s test struct of array at ", argv[0]);
printf("device %d: %s \n", dev, deviceProp.name);
CHECK(cudaSetDevice(dev));
int c[N*Nblock];
int *dev_c;
cudaMalloc((void **) &dev_c, N*Nblock*sizeof(int));
add<<<Nblock,N>>>(dev_c);
cudaMemcpy(c, dev_c, N*Nblock*sizeof(int), cudaMemcpyDeviceToHost);
int sum = 0 ;
for (int i = 0; i < N*Nblock; i )
{
sum =c[i];
}
printf("sum= %d\n", sum);
free(dev_c);
// reset device
CHECK(cudaDeviceReset());
return EXIT_SUCCESS;
}
Got this message after running the code:
line 4: 18244 Segmentation error (memory stack flushed to disk)
CodePudding user response:
The seg fault is happening at this line:
free(dev_c);
You don't free a device pointer (allocated with cudaMalloc
) that way.
The correct thing would be:
cudaFree(dev_c);