Segmentation Fault at runtime in a simple C CUDA code-CodePudding

I got a cuda Segmentation fault after running this code.

The reason for this code: I wanted to know the maximum size ian array can declare in register memory, maximum array for each thread per block:

#include "common.h"
#include <cuda_runtime.h>
#include "stdio.h"

#define N 10
#define Nblock 10

__global__ void add(int *c)
{
   int X[N];
   int tID = blockIdx.x * blockDim.x   threadIdx.x;
   for(int o = 0; o < N;o  ) {
      X[o]=1;
      c[tID]  =X[o];
 }
}
int main(int argc, char **argv)
{

 // set up device
 int dev = 0;
 cudaDeviceProp deviceProp;
 CHECK(cudaGetDeviceProperties(&deviceProp, dev));
 printf("%s test struct of array at ", argv[0]);
 printf("device %d: %s \n", dev, deviceProp.name);
 CHECK(cudaSetDevice(dev));

 int c[N*Nblock];
 int *dev_c;

 cudaMalloc((void **) &dev_c, N*Nblock*sizeof(int));

 add<<<Nblock,N>>>(dev_c);
 cudaMemcpy(c, dev_c, N*Nblock*sizeof(int), cudaMemcpyDeviceToHost);
 int sum = 0 ;
 for (int i = 0; i < N*Nblock; i  )
 {
 sum  =c[i];
 }
 printf("sum= %d\n", sum);
 free(dev_c);
    // reset device
    CHECK(cudaDeviceReset());
    return EXIT_SUCCESS;
}

Got this message after running the code:

line 4: 18244 Segmentation error (memory stack flushed to disk)

CodePudding user response：

The seg fault is happening at this line:

free(dev_c);

You don't free a device pointer (allocated with cudaMalloc) that way.

The correct thing would be:

cudaFree(dev_c);