I'm trying to use strsep
to remove extra characters in an CSV file. The problem is that when I run it, it gives me Segmentation Fault and I can't figure out why. Here's the code:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <ctype.h>
typedef struct {
int id, followers, following, public_gists;
int public_repos;
char *login;
char *type;
char *created_at;
int *follower_list;
int *following_list;
} *User;
void checkUsersFile();
FILE *createCSV();
void corrigirFicheiro();
User criarUser();
int count = 0;
void checkUsersFile() {
//Ficheiro "users.csv"
FILE *file = fopen("ficheirosG1/users-set2.csv", "r");
//Verifica se o ficheiro "users.csv" existe
if(!file) {
printf("Ficheiro não encontrado");
return;
}
//Cria ficheiro "users-ok.csv"
FILE *newFile = createCSV("users-ok.csv");
corrigirFicheiro(file, newFile);
printf("%d\n", count);
}
//Cria e retorna ficheiro "users-ok.csv"
FILE *createCSV(char *nome) {
FILE *file = fopen(nome, "w");
return file;
}
//Função responsável por intrepretar o ficheiro "users.csv" e colocar os dados corretos no ficheiro "users-ok.csv"
void corrigirFicheiro(FILE *file, FILE *newFile) {
//imprimirPrimeiraLinha(file, newFile);
char string[200000];
//Uma linha do ficheiro com, no máximo, 200.000 caracteres
while ((fgets(string, 200000, file))) {
if (string[0] != '\0') {
//1. Criar user
//2. Print user
User user = criarUser(&string);
if (user != NULL) {
printf("ok\n");
}
free(user);
}
}
//free(string);
}
//Cria um User a partir de uma linha do ficheiro
User criarUser(char *str) {
User novoUser;
novoUser = (User) malloc(sizeof(User));
for(int i = 0; i<10; i ) {
//char *a = strdup(strsep(&str, ";"));
//char *b = strdup(strsep(&a, "\n"));
char *p = strsep(&str, ";\n\r");
if (strlen(p) == 0) {
count ;
free(novoUser);
return NULL;
}
}
return novoUser;
}
int main(){
checkUsersFile();
return 0;
}
Using gdb to debug the code, it says that it occurs in the line if(strlen(p) == 0 {
So it doesn't even enter the switch case.
I don't know why this is happening.
Thank you
CodePudding user response:
I see no reason to think that the strsep()
call is responsible for the error you encounter.
This is wrong, however:
User novoUser = (User) malloc(sizeof(User));
and it very likely is responsible for your error.
User
is a pointer type, so sizeof(User)
is the size of a pointer, which is not large enough for a structure of the kind that a User
points to. When you later try to assign to the members of the structure to which it points (omitted) or to access them in printUser()
(also omitted), you will overrun the bounds of the allocated object. That's exactly the kind of thing that might cause a segfault.
An excellent idiom for expressing an allocation such as that uses the receiving variable to establish the amount of space to allocate:
User novoUser = malloc(sizeof(*novoUser));
Note that I have also removed the unneeded cast.
As I expressed in comments, however, it is poor style to hide pointer nature behind a typedef
, as your User
does, and personally, I don't much care even for most typedef
s that avoid that pitfall.
Here is how you could do it with a better typedef:
typedef struct {
int id, followers, following, public_gists;
int public_repos;
char *login;
char *type;
char *created_at;
int *follower_list;
int *following_list;
} User; // not a pointer
// ...
User *criarUser(char *str) {
// ...
User *novoUser = malloc(sizeof(*novoUser)); // Note: no change on the right-hand side
// ...
return novoUser;
}
But this is how I would do it, without a typedef:
struct user {
int id, followers, following, public_gists;
int public_repos;
char *login;
char *type;
char *created_at;
int *follower_list;
int *following_list;
};
// ...
struct user *criarUser(char *str) {
// ...
struct user *novoUser = malloc(sizeof(*novoUser)); // still no change on the right-hand side
// ...
return novoUser;
}
CodePudding user response:
In answer to you question I wrote a short example that illustrates what is needed. In essence you need to allocate storage for string and pass the address of string
to criarUser()
. You cannot use an array because the type passed to criarUser()
would be pointer-to-array not pointer-to-pointer. (note: you can use the array so long as it is allowed to decay to a pointer so taking the address does not result in pointer-to-array -- example at end below)
An (close) example of corrigirFicheiro()
with needed changes to use with allocated storage is:
void corrigirFicheiro(FILE *file, FILE *newFile)
{
char *string = malloc (200000); /* allocate for string */
/* valdiate allocation here */
char *original_ptr = string; /* save original pointer */
while ((fgets(string, 200000, file))) {
if (string[0] != '\0') {
User *user = criarUser(&string); /* pass address of string */
if (user != NULL) {
printUser(user, newFile);
}
free(user);
}
}
free (original_ptr); /* free original pointer */
}
An abbreviated struct was used in the following example, but the principal is the same. For sample input, you can simply pipe a couple of lines from printf
and read on stdin
and write to stdout
. I used:
$ printf "one;two;3\nfour;five;6\n" | ./bin/strsep_example
A short MCVE (where I have taken liberty to remove the typedef'ed pointer) would be
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 128
#define DELIM ";\r\n"
#define NFIELDS 3
typedef struct {
char login[MAXC];
char type[MAXC];
int public_repos;
} User;
void printUser (User *user, FILE *newFile)
{
fprintf (newFile, "\nlogin : %s\ntype : %s\nrepo : %d\n",
user->login, user->type, user->public_repos);
}
User *criarUser(char **str)
{
User *novoUser = malloc(sizeof *novoUser);
/* validate allocation here */
for(int i = 0; i<NFIELDS; i ) {
char *p = strsep(str, DELIM);
switch (i) {
case 0: strcpy (novoUser->login, p);
break;
case 1: strcpy (novoUser->type, p);
break;
case 2: novoUser->public_repos = atoi(p);
break;
}
if (strlen(p) == 0) {
free(novoUser);
return NULL;
}
}
return novoUser;
}
void corrigirFicheiro(FILE *file, FILE *newFile)
{
char *string = malloc (200000); /* allocate for string */
/* valdiate allocation here */
char *original_ptr = string; /* save original pointer */
while ((fgets(string, 200000, file))) {
if (string[0] != '\0') {
User *user = criarUser(&string); /* pass address of string */
if (user != NULL) {
printUser(user, newFile);
}
free(user);
}
}
free (original_ptr); /* free original pointer */
}
int main (int argc, char **argv) {
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
corrigirFicheiro(fp, stdout);
if (fp != stdin) /* close file if not stdin */
fclose (fp);
}
Example Use/Output
$ printf "one;two;3\nfour;five;6\n" | ./bin/strsep_example
login : one
type : two
repo : 3
login : four
type : five
repo : 6
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind
is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ printf "one;two;3\nfour;five;6\n" | valgrind ./bin/strsep_example
==6411== Memcheck, a memory error detector
==6411== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==6411== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==6411== Command: ./bin/strsep_example
==6411==
login : one
type : two
repo : 3
login : four
type : five
repo : 6
==6411==
==6411== HEAP SUMMARY:
==6411== in use at exit: 0 bytes in 0 blocks
==6411== total heap usage: 5 allocs, 5 frees, 205,640 bytes allocated
==6411==
==6411== All heap blocks were freed -- no leaks are possible
==6411==
==6411== For counts of detected and suppressed errors, rerun with: -v
==6411== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have questions.
Using An Array
When you pass the array as a parameter it decays to a pointer by virtue of array/pointer conversion. If that is done and the "array" type is no longer associated with the pointer you can then take the address and use automatic storage with strsep()
as pointed out by @thebusybee.
The changes to the program above to do so would be:
User *criarUser(char *str)
{
User *novoUser = malloc(sizeof *novoUser);
/* validate allocation here */
for(int i = 0; i<NFIELDS; i ) {
char *p = strsep(&str, DELIM);
switch (i) {
case 0: strcpy (novoUser->login, p);
break;
case 1: strcpy (novoUser->type, p);
break;
case 2: novoUser->public_repos = atoi(p);
break;
}
if (strlen(p) == 0) {
free(novoUser);
return NULL;
}
}
return novoUser;
}
void corrigirFicheiro(FILE *file, FILE *newFile)
{
char string[200000];
while ((fgets(string, 200000, file))) {
if (string[0] != '\0') {
User *user = criarUser(string); /* pass string, as pointer */
if (user != NULL) {
printUser(user, newFile);
}
free(user);
}
}
}
But note, without the benefit of array/pointer conversion when passing string
as a parameter, you must use allocated storage. Up to you, but know the caveat.