Home > database >  splitting a string up using different delimiters in C
splitting a string up using different delimiters in C

Time:01-07

I am trying to split up a string using different delimiters. After hours of trial and error with strtok(), I have finally found a way to make it work. However, it passes NULL in place of the string in some of the strtok() calls, and I don't fully understand how it works.

I have tried to split it up so that I can save the pieces in separate variables and use them to return functions within my main function, but it doesn't work, which leads me to believe it is an incredibly flimsy way of splitting the string up.

the input string is read from a config file and is in this format:

(6,2) SLUG 1 0 EAST

The current code I'm using is this:

/*
 * Parses one config record (sample: "(6,2) SLUG 1 0 EAST") and prints the
 * slug's position, age and direction.
 *
 * data is modified in place: strtok() overwrites the delimiter that ends each
 * token with '\0', so data must point to writable storage.
 *
 * Notes on strtok() that explain why this works for the sample record:
 *   - the second argument is a SET of single-character delimiters, not a
 *     substring to search for;
 *   - a non-NULL first argument starts a new scan on that string, while NULL
 *     continues the previous scan from where it stopped.
 *
 * No strtok() result is checked for NULL before being passed on, so a record
 * in a different layout can hand NULL to atoi()/strtok()/printf("%s").
 */
void createSlug(char* data) {
        int slugPosX, slugPosY, slugAge;
        char *slugDir;
        char *token1;
        char *token2;

        // The leading '(' is skipped and no other '(' exists, so the token is
        // "6,2) SLUG 1 0 EAST"; atoi() stops converting at the ','.
        slugPosX = atoi(strtok(data, "("));

        // Restarts the scan on data: yields "(6" and overwrites the ',' with '\0'.
        // token1 is never read; the call only positions the scan after the comma.
        token1 = strtok(data, ",");
        // Inner call: no further ',' so it returns the remainder "2) SLUG 1 0 EAST".
        // Outer call: restarts on that remainder and cuts it at ')' giving "2".
        slugPosY = atoi(strtok(strtok(NULL, ","), ")"));

        // Inner call continues after the ')' and returns " SLUG 1 0 EAST";
        // the outer call finds no ')' and returns that same text unchanged.
        token2 = strtok(strtok(NULL, ","), ")");
        // Delimiter set is {' ', 'S', 'L', 'U', 'G'}: the leading " SLUG " is
        // skipped as delimiters and the token is "1".
        slugAge = atoi(strtok(token2, " SLUG "));

        // Delimiter set is {' ', '0'}: the "0 " is skipped and the token is "EAST".
        // slugDir points into data, so it is only valid while data is.
        slugDir = strtok(NULL, " 0 ");

        printf("slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir);

}

the output would be printed as:

slug position is: (6,2), with age 1, and direction: EAST

The input file changes but is always in the above format. It is also worth mentioning that the '0' in the input string is always 0, so I ignored that part of it, as I could use it as the delimiter.

Is there an easier way of doing this? I'm very new to C, so any help would be greatly appreciated.

CodePudding user response:

The input file changes but is always in the above format

Scanf?

/*
 * Demonstrates parsing the fixed record layout with a single sscanf() call:
 * prints the number of fields converted, then every parsed field.
 */
int main() {
    /* Integer fields, in the order they appear in the record. */
    int w;
    int x;
    int y;
    int z;

    /* Word fields; zero-filled so they print as empty if not converted. */
    char a[60] = "";
    char b[60] = "";

    /* sscanf() returns how many of the six conversions succeeded. */
    int converted = sscanf("(6,2) SLUG 1 0 EAST", "(%d,%d) %s %d%d%s", &w, &x, a, &y, &z, b);

    printf("%d\n", converted);
    printf("w = %d, x = %d, y = %d, z = %d, a = '%s', b = '%s'\n", w, x, y, z, a, b);
}

https://godbolt.org/z/Woa94sb1Y

CodePudding user response:

It's a poor craftsman who blames his tools.

To use strtok() it is probably easiest to only invoke it in one place. The function should "know" the record layout and can be written to "segment" the string to suit.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse one record laid out as "(x,y) NAME age flag DIRECTION" and print the
 * slug's position, age and direction.
 *
 * data is tokenised in place by strtok(), which overwrites each delimiter
 * that ends a token with '\0', so it must point to writable storage. Fields
 * that are missing keep their initial values (0 / empty string); tokens
 * beyond the sixth are reported with a message and otherwise ignored.
 */
void createSlug( char* data ) {
    // Position of each field within the record, in token order
    enum { ePosx, ePosy, eName, eAge, eXXX, eDir };

    // ALWAYS initialise variables to avoid sporadic functioning and possible UB
    int slugPosX = 0, slugPosY = 0, slugAge = 0;
    char *slugDir = "";

    int stage = 0;
    // First pass hands strtok() the buffer; "cp = NULL" makes later passes continue the same scan
    for( char *cp = data; ( cp = strtok( cp, "(,) \n" ) ) != NULL; cp = NULL ) {
        // Post-increment so each successive token is dispatched to the next field
        switch( stage++ ) {
            case ePosx: slugPosX = atoi( cp ); break;
            case ePosy: slugPosY = atoi( cp ); break;
            case eName: break;  // record type ("SLUG") is not needed
            case eAge:  slugAge = atoi( cp ); break;
            case eXXX:  break;  // fifth field is not used
            case eDir:  slugDir = cp; break;  // points into data, not a copy
            default:
                puts( "Extra fields??!!" );
                break;
        }
    }

    printf("slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir);

}

/* Runs createSlug() once on a sample record. */
int main( void ) {
    // A modifiable array (not a string literal), passed to createSlug() as char*
    char record[] = "(6,2) SLUG 1 0 EAST\n";

    createSlug( record );

    return 0;
}
slug position is: (6,2), with age 1, and direction: EAST

Still using atoi() here, but strtol() may be a better translation function.

CodePudding user response:

If you are sure that the characters '(', ',', ')' and ' ' are only used as delimiters and don't ever occur in the tokens, then you can simply use "(,) " as the delimiter string in all calls to strtok:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Split a record such as "(6,2) SLUG 1 0 EAST" on the characters '(', ',',
 * ')' and ' ' and print the position, age and direction.
 *
 * data is modified in place by strtok(). No token is checked for NULL, so
 * the record must contain all six fields.
 */
void createSlug( char* data )
{
    // Index of each field in token order
    enum { FIELD_X, FIELD_Y, FIELD_NAME, FIELD_AGE, FIELD_ZERO, FIELD_DIR, FIELD_COUNT };

    static const char separators[] = "(,) ";
    char *field[FIELD_COUNT];

    // Only the first strtok() call receives the buffer; passing NULL afterwards continues the scan
    char *source = data;
    for ( int i = 0; i < FIELD_COUNT; i++ )
    {
        field[i] = strtok( source, separators );
        source = NULL;
    }

    // The "SLUG" and "0" tokens (FIELD_NAME, FIELD_ZERO) are read but ignored
    const int slugPosX = atoi( field[FIELD_X] );
    const int slugPosY = atoi( field[FIELD_Y] );
    const int slugAge = atoi( field[FIELD_AGE] );
    char *slugDir = field[FIELD_DIR];

    printf( "slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir );
}

/* Runs createSlug() once on a sample record. */
int main( void )
{
    // A modifiable array (not a string literal), passed to createSlug() as char*
    char record[] = "(6,2) SLUG 1 0 EAST";

    createSlug( record );
}

However, this program may crash if strtok ever returns NULL due to the input not being in the expected format. Here is a different version which does a lot more input validation and prints an error message instead of crashing:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//print a message to stderr and terminate with a failure status
static void dieWith( const char *message )
{
    fputs( message, stderr );
    exit( EXIT_FAILURE );
}

//fetch the next strtok() token, terminating with missingMessage if there is none
static char *requireToken( char *source, const char *separators, const char *missingMessage )
{
    char *found = strtok( source, separators );

    if ( found == NULL )
    {
        dieWith( missingMessage );
    }

    return found;
}

//convert a whole token to a base-10 integer, terminating with badMessage on trailing characters
static int requireInt( const char *text, const char *badMessage )
{
    char *end;
    int value = strtol( text, &end, 10 );

    if ( *end != '\0' )
    {
        dieWith( badMessage );
    }

    return value;
}

//terminate with badMessage unless the token is exactly the expected text
static void requireText( const char *text, const char *expected, const char *badMessage )
{
    if ( strcmp( text, expected ) != 0 )
    {
        dieWith( badMessage );
    }
}

/*
 * Validate and print a record of the form "(x,y) SLUG age 0 DIRECTION".
 *
 * data is tokenised in place by strtok() on the characters '(', ',', ')' and
 * ' '. The fields are checked strictly in order; on the first missing or
 * malformed field a message is written to stderr and the program exits with
 * EXIT_FAILURE. On success one line is printed to stdout.
 */
void createSlug( char* data )
{
    static const char separators[] = "(,) ";

    //first and second tokens: the x and y coordinates
    const int slugPosX = requireInt(
        requireToken( data, separators, "Unable to find first token!\n" ),
        "Unable to convert first token to an integer!\n" );

    const int slugPosY = requireInt(
        requireToken( NULL, separators, "Unable to find second token!\n" ),
        "Unable to convert second token to an integer!\n" );

    //third token: must be the literal "SLUG"
    requireText(
        requireToken( NULL, separators, "Unable to find third token!\n" ),
        "SLUG",
        "Invalid content of third token!\n" );

    //fourth token: the age
    const int slugAge = requireInt(
        requireToken( NULL, separators, "Unable to find fourth token!\n" ),
        "Unable to convert fourth token to an integer!\n" );

    //fifth token: must be the literal "0"
    requireText(
        requireToken( NULL, separators, "Unable to find fifth token!\n" ),
        "0",
        "Invalid content of fifth token!\n" );

    //sixth token: the direction, used as-is (points into data)
    char *slugDir = requireToken( NULL, separators, "Unable to find sixth token!\n" );

    printf( "slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir );
}

/* Runs createSlug() once on a sample record. */
int main( void )
{
    // A modifiable array (not a string literal), passed to createSlug() as char*
    char record[] = "(6,2) SLUG 1 0 EAST";

    createSlug( record );
}
  • Related