Home > Enterprise >  Parsing string between two sets of characters without regex
Parsing string between two sets of characters without regex

Time:10-17

Say we have a string like "((the)) weather is usually good ((when)) its ((spring))". How can I extract only the words between '((' and '))' without using regex?

CodePudding user response:

You can use strstr() defined in <string.h> to search for "((", then search from there for "))" to find the end of the match:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Extract the next substring of `s` enclosed by "((" and "))".
 *
 * s    NUL-terminated string to scan.
 * pos  in/out byte offset into `s`. Scanning starts at s + *pos, so the
 *      caller must keep *pos <= strlen(s). On a match, *pos is moved just
 *      past the closing "))". When there is no further opening "((" or the
 *      closing "))" is missing, *pos is moved to the end of the string.
 *
 * Returns a newly allocated, NUL-terminated copy of the text between the
 * delimiters (possibly empty); the caller must free() it. Returns NULL when
 * no complete match remains, or when allocation fails (in that case *pos
 * has already been advanced past the match).
 */
char *get_word(const char *s, size_t *pos) {
    const char *start = strstr(s + *pos, "((");
    if (start == NULL) {
        // no more opening delimiters: consume the rest of the string
        *pos += strlen(s + *pos);
        return NULL;
    }
    start += 2;  // skip the "((" itself

    const char *end = strstr(start, "))");
    if (end == NULL) {
        // opening delimiter without a closing one: consume the rest
        *pos += strlen(s + *pos);
        return NULL;
    }

    // resume the next search right after the closing "))"
    *pos = (size_t)(end + 2 - s);

    // copy the match by hand instead of relying on POSIX-only strndup()
    size_t len = (size_t)(end - start);
    char *word = malloc(len + 1);
    if (word == NULL) {
        return NULL;
    }
    memcpy(word, start, len);
    word[len] = '\0';
    return word;
}

/*
 * Demo driver: print each word enclosed in "((" ... "))" from the sample
 * string, one per line.
 */
int main(void) {
    const char *str = "((the)) weather is usually good ((when)) it's ((spring))";
    size_t pos = 0;  // scan offset, advanced by get_word() on every call
    char *word;

    // get_word() returns NULL once no complete "((...))" pair remains
    while ((word = get_word(str, &pos)) != NULL) {
        printf("%s\n", word);
        free(word);  // each returned match is heap-allocated for the caller
    }
    return 0;
}

CodePudding user response:

strspn and strcspn can be used to find and count delimiters.

#include <stdio.h>
#include <string.h>

/*
 * Print every token of the sample text that is enclosed by exactly two
 * '(' characters and exactly two ')' characters, using strcspn() to find
 * a delimiter and strspn() to count how many of them follow.
 *
 * A run of '(' whose length is not 2 is skipped. A token whose closing
 * run of ')' is not exactly 2 long is not printed; scanning resumes after
 * that run.
 */
int main(void) {
    const char *text = "((the)) weather is usually good ((when)) its ((spring))";
    const char *parse = text;

    while (*parse) {
        parse += strcspn(parse, "(");       // advance to the next '(' (or the end)
        size_t span = strspn(parse, "(");   // length of the run of '('
        if (2 == span) {
            const char *token = parse + span;         // first char after "(("
            const char *end = token + strcspn(token, ")");  // first ')' (or the end)
            span = strspn(end, ")");                  // length of the run of ')'
            if (2 == span) {
                printf("parsed:   %.*s\n", (int)(end - token), token);
            }
            parse = end + span;  // continue after the closing run
        } else {
            parse += span;       // not a "((" opener: step over the whole run
        }
    }

    return 0;
}

EDIT: To store the substrings in an array:

#include <stdio.h>
#include <string.h>

#define ROWS 10
#define COLS 20

/*
 * Same scan as the printing-only version, but each token enclosed by
 * exactly "((" and "))" is also copied into a fixed ROWS x COLS array and
 * the stored tokens are printed again at the end.
 *
 * Tokens that do not fit in a row (length + 1 >= COLS) are printed but not
 * stored. Scanning stops once ROWS tokens have been stored.
 */
int main(void) {
    const char *text =
        "((the)) weather is usually good ((when)) its ((spring))";
    const char *parse = text;
    char array[ROWS][COLS] = { { 0 } };
    int row = 0;  // number of tokens stored so far

    while (*parse) {
        parse += strcspn(parse, "(");       // advance to the next '(' (or the end)
        size_t span = strspn(parse, "(");   // length of the run of '('
        if (2 == span) {
            const char *token = parse + span;         // first char after "(("
            const char *end = token + strcspn(token, ")");  // first ')' (or the end)
            span = strspn(end, ")");                  // length of the run of ')'
            if (2 == span) {
                printf("parsed:   %.*s\n", (int)(end - token), token);
                size_t length = (size_t)(end - token);
                // store only when the token plus its terminator fits in a row
                if (length + 1 < COLS) {
                    memcpy(array[row], token, length);
                    array[row][length] = 0;
                    ++row;
                    if (row == ROWS) {
                        break;  // array is full
                    }
                }
            }
            parse = end + span;  // continue after the closing run
        } else {
            parse += span;       // not a "((" opener: step over the whole run
        }
    }

    for (int each = 0; each < row; ++each) {
        printf("%s\n", array[each]);
    }

    return 0;
}
  • Related