Home > Software engineering >  Looking for a workaround for strtok
Looking for a workaround for strtok

Time:11-14

I have to make a C shell interpreter that can handle multiple ' | ' operators. So if I write something like this: cat test.txt | sort | uniq -c | sort -nr it works correctly. The problem comes when I try to use more complex functions, for example: cat test.txt | awk '/" 404 / {print%7}' | sort | uniq -c | sort -nr | head. It breaks when 'awk' parameters are separated by strtok.

The code that works:

// Boolean constants used for the loop flags in read_line() and main().
#define TRUE 1
#define FALSE 0

// Initial size in bytes of the line buffer allocated by read_line().
#define BUF_SIZE 1024
// Initial number of char* slots in the token arrays built by the split functions.
#define ROW_SIZE 64
// Sentinel used by test_split_to_lines() to mean "no delimiter position chosen yet".
#define MIN 100000

// Report an allocation failure on stderr and terminate the process with
// EXIT_FAILURE. This function does not return.
void failed_allocation(){
    fprintf(stderr, "Failed to allocate memory.\n");
    exit(EXIT_FAILURE);
}

// Read one line from stdin into a freshly allocated, NUL-terminated buffer.
//
// Reading stops at the first '\n' (which is not stored) or at EOF. The buffer
// starts at BUF_SIZE bytes and grows by BUF_SIZE each time it fills up.
//
// Returns a heap pointer that the caller must free(). An allocation failure
// ends the program through failed_allocation(), so NULL is never returned.
// EOF on an empty line yields "", so callers that need to tell EOF apart
// from an empty line have to check feof(stdin) themselves.
char* read_line(){
    int buf_size = BUF_SIZE;
    int pos = 0;
    char* buffer = malloc(sizeof(char) * buf_size);

    if(buffer == NULL)
        failed_allocation();

    // read char by char
    while(TRUE){
        // int rather than char, so EOF can be told apart from real characters
        int c = getchar();

        // look for EOF or end of line
        if(c == EOF || c == '\n'){
            buffer[pos] = '\0';
            return buffer;
        }

        buffer[pos] = (char)c;
        ++pos;

        // grow before the next write so the terminator always has room
        if(pos >= buf_size){
            buf_size += BUF_SIZE;
            // keep the old pointer until realloc is known to have succeeded
            char* grown = realloc(buffer, buf_size);
            if(grown == NULL){
                free(buffer);
                failed_allocation();
            }
            buffer = grown;
        }
    }
}

// Split str in place into tokens separated by any character found in delim.
//
// This is a thin wrapper around strtok(), so:
//   - str is modified (delimiters are overwritten with '\0');
//   - the returned tokens point INTO str and are only valid while str is;
//   - runs of delimiters produce no empty tokens;
//   - quoting is not understood: a delimiter inside '...' still splits;
//   - strtok() keeps hidden static state, so calls must not be interleaved.
//
// Returns a NULL-terminated, heap-allocated array of token pointers. The
// caller frees the array itself with free(), but not the individual tokens.
// An allocation failure ends the program through failed_allocation().
char** split_to_lines(char* str, char* delim){
    int buf_size = ROW_SIZE;
    int pos = 0;
    char** buffer_list = malloc(buf_size * sizeof(char*));

    if(buffer_list == NULL)
        failed_allocation();

    // split into list
    for(char* token = strtok(str, delim); token != NULL; token = strtok(NULL, delim)){
        buffer_list[pos] = token;
        ++pos;

        // grow before the next store so the NULL terminator always has room
        if(pos >= buf_size){
            buf_size += ROW_SIZE;
            char** grown = realloc(buffer_list, buf_size * sizeof(char*));
            // abort only when realloc FAILED (the original test was inverted)
            if(grown == NULL){
                free(buffer_list);
                failed_allocation();
            }
            buffer_list = grown;
        }
    }
    buffer_list[pos] = NULL; // end list
    return buffer_list;
}

// Run the pipeline described by args, a NULL-terminated list of command
// strings (one per pipeline stage, already split on '|').
//
// Every stage except the last is run in a forked child whose stdout feeds a
// pipe read by the next stage. The LAST stage is exec'ed in the calling
// process itself, so on success this function does not return: the caller's
// process image is replaced by the final command.
//
// Each stage string is split on whitespace with split_to_lines(), which
// modifies the string in place.
//
// Returns 0 when args holds no commands, 1 when pipe(), fork() or the final
// execvp() fails. After a failed final execvp() the caller's stdin may
// already have been redirected to the last pipe.
int start_proc(char** args){
    int fd[2];
    int prev_fd = STDIN_FILENO;
    int i;
    char** list = NULL;

    // nothing to run; also avoids reading args[1] past the terminator
    if(args == NULL || args[0] == NULL)
        return 0;

    for(i = 0; args[i + 1] != NULL; ++i){
        if(pipe(fd) == -1){
            perror("Pipe error: ");
            if(prev_fd != STDIN_FILENO)
                close(prev_fd);
            return 1;
        }

        int pid = fork();
        if(pid < 0){
            perror("Fork error:");
            close(fd[0]);
            close(fd[1]);
            if(prev_fd != STDIN_FILENO)
                close(prev_fd);
            return 1;
        }
        else if(pid == 0){
            // The child only writes to this pipe. If it kept the read end
            // open it would never see SIGPIPE/EPIPE when the next stage
            // exits early (e.g. "... | head").
            close(fd[0]);

            if(prev_fd != STDIN_FILENO){
                dup2(prev_fd, STDIN_FILENO);
                close(prev_fd);
            }

            dup2(fd[1], STDOUT_FILENO);
            close(fd[1]);

            list = split_to_lines(args[i], " \t\r\n");
            if(list[0] == NULL){
                // e.g. "ls | | wc": there is no program name to exec
                fprintf(stderr, "Empty command in pipeline.\n");
                exit(EXIT_FAILURE);
            }
            execvp(list[0], list);
            perror("Execvp error:");
            exit(EXIT_FAILURE);
        }

        // Parent: drop the descriptors it no longer needs, but never close
        // the real stdin, which the first stage inherits untouched.
        if(prev_fd != STDIN_FILENO)
            close(prev_fd);
        close(fd[1]);
        prev_fd = fd[0];
    }

    if(prev_fd != STDIN_FILENO){
        dup2(prev_fd, STDIN_FILENO);
        close(prev_fd);
    }

    list = split_to_lines(args[i], " \t\r\n");
    if(list[0] == NULL){
        fprintf(stderr, "Empty command in pipeline.\n");
        free(list);
        return 1;
    }
    execvp(list[0], list);

    // only reached when execvp failed
    perror("Execvp error:");
    free(list);
    return 1;
}



// Interactive loop: print a prompt, read a line, split it on '|' and hand the
// resulting stages to start_proc(). The loop ends on the literal command
// "exit", or when stdin reaches EOF (or fails) with nothing typed.
int main(){
    int flag = TRUE;

    while(flag == TRUE){
        // input
        printf("\n> ");
        fflush(stdout); // the prompt has no trailing newline; push it out now
        char* input = read_line();
        char** list = NULL;

        // exit condition; without the EOF/error check an exhausted stdin
        // would make read_line() return "" forever
        if(strcmp(input, "exit") == 0)
            flag = FALSE;
        else if(input[0] == '\0' && (feof(stdin) || ferror(stdin)))
            flag = FALSE;

        if(flag == TRUE){
            list = split_to_lines(input, "|");
            start_proc(list);
        }

        // free memory (list only holds pointers into input)
        free(input);
        free(list);
    }
    return 0;
}

I tried implementing my own way of separating strings, but to no avail: when I run the code, it randomly produces empty or blank strings and then tries to execute them, which results in execvp errors.

This is the implementation that I tried:

// Return the number of characters in str before its terminating '\0'.
// A NULL pointer is treated as an empty string and yields 0.
int get_length(char* str){
    int counter = 0;

    if(!str)
        return 0;

    while(str[counter] != '\0')
        ++counter;

    return counter;
}

// Find the first occurrence of look_for in str at index >= from.
//
// Returns the index of the match, or -1 when there is none, when from lies
// outside the string (negative or past its end), or when str is NULL.
// The terminating '\0' is never reported as a match.
int find_in_string(char* str, char look_for, int from){
    if(!str || from < 0)
        return -1;

    // Walk up to the start index first, so a 'from' beyond the end of the
    // string is detected without ever reading past the terminator.
    int i = 0;
    while(i < from){
        if(str[i] == '\0')
            return -1;
        ++i;
    }

    for(; str[i] != '\0'; ++i){
        if(str[i] == look_for)
            return i;
    }
    return -1;
}

// Return a newly allocated copy of str[begin..end], both bounds INCLUSIVE.
//
//   begin < 0            -> treated as 0
//   end   < 0            -> treated as "up to the end of the string"
//   end   < begin        -> yields an empty string
//   begin or end > length of str -> prints an error and exits
//
// The caller owns the returned buffer and must free() it. The program exits
// with EXIT_FAILURE if memory cannot be allocated.
char* substr(char* str, int begin, int end){
    int length = 0;
    while(str[length] != '\0')
        ++length;

    if(end > length || begin > length){
        fprintf(stderr, "Substr error: invalid interval values.");
        exit(EXIT_FAILURE);
    }

    // Clamp both bounds independently. The last copyable index is
    // length - 1, so the terminator is never copied as payload.
    if(end < 0 || end >= length)
        end = length - 1;
    if(begin < 0)
        begin = 0;

    int count = end - begin + 1; // inclusive range
    if(count < 0)
        count = 0;

    // +1 for the terminating '\0'
    char* buffer = malloc((size_t)count + 1);
    if(buffer == NULL){
        fprintf(stderr, "Failed to allocate memory.\n");
        exit(EXIT_FAILURE);
    }

    for(int i = 0; i < count; ++i)
        buffer[i] = str[begin + i];
    buffer[count] = '\0';
    return buffer;
}

// Quote-aware replacement for split_to_lines().
//
// Splits str at every character contained in delim, except where that
// character sits between a pair of single quotes ('...'). str itself is
// not modified.
//
//   - The quote characters are kept in the token; stripping them is left to
//     the caller.
//   - A quote without a matching closing quote is treated as an ordinary
//     character.
//   - Empty tokens (leading, trailing or consecutive delimiters) are
//     skipped, as strtok() does.
//
// Returns a NULL-terminated, heap-allocated array whose entries are
// individually heap-allocated copies. The caller must free() every entry
// and then the array. The program exits with EXIT_FAILURE if memory cannot
// be allocated.
char** test_split_to_lines(char* str, char* delim){
    size_t capacity = 16;
    size_t count = 0;
    char** buffer_list = malloc(capacity * sizeof(char*));

    if(buffer_list == NULL){
        fprintf(stderr, "Failed to allocate memory.\n");
        exit(EXIT_FAILURE);
    }

    if(str){
        char* token_start = str;
        char* cursor = str;

        for(;;){
            char c = *cursor;

            // Jump over a complete '...' section so that delimiters inside
            // it are never seen by the boundary test below.
            if(c == '\''){
                char* closing = cursor + 1;
                while(*closing != '\0' && *closing != '\'')
                    ++closing;
                if(*closing == '\''){
                    cursor = closing + 1;
                    continue;
                }
                // unmatched quote: fall through, it is a plain character
            }

            // A token ends at the end of the string or at any delimiter.
            int is_boundary = (c == '\0');
            for(char* d = delim; !is_boundary && d && *d != '\0'; ++d){
                if(*d == c)
                    is_boundary = 1;
            }

            if(!is_boundary){
                ++cursor;
                continue;
            }

            size_t token_len = (size_t)(cursor - token_start);
            if(token_len > 0){
                // keep one slot free for the terminating NULL
                if(count + 1 >= capacity){
                    capacity *= 2;
                    char** grown = realloc(buffer_list, capacity * sizeof(char*));
                    if(grown == NULL){
                        fprintf(stderr, "Failed to allocate memory.\n");
                        exit(EXIT_FAILURE);
                    }
                    buffer_list = grown;
                }

                char* token = malloc(token_len + 1);
                if(token == NULL){
                    fprintf(stderr, "Failed to allocate memory.\n");
                    exit(EXIT_FAILURE);
                }
                for(size_t i = 0; i < token_len; ++i)
                    token[i] = token_start[i];
                token[token_len] = '\0';

                buffer_list[count] = token;
                ++count;
            }

            if(c == '\0')
                break;

            // step over the delimiter; the next token starts right after it
            ++cursor;
            token_start = cursor;
        }
    }

    buffer_list[count] = NULL;
    return buffer_list;
}

So, basically, I need help writing a function that correctly separates strings.

CodePudding user response:

If you want to split a string into separate strings, you can use the function below. It works in place: it reuses the original string and overwrites the delimiters, destroying it. If argv == NULL, it only counts the number of strings.

/*
 * Split str in place at every character contained in delim.
 *
 * argv      array that receives pointers to the start of each piece, or
 *           NULL to only count the pieces
 * argvsize  number of slots available in argv
 * str       string to split; when argv is non-NULL each delimiter is
 *           overwritten with '\0', so the pieces point INTO str
 * delim     set of delimiter characters (NULL is treated as empty)
 *
 * Adjacent delimiters produce empty pieces (unlike strtok). In count-only
 * mode (argv == NULL) str is left untouched, so the same buffer can be
 * counted first and split afterwards.
 *
 * Returns the number of pieces: 0 for a NULL or empty str, or when argv is
 * non-NULL but argvsize is 0. When argv is too small, splitting stops after
 * argvsize pieces and argvsize is returned. Unused argv slots are zeroed.
 */
size_t splitstring(char **argv, size_t argvsize, char *str, const char *delim)
{
    size_t pos = 0;

    if(!str || !*str)
        return 0;
    if(argv && argvsize == 0)
        return 0;                       /* nowhere to store even one piece */
    if(!delim)
        delim = "";

    if(argv)
    {
        memset(argv, 0, argvsize * sizeof(*argv));
        argv[pos] = str;
    }
    pos += 1;

    while(*str)
    {
        if(!strchr(delim, *str))
        {
            str++;
            continue;
        }

        if(argv)
        {
            *str = 0;                   /* terminate the current piece */
            if(pos >= argvsize)
                break;                  /* check BEFORE writing argv[pos] */
            argv[pos] = str + 1;
        }
        pos++;
        str++;
    }
    return pos;
}

CodePudding user response:

I apologize in advance; this might not answer your question, but I wanted to demonstrate a novel technique. Instead of strtok, which has its problems, you can use fmemopen and getdelim, and use open_memstream to create an array to store the tokens that automatically grows big enough. The latter trick avoids error-prone calls to realloc. This also avoids having to write to the original string; although we don't declare it as const, it could be.

Note that trying to tokenize a string like this is only safe (whether you use this method or strtok) if you assume that '|' cannot occur as part of another multibyte character. I don't think ISO C guarantees this, but if the locale's encoding uses UTF-8, then you're safe.

#define _POSIX_C_SOURCE 200809L
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Demo: split a command line on '|' without strtok.
 *
 * The command string is read through a memory stream (fmemopen) with
 * getdelim(), which allocates each token. The token POINTERS are appended
 * to a second memory stream (open_memstream), whose growing buffer becomes
 * the resulting char* array once that stream is closed. The tokens are then
 * printed with puts() and everything is freed.
 *
 * Exits with EXIT_SUCCESS when all tokens were printed, EXIT_FAILURE on any
 * stream or locale error.
 */
int main(void) {
    if(!setlocale(LC_ALL, "")) {
        fputs("Failed to enable default locale\n", stderr);
        exit(EXIT_FAILURE);
    }

    char command[] = "cat test.txt | awk '/\" 404 / {print%7}' | sort | uniq -c | sort -nr | head";

    union {
        /* ISO C guarantees that the representations of char * and void * are
         * the same, so type-punning with a union is the same as doing a cast. */
        char *arrbytes;
        void *untyped;
    } outarray;
    size_t outarraybytecount;
    FILE *const outstream = open_memstream(&outarray.arrbytes, &outarraybytecount);
    if(!outstream) {
        perror("Failed to create memory stream");
        exit(EXIT_FAILURE);
    }

    FILE *const readstream = fmemopen(command, sizeof(command) - 1, "r");
    if(!readstream) {
        perror("Failed to create memory stream");
        goto endoutstream;
    }

    do {

        char *tok = NULL;
        ssize_t k = getdelim(&tok, &(size_t){0}, '|', readstream);
        if(k == -1) {
            /* getdelim may have allocated a buffer even though it failed */
            free(tok);
            if(ferror(readstream)) {
                perror("Failed to read from memory stream");
                goto endreadstream;
            }
        } else {
            if(tok[k-1] == '|') {
                /* You can leave the delimiter in if you want. */
                tok[k-1] = '\0';
            }
            /* This is a subtle trick: we're writing the contents of our pointer
             * variable to the stream. */
            if(!fwrite(&tok, sizeof(tok), 1, outstream)) {
                perror("Failed to write to memory stream");
                /* the pointer never reached the array, so release it here */
                free(tok);
                goto endreadstream;
            }
        }
    } while(!feof(readstream));

    /* Now we're done. */
    if(fclose(readstream) == EOF) {
        perror("Failed to close memory stream");
        goto endoutstream;
    }
    /* Closing the output stream finalizes outarray and outarraybytecount. */
    if(fclose(outstream) == EOF) {
        perror("Failed to close memory stream");
        exit(EXIT_FAILURE);
    }

    char **array = outarray.untyped;
    size_t arraylen = outarraybytecount/sizeof(*array);
    for(size_t i = 0; i < arraylen; i++) {
        puts(array[i]);
        free(array[i]);
    }
    free(array);
    exit(EXIT_SUCCESS);

endreadstream:
    if(fclose(readstream) == EOF) {
        perror("Failed to close memory stream");
    }
endoutstream:
    if(fclose(outstream) == EOF) {
        perror("Failed to close memory stream");
    } else {
        /* release whatever tokens were collected before the failure */
        char **array = outarray.untyped;
        for(size_t i = 0; i < outarraybytecount/sizeof(char*); i++) {
            free(array[i]);
        }
        free(array);
    }
    exit(EXIT_FAILURE);
}
  • Related