Home > Back-end >  How to detect a repeated " user input String " in a file?
How to detect a repeated " user input String " in a file?

Time:04-23

The idea is to compare the strings in a file line by line and detect the duplicated ones, so that they are not copied into another file. After filling my file with names, I created a new file to hold all the strings without the duplicates. I used this loop, but I don't know whether it is right. It didn't work.

// Fragment from the question: intended to copy the lines of temp1.txt into
// test.txt while skipping duplicates.
// Tr   - stream for "test.txt", opened for writing
// temp - stream for "temp1.txt", opened for reading
FILE *Tr , *temp;
// two line buffers of 50 bytes each
char test[50] , test1[50];

// NOTE: neither fopen() result is checked against NULL before use
Tr = fopen("test.txt","w");
temp = fopen("temp1.txt" , "r");

// outer loop: runs until the end-of-file indicator is set on temp
while( !feof(temp) )
{  
// reads one line into test; the return value is ignored
fgets(test , 50 , temp);
     
    // inner loop: same condition on the same stream as the outer loop
    while( !feof(temp) ){

                    // Two more fgets() calls on temp per evaluation. fgets()
                    // returns its buffer argument on success or NULL on failure,
                    // so == compares pointer values here, not the text of the
                    // two lines.
                    if ( fgets(test , 50 , temp) == fgets(test1 , 50 , temp) ){
                      printf("a string exist in the file");
                    }
                                                
                     // otherwise the contents of test1 are written to the output file
                     else{  fprintf(Tr, "%s" , test1);  
                    }
                       }
                }

CodePudding user response:

The following line is wrong:

if ( fgets(test , 50 , temp) == fgets(test1 , 50 , temp) ){

Using == on pointers will compare the actual pointer values, i.e. the memory addresses. If you want to compare the actual string contents (i.e. what the pointers are pointing to), then you must use strcmp instead.

Also, you should only read from the input file, not the output file.

You should also remember all strings that you have read. Otherwise, you will have no way of determining whether the current line is a duplicate or not.

Additionally, it does not make sense having both an outer loop and an inner loop with the same loop condition:

while( !feof(temp) )

Also, using !feof(temp) as a loop condition is generally wrong. See this question for further information:

Why is “while ( !feof (file) )” always wrong?

The following program will remember up to 100 strings, each up to 100 chars in length (including the terminating null character).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define MAX_STRINGS    100
#define MAX_STRING_LEN 100

/*
 * Copies "input.txt" to "output.txt", writing every distinct line only once
 * (first occurrence wins, original order is kept).
 *
 * Limits: at most MAX_STRINGS distinct lines are remembered, and each line
 * must fit into a buffer of MAX_STRING_LEN bytes (including the newline read
 * by fgets and the terminating null character).
 *
 * Returns EXIT_SUCCESS on success. On any error (file cannot be opened, line
 * too long, too many distinct lines, write failure) a message is printed to
 * stderr and the program exits with EXIT_FAILURE.
 */
int main( void )
{
    FILE *input, *output;
    char strings[MAX_STRINGS][MAX_STRING_LEN];
    int num_strings = 0;
    char line[MAX_STRING_LEN];

    //open input file
    input = fopen( "input.txt", "r" );
    if ( input == NULL )
    {
        fprintf( stderr, "Error opening input file!\n" );
        exit( EXIT_FAILURE );
    }

    //open output file
    output = fopen( "output.txt", "w" );
    if ( output == NULL )
    {
        fprintf( stderr, "Error opening output file!\n" );
        exit( EXIT_FAILURE );
    }

    //read one line of input per loop iteration
    while ( fgets( line, sizeof line, input ) != NULL )
    {
        bool is_duplicate = false;
        char *p;

        //find newline character
        p = strchr( line, '\n' );

        //make sure that input buffer was large enough to
        //read entire line, and remove newline character
        //if it exists
        if ( p == NULL )
        {
            //a missing newline is only acceptable on the last
            //line of the file, i.e. when end-of-file was reached
            if ( !feof( input ) )
            {
                fprintf( stderr, "Line was too long for input buffer!\n" );
                exit( EXIT_FAILURE );
            }
        }
        else
        {
            //remove newline character
            *p = '\0';
        }

        //determine whether line is duplicate
        for ( int i = 0; i < num_strings; i++ )
        {
            if ( strcmp( line, strings[i] ) == 0 )
            {
                is_duplicate = true;
                break;
            }
        }

        if ( !is_duplicate )
        {
            //refuse to write past the end of the fixed-size array
            if ( num_strings == MAX_STRINGS )
            {
                fprintf( stderr, "Too many distinct strings!\n" );
                exit( EXIT_FAILURE );
            }

            //remember string; "line" and each row of "strings" have the
            //same size, so the copy cannot overflow
            strcpy( strings[num_strings++], line );

            //write string to output file
            fprintf( output, "%s\n", line );
        }
    }

    //cleanup

    //fclose flushes buffered output, so a failure here means
    //that the output file may be incomplete
    if ( fclose( output ) != 0 )
    {
        fprintf( stderr, "Error writing output file!\n" );
        exit( EXIT_FAILURE );
    }
    fclose( input );

    return EXIT_SUCCESS;
}

Given the input

String1
String2
String3
String4
String5
String1
String6
String2
String1
String7
String8
String1
String2

this program has the following output:

String1
String2
String3
String4
String5
String6
String7
String8

As you can see, all duplicate strings were properly filtered out of the output.

However, using a statically sized array is a bit of a waste of space, and it also imposes a hard limit. Therefore, it may be better to use dynamic memory allocation instead:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define INITIAL_CAPACITY 100
#define MAX_LINE_LEN 200

/*
 * Copies "input.txt" to "output.txt", writing every distinct line only once
 * (first occurrence wins, original order is kept).
 *
 * The list of lines seen so far is kept in a dynamically allocated array
 * that starts with room for INITIAL_CAPACITY entries and doubles whenever
 * it is full, so the number of distinct lines is limited only by memory.
 * Each line must still fit into a buffer of MAX_LINE_LEN bytes (including
 * the newline read by fgets and the terminating null character).
 *
 * Returns EXIT_SUCCESS on success. On any error (file cannot be opened, line
 * too long, allocation failure, write failure) a message is printed to
 * stderr and the program exits with EXIT_FAILURE.
 */
int main( void )
{
    FILE *input, *output;
    char **strings;
    size_t strings_capacity;
    size_t num_strings = 0;
    char line[MAX_LINE_LEN];

    //open input file
    input = fopen( "input.txt", "r" );
    if ( input == NULL )
    {
        fprintf( stderr, "Error opening input file!\n" );
        exit( EXIT_FAILURE );
    }

    //open output file
    output = fopen( "output.txt", "w" );
    if ( output == NULL )
    {
        fprintf( stderr, "Error opening output file!\n" );
        exit( EXIT_FAILURE );
    }

    //set capacity of "strings" array to INITIAL_CAPACITY
    strings_capacity = INITIAL_CAPACITY;
    strings = malloc( strings_capacity * sizeof *strings );
    if ( strings == NULL )
    {
        fprintf( stderr, "Memory allocation failure!\n" );
        exit( EXIT_FAILURE );
    }

    //read one line of input per loop iteration
    while ( fgets( line, sizeof line, input ) != NULL )
    {
        bool is_duplicate = false;
        char *p;

        //find newline character
        p = strchr( line, '\n' );

        //make sure that input buffer was large enough to
        //read entire line, and remove newline character
        //if it exists
        if ( p == NULL )
        {
            //a missing newline is only acceptable on the last
            //line of the file, i.e. when end-of-file was reached
            if ( !feof( input ) )
            {
                fprintf( stderr, "Line was too long for input buffer!\n" );
                exit( EXIT_FAILURE );
            }
        }
        else
        {
            //remove newline character
            *p = '\0';
        }

        //determine whether line is duplicate
        for ( size_t i = 0; i < num_strings; i++ )
        {
            if ( strcmp( line, strings[i] ) == 0 )
            {
                is_duplicate = true;
                break;
            }
        }

        if ( !is_duplicate )
        {
            size_t size;

            //expand capacity of "strings" array if necessary
            if ( num_strings == strings_capacity )
            {
                char **expanded;

                //use a temporary pointer so that the old array is
                //not lost if realloc fails
                strings_capacity *= 2;
                expanded = realloc( strings, strings_capacity * sizeof *strings );
                if ( expanded == NULL )
                {
                    fprintf( stderr, "Memory allocation failure!\n" );
                    exit( EXIT_FAILURE );
                }
                strings = expanded;
            }

            //remember string (the +1 is for the terminating null character)
            size = strlen( line ) + 1;
            strings[num_strings] = malloc( size );
            if ( strings[num_strings] == NULL )
            {
                fprintf( stderr, "Memory allocation failure!\n" );
                exit( EXIT_FAILURE );
            }
            memcpy( strings[num_strings], line, size );
            num_strings++;

            //write string to output file
            fprintf( output, "%s\n", line );
        }
    }

    //cleanup

    //free all dynamically allocated memory
    for ( size_t i = 0; i < num_strings; i++ )
        free( strings[i] );
    free( strings );

    //close file handles; fclose flushes buffered output, so a failure
    //here means that the output file may be incomplete
    if ( fclose( output ) != 0 )
    {
        fprintf( stderr, "Error writing output file!\n" );
        exit( EXIT_FAILURE );
    }
    fclose( input );

    return EXIT_SUCCESS;
}
  •  Tags:  
  • c
  • Related