Home > Software design >  How to create a function that transforms multiple strings inside csv cells with python?
How to create a function that transforms multiple strings inside csv cells with python?

Time:02-20

I have a csv file with 1 column where each cell has a list with multiple strings. I have created a function that adds HTML tags depending on the symbol (or absence thereof) that precedes each string. When I try to insert the code into a function, to have it applied to each cell, Python removes most of the strings (leaving only one per cell). Instead, I would like the same strings I had before, but with the added HTML tags. Could someone help me by fixing the function? Thank you! The code:

import pandas as pd
# Load the input CSV into a DataFrame.
# NOTE(review): read_csv returns each cell as a plain string; if the "text"
# column stores list literals they presumably need parsing (e.g. with
# ast.literal_eval) before they can be iterated as lists -- confirm.
df = pd.read_csv("input_file.csv")

def insert_HTML(row):
    """Tag the strings in ``row["text"]`` with HTML based on their ``$`` prefix.

    NOTE: every branch that builds ``new_line`` returns from inside the loop,
    so the function stops as soon as one string has been converted and the
    flag assignments placed after each ``return`` never run. This is the
    behaviour the question is asking about.

    Args:
        row: Mapping (e.g. a DataFrame row) whose ``"text"`` entry is iterated
            item by item; assumed to be a list of strings.

    Returns:
        The HTML for the first converted string, or ``None`` when no string
        reaches a ``return`` (empty input, or only ``$$`` lines).
    """

    single_dash_prev_line = False
    double_dash_prev_line = False

    for line in row["text"]:
        # print(line[0:2])
        current_line = line

        if line[0:1] != "$":
            # Plain line: emit it as a paragraph.
            # print(line[0:1])
            new_line = "</li></ul></b></strong><p>" + current_line + "</p>"
            return new_line

        elif line[0:1] == "$":
            # print("got here first")
            if line[0:2] != "$$":
                # Single "$": list item.
                # print(line[1:2])
                if single_dash_prev_line == False:
                    new_line = "</b></strong><ul><li>" + current_line[1:]
                    return new_line
                    single_dash_prev_line = True
                elif single_dash_prev_line == True:
                    new_line = "</b></strong></li><li>" + current_line[1:]
                    return new_line
                    single_dash_prev_line = True

            elif line[0:2] == "$$":
                # Double "$$": bold text.
                # print("got here")
                if single_dash_prev_line == True:
                    new_line = "</li></ul><b><strong>" + line[2:]
                    return new_line
                    double_dash_prev_line = True
                elif double_dash_prev_line == True:
                    new_line = "</b></strong></li></ul>" + line[2:]
                    return new_line
                    double_dash_prev_line = True

        # The two branches below cannot be reached: the if/elif pair above
        # already covers every possible value of line[0:1].
        elif single_dash_prev_line == True:
            new_line = "</b></strong></li></ul>" + current_line
            return new_line
            single_dash_prev_line = False

        elif double_dash_prev_line == True:
            new_line = "</b></strong></li></ul>" + current_line[1:]
            return new_line
            single_dash_prev_line = False

# Build the "new_text" column by running insert_HTML on every row (axis=1).
df["new_text"] = df.apply(insert_HTML, axis=1)

This is what the text of one cell looks like:

['$String1.',
 '$String2.',
 '$String3).',
 '$String4.',
 'String5.',
 '$$String6.',
 'String7.',
 '']

This is what the output should look like (I am aware that the HTML code is not clean; I will take care of that with another tool):

</b></strong><ul><li>String1.
</b></strong></li><li>String2.
</b></strong></li><li>String3).
</b></strong></li><li>String4.
</li></ul></b></strong><p>String5.</p>
</li></ul><b><strong>String6.
</li></ul></b></strong><p>String7.</p>
</li></ul></b></strong><p></p>

CodePudding user response:

Instead of using "return new_line" for every item in the list, keep appending each intermediate result to a string, separated by a newline (result += new_line + "\n"). Then return the string "result" once all the items in the list have been dealt with.

Here's the code.

import pandas as pd
# Load the input CSV into a DataFrame.
# NOTE(review): read_csv returns each cell as a plain string; if the "text"
# column stores list literals they presumably need parsing (e.g. with
# ast.literal_eval) before they can be iterated as lists -- confirm.
df = pd.read_csv("input_file.csv")

def insert_HTML(row):
    """Convert the strings in ``row["text"]`` into newline-separated HTML.

    Each string is tagged according to its prefix:

    * no ``$`` prefix: wrapped as a paragraph (``<p>...</p>``);
    * ``$`` prefix: list item; the first one seen opens the ``<ul>``, every
      later one closes the previous ``<li>`` and opens a new one;
    * ``$$`` prefix: bold text, emitted only after a ``$`` item has been seen.

    The emitted tags are deliberately not balanced; the output is expected to
    be cleaned up by a separate tool afterwards.

    Args:
        row: Mapping (e.g. a DataFrame row) whose ``"text"`` entry is iterated
            item by item; assumed to already be a list of strings.

    Returns:
        One converted line per handled string, each terminated by ``"\\n"``;
        an empty string when nothing is converted.

    Note:
        * Once a ``$`` line has been seen, the "inside a list" state is never
          reset, so later ``$`` lines always continue the list.
        * A ``$$`` line that appears before any ``$`` line produces no output.
    """
    seen_list_item = False
    html_lines = []

    for line in row["text"]:
        if line.startswith("$$"):
            # Only emitted once a "$" item has been seen; otherwise skipped.
            if seen_list_item:
                html_lines.append("</li></ul><b><strong>" + line[2:])
        elif line.startswith("$"):
            if seen_list_item:
                html_lines.append("</b></strong></li><li>" + line[1:])
            else:
                html_lines.append("</b></strong><ul><li>" + line[1:])
                seen_list_item = True
        else:
            # Plain (or empty) string: close any open markup, emit a paragraph.
            html_lines.append("</li></ul></b></strong><p>" + line + "</p>")

    # Every converted line, including the last, is followed by a newline.
    return "".join(html_line + "\n" for html_line in html_lines)

# Build the "new_text" column by running insert_HTML on every row (axis=1).
df["new_text"] = df.apply(insert_HTML, axis=1)
  • Related