I have a CSV file with one column in which each cell holds a list of several strings. I have written code that adds HTML tags to each string depending on the symbol (or absence thereof) that precedes it. When I wrap that code in a function so that it is applied to each cell, Python drops most of the strings (leaving only one per cell). Instead, I would like to keep all the strings I had before, with the HTML tags added. Could someone help me fix the function? Thank you! The code:
import pandas as pd
# Load the input table; insert_HTML below reads its "text" column.
# NOTE(review): read_csv presumably delivers a list-like cell as a plain str
# (e.g. "['$a', 'b']"), which may need parsing (such as ast.literal_eval)
# before it can be iterated item by item - confirm against the actual file.
df = pd.read_csv("input_file.csv")
def insert_HTML(row):
    """Wrap the strings in ``row["text"]`` in HTML tags chosen by their prefix.

    A string starting with "$" is emitted as a list item, one starting with
    "$$" as bold text, and any other string as a paragraph.

    NOTE: every branch executes ``return new_line`` inside the loop, so the
    function returns as soon as one string has been converted, and the flag
    assignments written after each ``return`` never run. This is the
    behaviour the question is about (only one string per cell survives).

    Args:
        row: Mapping-like object (e.g. a DataFrame row) whose "text" entry
            is an iterable of strings.

    Returns:
        The HTML for the first string that matches a branch, or ``None`` if
        no string does.
    """
    single_dash_prev_line = False
    double_dash_prev_line = False
    for line in row["text"]:
        # print(line[0:2])
        current_line = line
        if line[0:1] != "$":
            # print(line[0:1])
            new_line = "</li></ul></b></strong><p>" + current_line + "</p>"
            return new_line
        elif line[0:1] == "$":
            # print("got here first")
            if line[0:2] != "$$":
                # print(line[1:2])
                if single_dash_prev_line == False:
                    new_line = "</b></strong><ul><li>" + current_line[1:]
                    return new_line
                    single_dash_prev_line = True  # never reached: follows return
                elif single_dash_prev_line == True:
                    new_line = "</b></strong></li><li>" + current_line[1:]
                    return new_line
                    single_dash_prev_line = True
            elif line[0:2] == "$$":
                # print("got here")
                if single_dash_prev_line == True:
                    new_line = "</li></ul><b><strong>" + line[2:]
                    return new_line
                    double_dash_prev_line = True
                elif double_dash_prev_line == True:
                    new_line = "</b></strong></li></ul>" + line[2:]
                    return new_line
                    double_dash_prev_line = True
        # The two branches below cannot run: the conditions above already
        # cover both "starts with $" and "does not start with $".
        elif single_dash_prev_line == True:
            new_line = "</b></strong></li></ul>" + current_line
            return new_line
            single_dash_prev_line = False
        elif double_dash_prev_line == True:
            new_line = "</b></strong></li></ul>" + current_line[1:]
            return new_line
            single_dash_prev_line = False
# Build the new column by calling insert_HTML once per row (axis=1).
df["new_text"] = df.apply(insert_HTML, axis=1)
This is what the text of one cell looks like:
['$String1.',
'$String2.',
'$String3.',
'$String4.',
'String5.',
'$$String6.',
'String7.',
'']
This is what the output should look like (I am aware that the HTML is not clean; I will take care of that with another tool):
</b></strong><ul><li>String1.
</b></strong></li><li>String2.
</b></strong></li><li>String3.
</b></strong></li><li>String4.
</li></ul></b></strong><p>String5.</p>
</li></ul><b><strong>String6.
</li></ul></b></strong><p>String7.</p>
</li></ul></b></strong><p></p>
CodePudding user response:
Instead of executing `return new_line` for every item in the list, keep appending each intermediate result to a string, separated by newlines (`result += new_line + "\n"`). Then return the string `result` once all the items in the list have been processed.
Here's the code.
import pandas as pd
# Load the input table; insert_HTML below reads its "text" column.
# NOTE(review): read_csv presumably delivers a list-like cell as a plain str
# (e.g. "['$a', 'b']"), which may need parsing (such as ast.literal_eval)
# before it can be iterated item by item - confirm against the actual file.
df = pd.read_csv("input_file.csv")
def insert_HTML(row):
    """Return every string in ``row["text"]`` wrapped in HTML, one per line.

    The tag placed in front of each string depends on its prefix:

    * no "$" prefix: the string is wrapped in a paragraph;
    * "$" prefix: the string becomes a list item (the first one also opens
      the list), with the "$" removed;
    * "$$" prefix: the string becomes bold text, with the "$$" removed.

    Unlike returning inside the loop, the converted strings are collected
    and returned together once the whole cell has been processed.

    NOTE: the flags are never reset, and a "$$" string that appears before
    any "$" string matches no branch and is therefore skipped.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) whose "text" entry
            is an iterable of strings.

    Returns:
        A single string with each converted item followed by a newline;
        an empty string when nothing was converted.
    """
    single_dash_prev_line = False
    double_dash_prev_line = False
    converted = []
    for line in row["text"]:
        if not line.startswith("$"):
            converted.append("</li></ul></b></strong><p>" + line + "</p>")
        elif not line.startswith("$$"):
            # The first "$" item opens the list; later ones close the
            # previous item before opening their own.
            if single_dash_prev_line:
                opening = "</b></strong></li><li>"
            else:
                opening = "</b></strong><ul><li>"
            converted.append(opening + line[1:])
            single_dash_prev_line = True
        elif single_dash_prev_line:
            converted.append("</li></ul><b><strong>" + line[2:])
            double_dash_prev_line = True
        elif double_dash_prev_line:
            converted.append("</b></strong></li></ul>" + line[2:])
            double_dash_prev_line = True
    # Join once at the end instead of growing a string inside the loop.
    return "".join(item + "\n" for item in converted)
# Build the new column by calling insert_HTML once per row (axis=1).
df["new_text"] = df.apply(insert_HTML, axis=1)