I have the code below, which splits each value in the address column and pops elements if the length of the split list is equal to 4 or 5 (see code).
import pandas as pd
# Sample addresses; the parts of each address are separated by "\\n".
data = {
    'address': [
        "William J. Clare\\n290 Valley Dr.\\nCasper, WY 82604\\nUSA",
        "1180 Shelard Tower\\nMinneapolis, MN 55426\\nUSA",
        "William N. Barnard\\n145 S. Durbin\\nCasper, WY 82601\\nUSA",
        "215 S 11th ST",
    ]
}
df = pd.DataFrame(data)
df_dict = df.to_dict('records')

for row in df_dict:
    add = row["address"]
    # Split once and reuse the list instead of re-splitting for every check.
    parts = add.split("\\n")
    print(parts, len(parts))

    # Start from the untouched address so that rows with neither 4 nor 5
    # parts do not re-print the result left over from the previous row.
    target = add
    if len(parts) == 4:
        # Drop the leading part.
        parts.pop(0)
        target = '\\n'.join(parts)
    elif len(parts) == 5:
        parts.pop(0)
        # The list has already shifted by one, so this removes what was
        # originally the third part, not the second.
        parts.pop(1)
        target = '\\n'.join(parts)
    print(target)
However, instead of writing a separate pop condition for each length, I would like to retain only the last three elements of the split list whenever len(add.split("\\n")) > 3.
I need a command which retains only the last three elements instead of popping elements,
like this:
address
290 Valley Dr.\\nCasper, WY 82604\\nUSA
1180 Shelard Tower\\nMinneapolis, MN 55426\\nUSA
145 S. Durbin\\nCasper, WY 82601\\nUSA
215 S 11th ST
Your help will be greatly appreciated. Thanks in advance.
CodePudding user response:
Instead of using pop(), you can use slicing, e.g. target = target[-3:]. This takes only the last three items of the list (or the whole list if it has fewer than three items).
CodePudding user response:
Simply use slicing in an if-else condition.
CODE:
import pandas as pd
# Sample addresses; the parts of each address are separated by "\\n".
data = {
    'address': [
        "William J. Clare\\n290 Valley Dr.\\nCasper, WY 82604\\nUSA",
        "1180 Shelard Tower\\nMinneapolis, MN 55426\\nUSA",
        "William N. Barnard\\n145 S. Durbin\\nCasper, WY 82601\\nUSA",
        "215 S 11th ST",
    ]
}
df = pd.DataFrame(data)
df_dict = df.to_dict('records')

for row in df_dict:
    add = row["address"]
    # Split once and branch on the number of parts.
    parts = add.split("\\n")
    if len(parts) == 4:
        # Four parts: drop the first one.
        target = '\\n'.join(parts[1:])
    elif len(parts) == 5:
        # Five parts: drop the first two.
        target = '\\n'.join(parts[2:])
    else:
        # Any other length is passed through unchanged, so `target` is
        # always assigned and never carries over from the previous row.
        target = add
    print(target)
OUTPUT:
290 Valley Dr.\nCasper, WY 82604\nUSA
1180 Shelard Tower\nMinneapolis, MN 55426\nUSA
145 S. Durbin\nCasper, WY 82601\nUSA
215 S 11th ST
A more general version of the code, which works for any length greater than 3, whether it is 4 or 100:
for row in df_dict:
    add = row["address"]
    # A negative slice keeps at most the last three parts: lists with three
    # or fewer parts come back whole, longer ones lose their leading parts.
    # No explicit length check is needed.
    target = '\\n'.join(add.split("\\n")[-3:])
    print(target)