I have many rows that look like the following picture. I am trying to create a new row for each value in the very long "zones" column. Each new row would copy the data from the first three columns and leave the next "zone" value in the column.
I believe that the explode method within pandas is what I need, but my data isnt breaking out from the list in the manner I was expecting.
#Constants and Public Variables
df = pd.read_excel("input.xlsx", sheet_name=0, usecols='D,G,H,K')
df = df.dropna()
new_header = df.iloc[0] #grab the first row for the header
df = df[1:] #take the data less the header row
df.columns = new_header #set the header row as the df header
zones = [['CA2,SW1,SW3,SW2,STH2,STH3,STH0,NTH0,DR1,DR2,MID1,MID2,MID3,NE1,NE2,NE3,NTH1,NTH2,NTH3,PLN1,PLN2,PLN3,NW1,NW2'],['CA2,SW1,SW3,SW2,STH2,STH3,STH0,NTH0,DR1,DR2,MID1,MID2,MID3,NE1,NE2,NE3,NTH1,NTH2,NTH3,PLN1,PLN2,PLN3,NW1,NW2'],['CA2,SW1,SW3,SW2,STH2,STH3,STH0,NTH0,DR1,DR2,MID1,MID2,MID3,NE1,NE2,NE3,NTH1,NTH2,NTH3,PLN1,PLN2,PLN3,NW1,NW2']]
replace_values = ['All Zones', 'All Zones ', 'all']
df = df.replace(to_replace=replace_values, value=zones)
df = df.explode("ZONES")
df.to_csv("outout.csv")
CodePudding user response:
Try this:
import pandas as pd
id = [3609112]
reg_price = [3.99]
promo_price = [3.99]
zones = ["CA2,SW1,SW3,SW2"]
df = pd.DataFrame(id, columns=['id'])
df['reg_price'] = reg_price
df['promo_price'] = promo_price
df['zones'] = zones
def convert_to_list(row):
arr = row.split(',')
l = [x for x in arr]
return l
df['zones'] = df['zones'].apply(convert_to_list)
print(df.explode('zones'))