i have this csv array:
number,event_date,event_timestamp,event_name,event_params
0,20220315,1668314165054758,eventTracking1,"[{'key': 'test0', 'value': {'string_value': None1, 'int_value': 1662354225, 'float_value': None, 'double_value': None}}
{'key': 'test2', 'value': {'string_value': 'http:\test1.com', 'int_value': None, 'float_value': None, 'double_value': None}}
{'key': 'test3', 'value': {'string_value': '[email protected]', 'int_value': None, 'float_value': None, 'double_value': None}}]"
1,20220325,1668445654758556,eventTracking2,"[{'key': 'test5', 'value': {'string_value': None3, 'int_value': 16658884225, 'float_value': None, 'double_value': None}}
{'key': 'test7', 'value': {'string_value': 'http:\test2.com', 'int_value': None, 'float_value': None, 'double_value': None}}
{'key': 'test8', 'value': {'string_value': '[email protected]', 'int_value': None, 'float_value': None, 'double_value': None}}]"
2,20220335,1668317775054758,eventTracking3,"[{'key': 'test10', 'value': {'string_value': None5, 'int_value': 1662454225, 'float_value': None, 'double_value': None}}
{'key': 'test12', 'value': {'string_value': 'http:\test3.com', 'int_value': None, 'float_value': None, 'double_value': None}}
{'key': 'test13', 'value': {'string_value': '[email protected]', 'int_value': None, 'float_value': None, 'double_value': None}}]"
what i want to have is
number,event_date,event_timestamp,event_name,event_params1,event_params2
0,20220315,1668314165054758,eventTracking1,test0,None1
0,20220315,1668314165054758,eventTracking1,test2,http:\test1.com
0,20220315,1668314165054758,eventTracking1,test3,[email protected]
1,20220325,1668445654758556,eventTracking2,test6,None4
1,20220325,1668445654758556,eventTracking2,test7,http:\test2.com
1,20220325,1668445654758556,eventTracking2,test8,[email protected]
2,20220335,1668317775054758,eventTracking3,test10,None5
2,20220335,1668317775054758,eventTracking3,test12,http:\test3.com
2,20220335,1668317775054758,eventTracking3,test13,[email protected]
i have this Python code to run on csv file above. But the Code doesn't loop through all the rows, it reads the row 2 instead of row 0,1 and 2. i am newbie with python, i have tried to fix it. but i couldn't Can you please help? Thank you
import pandas as pd
import re
# >>> re.sub(r'([A-Z])(?!$)', r'\1,', 'ABC')
df = pd.read_csv("check.csv")
print(df)
for i in range(3):
print(i)
# It will take the list/dict from the relevant column
dc = df.event_params.iloc[i]
print(dc)
# The list is actually a string so we need to you eval
# Before the eval we need to fix the dict by adding comma after }}
r = re.sub("}}", r"}},", dc)
ev = eval(r)
# To get the number and event_date I just taking the column value
number = df.number.iloc[i]
event_date = df.event_date.iloc[i]
event_timestamp = df.event_timestamp.iloc[i]
event_name = df.event_name.iloc[i]
# Now lets loop over the dict and get the needed values
event_params1 = []
event_params2 = []
for i in ev:
print(i['value'])
print(i.keys())
event_params1.append(i['key'])
event_params2.append(i['value']['string_value'])
# Now creating final dataFrame and inserting all the values
df_final = pd.DataFrame()
df_final['event_params1'] = event_params1
df_final['event_params2'] = event_params2
df_final['number'] = number
df_final['event_date'] = event_date
df_final['event_timestamp'] = event_timestamp
df_final['event_name'] = event_name
df_final = df_final[['number', 'event_date', 'event_timestamp', 'event_name', 'event_params1',
'event_params2']]
print(df_final)
# Save as csv
df_final.to_csv("This_is_what_you_nees.csv")
CodePudding user response:
for i in range(3):
print(i)
# It will take the list/dict from the relevant column
dc = df.event_params.iloc[i]
print(dc)
"dc = df.event_params.iloc[i]" line in your code above is no indentation, the correct syntax is:
for i in range(3):
print(i)
# It will take the list/dict from the relevant column
dc = df.event_params.iloc[i]
print(dc)
CodePudding user response:
Here's another way you could do it that makes use of some regular expressions and pandas' Series.apply and DataFrame.explode functions
import pandas as pd
import re
df = pd.read_csv("check.csv")
key_regex = r"(?<=(\'key\': ))[^,]*(?=,)"
val_regex = r"(?<=(\'string_value\': ))[^,]*(?=,)"
def get_keys(x):
return [re.search(key_regex, line).group().replace("'", "") for line in x.split("\n")]
def get_vals(x):
return [re.search(val_regex, line).group().replace("'", "") for line in x.split("\n")]
df["event_params1"] = df["event_params"].apply(get_keys)
df["event_params2"] = df["event_params"].apply(get_vals)
del df["event_params"]
final_df = df.explode(["event_params1", "event_params2"])
print(final_df)