I am trying to flatten API response. This is the response
data = [{
"id": 1,
"status": "Public",
"Options": [
{
"id": 8,
"pId": 9
},
{
"id": 10,
"pId": 11
}
]
},
{
"id": 2,
"status": "Public",
"Options": [
{
"id": 12,
"pId": 13
},
{
"id": 14,
"pId": 15
}
]
}
]
I am trying to do this(applying ast literal eval, df.pop and json normalize). And then i am concatinating the results
def pop(child_df, column_value):
child_df = child_df.dropna(subset=[column_value])
if isinstance(child_df[column_value][0], str):
print("yes")
child_df[column_value] = child_df[column_value].apply(ast.literal_eval)
normalized_json = [json_normalize(x) for x in child_df.pop(column_value)]
expanded_child_df = child_df.join(pd.concat(normalized_json, ignore_index=True, sort=False).add_prefix(column_value '_'))
expanded_child_df.columns = [str(col).replace('\r','') for col in expanded_child_df.columns]
expanded_child_df.columns = map(str.lower, expanded_child_df.columns)
return expanded_child_df
df = pd.DataFrame.from_dict(data)
df2 = pop(df,'Options')
This is the output i am getting
id status options_id options_pid
0 1 Public 8 9
1 2 Public 10 11
But the code is skipping some values inside the Options
list.
This is the expected output
id status options_id options_pid
0 1 Public 8 9
1 1 Public 10 11
2 2 Public 12 13
3 2 Public 14 15
What am i missing here ?
CodePudding user response:
df = pd.json_normalize(data, record_path="Options", meta=['id','status'], record_prefix='options.')
CodePudding user response:
you can use:
df=pd.json_normalize(data).explode('Options')
df=df.join(df['Options'].apply(pd.Series).add_prefix('options_')).drop(['Options'],axis=1).drop_duplicates()
print(df)
'''
id status optionsid optionspId
0 1 Public 8 9
0 1 Public 10 11
1 2 Public 12 13
1 2 Public 14 15
'''
CodePudding user response:
df = pd.json_normalize(data).explode('Options')
tmp= df['Options'].apply(pd.Series)
df = pd.concat([df[['id', 'status']], tmp], axis=1)
print(df)