I have a dictionary like this
data_dict = {'Company': [
{
'Date': [{'L_End': {'@value': '2013-03-31'},
'L_Start': {'@value': '2006-10-01'},
'Type': {'@value': 'L'}},
{'O_Start': {'@value': '2006-10-01'},
'Type': {'@value': 'O'}}],
'GeoLoc': {'Location': {'AddrLn1': 'HIGHLAND ROAD',
'Country': 'ENGLAND',
'County': 'HAMPSHIRE',
'PostCode': 'PO49HU',
'Town': 'SOUTHSEA',
'UPRN': Decimal('1775')}},
'Name': 'Courtney',
'OrgId': {'@assigningAuthorityName': 'ABCDE',
'@extension': 'a12cde',
'@root': '2.16.840.1.1'},
'Status': {'@value': 'Active'},
},
{
'Date': [{'L_End': {'@value': '2013-03-31'},
'L_Start': {'@value': '2006-10-01'},
'Type': {'@value': 'L'}},
{'O_Start': {'@value': '2006-10-01'},
'Type': {'@value': 'O'}}],
'GeoLoc': {'Location': {'AddrLn1': 'VILLIERS ROAD',
'Country': 'ENGLAND',
'County': 'HAMPSHIRE',
'PostCode': 'PO52HG',
'Town': 'SOUTHSEA'}},
'Name': 'MERLIN',
'OrgId': {'@assigningAuthorityName': 'ABCDE',
'@extension': 'b12cde',
'@root': '2.16.840.1.1'},
'Status': {'@value': 'Active'}
}
]
}
My expected output is Please Click Image
I have tried doing this using below code and it works perfectly but I want to see if I can read specific data instead of splitting each row into columns multiple times.
df = pd.DataFrame.from_dict(data_dict)
df1 = pd.DataFrame(df.Company.values.tolist())
df2 = pd.concat([df1, df1.OrgId.apply(lambda x: pd.Series(x))], axis=1)
df2 = df2.drop(['OrgId', '@root', '@assigningAuthorityName'], axis=1)
df2 = df2.rename(columns={"@extension": "OrgId"})
df3 = pd.concat([df2, df2.Date.apply(lambda x: pd.Series(x))], axis=1)
df3 = df3.drop('Date', axis=1)
df4 = pd.concat([df3, df3[0].apply(lambda x: pd.Series(x))], axis=1)
df4 = df4.drop([0, 'Type'], axis=1)
....Followed the same until I got all the columns as I needed them to be
df11 = pd.concat([df10, df10.Location.apply(lambda x: pd.Series(x))], axis=1)
df11 = df11.drop('Location', axis=1)
final_df = df11.copy()
print(final_df)
What's the best way to do this?
CodePudding user response:
Try:
lst = [
{
"Name": c.get("Name"),
"OrgId": c.get("OrgId", {}).get("@extension"),
"L_Start": next((d for d in c.get("Date", []) if "L_Start" in d), {})
.get("L_Start", {})
.get("@value"),
"L_End": next((d for d in c.get("Date", []) if "L_End" in d), {})
.get("L_End")
.get("@value"),
"Status": c.get("Status", {}).get("@value"),
**c.get("GeoLoc", {}).get("Location", {}),
}
for c in data_dict["Company"]
]
df = pd.DataFrame(lst)
print(df)
Prints:
Name OrgId L_Start L_End Status AddrLn1 Country County PostCode Town UPRN
0 Courtney a12cde 2006-10-01 2013-03-31 Active HIGHLAND ROAD ENGLAND HAMPSHIRE PO49HU SOUTHSEA 1775
1 MERLIN b12cde 2006-10-01 2013-03-31 Active VILLIERS ROAD ENGLAND HAMPSHIRE PO52HG SOUTHSEA NaN