I need to convert a json file to csv file. The json file is nested as the following: as you see, for each user, there may be one role or multiple roles.
[{"userId": "82fac301", "login": "aa",
"roles": [{"dateCreated": "2019-03-25T21:18:47.438", "dateModified": "2022-04-02T02:40:48.216", "roleId": "e818ddc2"}]},
{"userId": "88998d95", "login": "ab","roles": [
{"dateCreated": "2019-03-25T21:18:48.673", "dateModified": "2022-04-02T02:40:48.217", "roleId": "007e9495"}]}, {"userId": "dd4804cb", "login": "ac", "roles": [{"dateCreated": "2019-03-25T21:18:47.438", "dateModified": "2022-04-02T02:40:48.216", "roleId": "e818ddc2"},{"dateCreated": "2019-03-25T21:18:47.438",
"dateModified": "2022-04-02T02:40:48.216",
"roleId": "e818ddc2"}, {"dateCreated": "2019-03-25T21:18:49.056", "dateModified": "2022-04-02T02:40:48.217", "roleId": "5913416f"}]}]
then in csv file, I'd like the columns header look like this (the columns of roles can be dynamic since it's possible the user has up to 10 roles):
userId login roles_1_datecreated roles_1_dateModified roles_1_roleId roles_2_datecreated roles_2_dateModified roles_2_roleId roles_3_...
Thanks.
CodePudding user response:
This is likely not prettiest solution, but it will work and accepts an unlimited number of roles.
import pandas as pd
data = [{"userId": "82fac301", "login": "aa", "roles": [{"dateCreated": "2019-03-25T21:18:47.438", "dateModified": "2022-04-02T02:40:48.216", "roleId": "e818ddc2"}]}, {"userId": "88998d95", "login": "ab","roles": [ {"dateCreated": "2019-03-25T21:18:48.673", "dateModified": "2022-04-02T02:40:48.217", "roleId": "007e9495"}]}, {"userId": "dd4804cb", "login": "ac", "roles": [{"dateCreated": "2019-03-25T21:18:47.438", "dateModified": "2022-04-02T02:40:48.216", "roleId": "e818ddc2"},{"dateCreated": "2019-03-25T21:18:47.438", "dateModified": "2022-04-02T02:40:48.216", "roleId": "e818ddc2"}, {"dateCreated": "2019-03-25T21:18:49.056", "dateModified": "2022-04-02T02:40:48.217", "roleId": "5913416f"}]}]
def csv_converter(contents):
headers = ["userId", "login"]
role_string = "roles_{0}_{1}"
rows = []
for userdict in contents:
row = [userdict["userId"], userdict["login"]]
for i, role in enumerate(userdict["roles"]):
for k,v in role.items():
header = role_string.format(i 1, k)
if header not in headers:
headers.append(header)
row.append(v)
rows.append(row)
rows = [row ([""]*(len(headers) - len(row))) for row in rows]
df = pd.DataFrame(data=rows, columns=headers)
df.to_csv('csvfile.csv')
# write_to_file(headers, rows)
#def write_to_file(headers, rows):
# with open("csvfile.csv","wt") as fd:
# fd.write(",".join(headers) "\n")
# for line in rows:
# amount = len(headers) - len(line)
# if amount > 0:
# line = line ([""]*amount)
# fd.write(",".join(line) "\n")
csv_converter(data)