I have some data that looks like this:
# Sample input: a list of records. "Time Series" and "Probability" hold dict
# literals encoded as strings and keyed by date; "Max Value" is a plain float.
this_data = [{
"Name": "Bluefox",
"Sub Name": "Moonglow",
"Time Series": "{'2022-07-06': 9.5, '2022-07-07': 7.2, '2022-07-08': 10.3}",
"Probability": "{'2022-07-06': 0.2, '2022-07-07': 0.3, '2022-07-08': 0.5}",
"Max Value": 466888785.24275005,
},{
"Name": "Blackbird",
"Sub Name": "Skylight",
"Time Series": "{'2022-07-06': -16240599.020647092, '2022-07-07': -17984033.390385196}",
"Probability": "{'2022-07-06': 0.6, '2022-07-07': 0.7}",
"Max Value": 81509865.34667145,
},{
"Name": "Bluefox",
"Sub Name": "Skylight",
"Time Series": "{'2022-07-06': -123000, '2022-07-07': -245100}",
"Probability": "{'2022-07-06': 0.0, '2022-07-07': 0.0}",
"Max Value": 90409417.34667145,
}]
And I want to transform this into:
{'Bluefox': {
'Moonglow': {
'date': {
'2022-07-06': {
'Time Series' : 9.5,
'Probability' : 0.2,
},
'2022-07-07' : {
'Time Series' : 7.2,
'Probability' : 0.3,
},
'2022-07-08' : {
'Time Series' : 10.3,
'Probability' : 0.5,
}
},
'Max Value' : 466888785.24275005
},
'Skylight':{
'date': {}
}
}
},
{'Blackbird': {
'Moonglow': {
'date': {
'2022-07-06': {
'Time Series' : 9.5,
'Probability' : 0.2,
}
},
'Max Value' : 466888785.24275005
}
}
}
I am trying something like this:
import json
import ast
from collections import defaultdict
# Attempt at building the nested Name -> Sub Name -> date mapping from this_data.
# NOTE(review): the indentation of this snippet was lost in the paste, so the
# exact nesting of the two try blocks and their except clauses cannot be
# confirmed from the text below.
# Sub names accepted for each name; checked below before a value is parsed.
entity_to_cp = {
'Bluefox' : ['Moonglow', 'Skylight'],
'Blackbird' : ['Skylight']
}
# Each of these four containers is created exactly once, before the loop starts,
# and none of them is ever rebound afterwards.
inner = defaultdict(list)
between = defaultdict(dict)
between2 = defaultdict(dict)
outer = defaultdict(dict)
for each_dict in this_data:
for label, all_values in each_dict.items():
if label == "Name":
# Stores a reference to the one `between` object (not a copy), so every
# name key in `outer` refers to the same dict.
outer[all_values] = between
cur_e = all_values
if label == "Sub Name":
# Same pattern: every sub-name key in `between` refers to the one
# `between2` object.
between[all_values] = between2
cur_cp = all_values
try:
# Only continue when the current sub name is listed for the current name.
if cur_cp in entity_to_cp[cur_e]:
try:
# Parse the value as a Python literal; the "Time Series" and "Probability"
# values in this_data are dict literals stored as strings.
all_values = ast.literal_eval(all_values)
for k,v in all_values.items():
print(k)
# Every date key refers to the one `inner` object, and values are appended
# to a list keyed by `label` only, so values from every date and every
# record accumulate in the same lists.
between2[k] = inner
inner[label].append(v)
# inner[label].append(all_values)
# Each handler below only prints the exception it caught.
except AttributeError as e:
print(e)
except SyntaxError as e:
print(e)
except ValueError as e:
print(e)
except NameError as e:
print(e)
But this doesn't work and I get duplicated values all over the place. Help!
CodePudding user response:
I don't think there is a direct way to export "Name" and "Sub Name" as separate nesting levels, but you can do it with pandas by grouping the DataFrame by "Name" and looping over the groups. There are two additional difficulties: the "Time Series" and "Probability" strings must first be converted to dictionaries, and the nesting must be inverted so that the dates become the outer keys and "Time Series"/"Probability" the inner keys:
import json
import pandas as pd
import ast
# Input records: "Time Series" and "Probability" hold dict literals encoded as
# strings and keyed by date; "Max Value" is a plain float.
this_data = [
    {
        "Name": "Bluefox",
        "Sub Name": "Moonglow",
        "Time Series": "{'2022-07-06': 9.5, '2022-07-07': 7.2, '2022-07-08': 10.3}",
        "Probability": "{'2022-07-06': 0.2, '2022-07-07': 0.3, '2022-07-08': 0.5}",
        "Max Value": 466888785.24275005,
    },
    {
        "Name": "Blackbird",
        "Sub Name": "Skylight",
        "Time Series": "{'2022-07-06': -16240599.020647092, '2022-07-07': -17984033.390385196}",
        "Probability": "{'2022-07-06': 0.6, '2022-07-07': 0.7}",
        "Max Value": 81509865.34667145,
    },
    {
        "Name": "Bluefox",
        "Sub Name": "Skylight",
        "Time Series": "{'2022-07-06': -123000, '2022-07-07': -245100}",
        "Probability": "{'2022-07-06': 0.0, '2022-07-07': 0.0}",
        "Max Value": 90409417.34667145,
    },
]

df = pd.DataFrame(this_data)

# One {name: {sub_name: {"Max Value": ..., "date": {...}}}} mapping per distinct
# "Name", in the sorted order that groupby yields.
record = []
for name, group in df.groupby("Name"):
    # set_index (without inplace) returns a new frame, so the edits below never
    # write into a slice of `df`; mutating the group in place is what triggers
    # pandas' SettingWithCopyWarning.
    sub_df = group.set_index("Sub Name")

    # Convert the string-encoded dicts to real dicts. Series.map is applied per
    # column because DataFrame.applymap is deprecated since pandas 2.1.
    for column in ("Time Series", "Probability"):
        sub_df[column] = sub_df[column].map(ast.literal_eval)

    # Switch key levels: each row holds {"Time Series": {date: v}, "Probability":
    # {date: p}}; a temporary DataFrame has the dates as its index, so
    # to_dict(orient="index") yields {date: {"Time Series": v, "Probability": p}}.
    sub_df["date"] = [
        pd.DataFrame(dic).to_dict(orient="index")
        for dic in sub_df[["Time Series", "Probability"]].to_dict(orient="records")
    ]

    # Keep only "Max Value" and the new "date" column for the export.
    sub_df = sub_df.drop(columns=["Name", "Time Series", "Probability"])
    record.append({name: sub_df.to_dict(orient="index")})

print(json.dumps(record, indent=4))
Output:
[
{
"Blackbird": {
"Skylight": {
"Max Value": 81509865.34667145,
"date": {
"2022-07-06": {
"Time Series": -16240599.020647092,
"Probability": 0.6
},
"2022-07-07": {
"Time Series": -17984033.390385196,
"Probability": 0.7
}
}
}
}
},
{
"Bluefox": {
"Moonglow": {
"Max Value": 466888785.24275005,
"date": {
"2022-07-06": {
"Time Series": 9.5,
"Probability": 0.2
},
"2022-07-07": {
"Time Series": 7.2,
"Probability": 0.3
},
"2022-07-08": {
"Time Series": 10.3,
"Probability": 0.5
}
}
},
"Skylight": {
"Max Value": 90409417.34667145,
"date": {
"2022-07-06": {
"Time Series": -123000,
"Probability": 0.0
},
"2022-07-07": {
"Time Series": -245100,
"Probability": 0.0
}
}
}
}
}
]