Home > Net >  Data manipulation to nested dictionaries?
Data manipulation to nested dictionaries?

Time:07-19

I have some data that looks like this:

# Sample input: a list of flat records, one per ("Name", "Sub Name") pair.
# NOTE: "Time Series" and "Probability" are *strings* that contain a dict
# literal keyed by date, not real dicts; "Max Value" is a plain float.
this_data = [{
    "Name": "Bluefox",
    "Sub Name": "Moonglow",
    "Time Series": "{'2022-07-06': 9.5, '2022-07-07': 7.2, '2022-07-08': 10.3}",
    "Probability": "{'2022-07-06': 0.2, '2022-07-07': 0.3, '2022-07-08': 0.5}",
    "Max Value": 466888785.24275005,
},{
    "Name": "Blackbird",
    "Sub Name": "Skylight",
    "Time Series": "{'2022-07-06': -16240599.020647092, '2022-07-07': -17984033.390385196}",
    "Probability": "{'2022-07-06': 0.6, '2022-07-07': 0.7}",
    "Max Value": 81509865.34667145,
},{
    "Name": "Bluefox",
    "Sub Name": "Skylight",
    "Time Series": "{'2022-07-06': -123000, '2022-07-07': -245100}",
    "Probability": "{'2022-07-06': 0.0, '2022-07-07': 0.0}",
    "Max Value": 90409417.34667145,
}]

And I want to transform this into:

{'Bluefox': {
    'Moonglow': {
        'date': {
            '2022-07-06': {
                'Time Series' : 9.5,
                'Probability' : 0.2,
                
            },
            '2022-07-07' : {
                'Time Series' : 7.2,
                'Probability' : 0.3,

            },
            '2022-07-08' : {
                'Time Series' : 10.3,
                'Probability' : 0.5,

            }
        },
        'Max Value' : 466888785.24275005
    },
    'Skylight':{
        'date': {}
        }
    }
},
{'Blackbird': {
    'Moonglow': {
        'date': {
            '2022-07-06': {
                'Time Series' : 9.5,
                'Probability' : 0.2,
                }
            },
        'Max Value' : 466888785.24275005
        }
    }
}

I am trying something like this:

import json
import ast
from collections import defaultdict

# Allowed "Sub Name" values for each "Name"; used below as a filter.
entity_to_cp = {
    'Bluefox' : ['Moonglow', 'Skylight'],
    'Blackbird' : ['Skylight']
}


# Attempt to build outer[name][sub name][date][label] from this_data.
# NOTE: each of these four containers is created exactly once, before the
# loop, so every assignment below stores a reference to the *same* object.
inner = defaultdict(list)
between = defaultdict(dict)
between2 = defaultdict(dict)
outer = defaultdict(dict)
for each_dict in this_data:
    for label, all_values in each_dict.items():
        if label == "Name":
            # Every name key ends up pointing at the one shared `between` dict.
            outer[all_values] = between
            cur_e = all_values
        if label == "Sub Name":
            # Every sub name key ends up pointing at the one shared `between2` dict.
            between[all_values] = between2
            cur_cp = all_values
        
        # This part runs for every key of the record, not only for the two
        # date-keyed columns. On the very first key ("Name" of the first
        # record) cur_cp has not been assigned yet, which is what the
        # NameError handler at the bottom catches; on later records cur_cp
        # still holds the previous record's sub name until "Sub Name" is seen.
        try:
            if cur_cp in entity_to_cp[cur_e]:
                try:
                    # Parse the string into a Python object; only the
                    # "Time Series"/"Probability" strings evaluate to dicts.
                    all_values = ast.literal_eval(all_values)
                    for k,v in all_values.items():
                        print(k)
                        # All dates share the single `inner` mapping, so values
                        # of every date and every record are appended to the
                        # same per-label lists.
                        between2[k] = inner
                        inner[label].append(v)
                    # inner[label].append(all_values)
                # Values that are not dict literals (the names, the float
                # "Max Value") end up in one of these handlers and are only printed.
                except AttributeError as e:
                    print(e)
                except SyntaxError as e:
                    print(e)
                except ValueError as e:
                    print(e)
        except NameError as e:
            print(e)

But this doesn't work and I get duplicated values all over the place. Help!

CodePudding user response:

I don't think there is a direct way to export "Name" and "Sub Name" as separate nesting levels, but you can do it with pandas by looping over the DataFrame grouped by "Name". There are two additional difficulties: converting the strings into dictionaries, and swapping the nesting order so that the dates sit above the "Time Series"/"Probability" keys:

import json
import pandas as pd
import ast

# Input records: "Time Series" and "Probability" hold dict literals as strings.
this_data = [{
    "Name": "Bluefox",
    "Sub Name": "Moonglow",
    "Time Series": "{'2022-07-06': 9.5, '2022-07-07': 7.2, '2022-07-08': 10.3}",
    "Probability": "{'2022-07-06': 0.2, '2022-07-07': 0.3, '2022-07-08': 0.5}",
    "Max Value": 466888785.24275005,
},{
    "Name": "Blackbird",
    "Sub Name": "Skylight",
    "Time Series": "{'2022-07-06': -16240599.020647092, '2022-07-07': -17984033.390385196}",
    "Probability": "{'2022-07-06': 0.6, '2022-07-07': 0.7}",
    "Max Value": 81509865.34667145,
},{
    "Name": "Bluefox",
    "Sub Name": "Skylight",
    "Time Series": "{'2022-07-06': -123000, '2022-07-07': -245100}",
    "Probability": "{'2022-07-06': 0.0, '2022-07-07': 0.0}",
    "Max Value": 90409417.34667145,
}]

df = pd.DataFrame(this_data)
# One {name: {sub name: {...}}} dict per distinct "Name" (groupby sorts the names).
record = []
for name, sub_df in df.groupby('Name'):
    # set_index returns a new frame, so the group handed out by groupby is
    # not modified in place.
    sub_df = sub_df.set_index('Sub Name')
    # converting your strings to dict, one column at a time
    # (Series.map is used because DataFrame.applymap is deprecated since pandas 2.1):
    for column in ('Time Series', 'Probability'):
        sub_df[column] = sub_df[column].map(ast.literal_eval)
    # switching key levels: each row is {'Time Series': {date: v}, 'Probability': {date: v}};
    # a temporary DataFrame (dates as index) read back by row gives {date: {column: v}}.
    sub_df['date'] = [pd.DataFrame(dic).to_dict(orient="index")
                      for dic in sub_df[['Time Series', 'Probability']].to_dict(orient="records")]
    # Only "Max Value" and the new "date" column belong in the result.
    sub_df = sub_df.drop(columns=['Name', 'Time Series', 'Probability'])
    record.append({name: sub_df.to_dict(orient="index")})

print(json.dumps(record, indent=4))

Output:

[
    {
        "Blackbird": {
            "Skylight": {
                "Max Value": 81509865.34667145,
                "date": {
                    "2022-07-06": {
                        "Time Series": -16240599.020647092,
                        "Probability": 0.6
                    },
                    "2022-07-07": {
                        "Time Series": -17984033.390385196,
                        "Probability": 0.7
                    }
                }
            }
        }
    },
    {
        "Bluefox": {
            "Moonglow": {
                "Max Value": 466888785.24275005,
                "date": {
                    "2022-07-06": {
                        "Time Series": 9.5,
                        "Probability": 0.2
                    },
                    "2022-07-07": {
                        "Time Series": 7.2,
                        "Probability": 0.3
                    },
                    "2022-07-08": {
                        "Time Series": 10.3,
                        "Probability": 0.5
                    }
                }
            },
            "Skylight": {
                "Max Value": 90409417.34667145,
                "date": {
                    "2022-07-06": {
                        "Time Series": -123000,
                        "Probability": 0.0
                    },
                    "2022-07-07": {
                        "Time Series": -245100,
                        "Probability": 0.0
                    }
                }
            }
        }
    }
]
  • Related