Below is my code, which gets an error 'Mixing dicts with non-Series may lead to ambiguous ordering'. What is the reason and how should I fix this? How do I visualise this dictionary in python in order to debug?
import pandas as pd
df = pd.read_json('https://stats.oecd.org/sdmx-json/data/QNA/AUS AUT.GDP B1_GE.CUR VOBARSA.Q/all?startTime=2009-Q2&endTime=2011-Q4')
Thanks in advance.
CodePudding user response:
Download the json file and do:
from pandas.io.json import json_normalize
import json
with open('aaa.json') as data_file:
d= json.load(data_file)
df = json_normalize(d)
which gives:
dataSets \
0 [{'action': 'Information', 'series': {'0:0:0:0...
header.id header.test \
0 a0d6c1d0-79b9-4cc2-9e7a-06050c934194 False
header.prepared header.sender.id \
0 2021-11-04T10:42:25.7631355Z OCDE
header.sender.name \
0 Organisation de coopération et de développem...
header.links \
0 [{'href': 'https://stats.oecd.org:443/sdmx-jso...
structure.links \
0 [{'href': 'https://stats.oecd.org/sdmx-json/da...
structure.name structure.description \
0 Comptes nationaux trimestriels Comptes nationaux trimestriels
structure.dimensions.series \
0 [{'keyPosition': 0, 'id': 'LOCATION', 'name': ...
structure.dimensions.observation \
0 [{'id': 'TIME_PERIOD', 'name': 'Période', 'va...
structure.attributes.dataSet \
0 []
structure.attributes.series \
0 [{'id': 'TIME_FORMAT', 'name': 'Time Format', ...
structure.attributes.observation \
0 [{'id': 'OBS_STATUS', 'name': 'Observation Sta...
structure.annotations
0 [{'title': 'Copyright OECD - All rights reserv...
Note that you have nested jsons which you will have to unnest. Use for instance:
def flatten_nested_json_df(df):
df = df.reset_index()
s = (df.applymap(type) == list).all()
list_columns = s[s].index.tolist()
s = (df.applymap(type) == dict).all()
dict_columns = s[s].index.tolist()
while len(list_columns) > 0 or len(dict_columns) > 0:
new_columns = []
for col in dict_columns:
horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
horiz_exploded.index = df.index
df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
new_columns.extend(horiz_exploded.columns) # inplace
for col in list_columns:
print(f"exploding: {col}")
df = df.drop(columns=[col]).join(df[col].explode().to_frame())
new_columns.append(col)
s = (df[new_columns].applymap(type) == list).all()
list_columns = s[s].index.tolist()
s = (df[new_columns].applymap(type) == dict).all()
dict_columns = s[s].index.tolist()
return df
CodePudding user response:
You could use Dframcy.
from dframcy import DframCy
with open('aaa.json') as data_file:
d= json.load(data_file)
dataframe = dframcy.to_dataframe(d, columns=["text","start","","","","",""])