I want to convert a nested dict to dataframe
{
'2022-09-08T15:00:00Z': {
'INMET_BRASILIA_A001_M':
{
"DVENTO": [
{'value' : '95.0', 'quality': 'qf-GOOD', 'quality_flag': 'GOOD','structure' : 'INMET_BRASILIA_A001_M', 'element' : 'DVENTO'}],
"TD_MN_C":[
{'value' : '6.0', 'quality': 'qf-GOOD', 'quality_flag': 'GOOD','structure' : 'INMET_BRASILIA_A001_M', 'element' : 'TD_MN_C'},]
},
},
'2022-09-09T12:00:00Z': {
'INMET_GOIANIA_A002_M':
{
"DVENTO" : [
{'value' : '25', 'quality' : 'qf-GOOD', 'quality_flag' : 'GOOD', 'structure' : 'INMET_GOIANIA_A002_M', 'element' : 'DVENTO' }],
"TD_MN_C":[{
'value' : '3.0', 'quality' : 'qf-GOOD', 'quality_flag' : 'GOOD', 'structure' : 'INMET_GOIANIA_A002_M', 'element' : 'TD_MN_C'}],
},
}
}
I have this nested dict and now I want to convert it to a DataFrame, something like this:
DVENTO TD_MN_C
2022-09-08T15:00:00Z 95.0 6.0
2022-09-09T12:00:00Z 25 3.0
please help me, I've been trying this for days
CodePudding user response:
There are various ways to achieve the desired output.
Assuming that the dictionary is stored in the variable `dictionary` (and that pandas is imported as `pd`), one can start by doing the following:
# Build a timestamp x station frame, collapse it to one row per
# (timestamp, station) pair, expand each station's dict of elements into
# columns, and finally drop the station level so the index is the timestamp.
df = (
    pd.DataFrame.from_dict(dictionary, orient='index')
    .stack()
    # A timestamp only has data for its own station, so the other cells are
    # NaN. The legacy stack() drops them implicitly, but the newer
    # implementation (future_stack=True) keeps them; drop them explicitly so
    # the result is the same either way.
    .dropna()
    .apply(pd.Series)
    .reset_index(level=1, drop=True)
)
[Out]:
DVENTO TD_MN_C
2022-09-08T15:00:00Z [{'value': '95.0', 'quality': 'qf-GOOD', 'qual... [{'value': '6.0', 'quality': 'qf-GOOD', 'quali...
2022-09-09T12:00:00Z [{'value': '25', 'quality': 'qf-GOOD', 'qualit... [{'value': '3.0', 'quality': 'qf-GOOD', 'quali...
Then, since the cells of `DVENTO` and `TD_MN_C` should hold only the `value` field of the dictionary stored in each cell's list, one can simply use list comprehensions as follows:
# Replace each cell (a list holding one measurement dict, per the output shown
# above) with that measurement's 'value' field, one column at a time.
for column in ('DVENTO', 'TD_MN_C'):
    df[column] = [records[0]['value'] for records in df[column]]
[Out]:
DVENTO TD_MN_C
2022-09-08T15:00:00Z 95.0 6.0
2022-09-09T12:00:00Z 25 3.0
A one-liner would be like this
# Same pipeline as a single expression: reshape to one row per timestamp, then
# reduce every list-valued cell to the 'value' of its first measurement dict.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 (renamed to
# DataFrame.map); mapping each column with Series.map behaves the same and
# works on both older and newer pandas versions.
df = (
    pd.DataFrame.from_dict(dictionary, orient='index')
    .stack()
    # Drop the empty (timestamp, station) combinations explicitly; the newer
    # stack() implementation (future_stack=True) no longer drops them itself.
    .dropna()
    .apply(pd.Series)
    .reset_index(level=1, drop=True)
    .apply(
        lambda column: column.map(
            lambda cell: cell[0]['value'] if isinstance(cell, list) else cell
        )
    )
)
[Out]:
DVENTO TD_MN_C
2022-09-08T15:00:00Z 95.0 6.0
2022-09-09T12:00:00Z 25 3.0
CodePudding user response:
This should answer your question:
import pandas as pd

# Sample input, nested as: timestamp -> station -> element -> [measurement].
data = {
    '2022-09-08T15:00:00Z': {
        'INMET_BRASILIA_A001_M': {
            'DVENTO': [
                {
                    'value': '95.0',
                    'quality': 'qf-GOOD',
                    'quality_flag': 'GOOD',
                    'structure': 'INMET_BRASILIA_A001_M',
                    'element': 'DVENTO',
                },
            ],
            'TD_MN_C': [
                {
                    'value': '6.0',
                    'quality': 'qf-GOOD',
                    'quality_flag': 'GOOD',
                    'structure': 'INMET_BRASILIA_A001_M',
                    'element': 'TD_MN_C',
                },
            ],
        },
    },
    '2022-09-09T12:00:00Z': {
        'INMET_GOIANIA_A002_M': {
            'DVENTO': [
                {
                    'value': '25',
                    'quality': 'qf-GOOD',
                    'quality_flag': 'GOOD',
                    'structure': 'INMET_GOIANIA_A002_M',
                    'element': 'DVENTO',
                },
            ],
            'TD_MN_C': [
                {
                    'value': '3.0',
                    'quality': 'qf-GOOD',
                    'quality_flag': 'GOOD',
                    'structure': 'INMET_GOIANIA_A002_M',
                    'element': 'TD_MN_C',
                },
            ],
        },
    },
}

# Step 1: remove the station level by keeping the first station's elements
# for every timestamp.
data_2 = {
    timestamp: next(iter(stations.values()))
    for timestamp, stations in data.items()
}

# Step 2: reduce every element to the 'value' of its first measurement.
data_3 = {
    timestamp: {
        element: measurements[0]['value']
        for element, measurements in elements.items()
    }
    for timestamp, elements in data_2.items()
}

# Step 3: the outer keys (timestamps) become columns, so flip the frame to get
# one row per timestamp and one column per element.
df = pd.DataFrame(data_3).T
print(df)
Output:
DVENTO TD_MN_C
2022-09-08T15:00:00Z 95.0 6.0
2022-09-09T12:00:00Z 25 3.0
CodePudding user response:
Roll out a for loop for clarity and performance:
from collections import defaultdict

# Column-oriented accumulator: one list per output column, filled row by row.
# `data` is the nested dict from the question, laid out as
# timestamp -> station -> element -> [measurement dict].
# NOTE(review): the lists only stay aligned if every timestamp has exactly one
# station and the same set of elements, as in the sample data; confirm this
# holds for the real input.
content = defaultdict(list)
for timestamp, stations in data.items():
    content['dates'].append(timestamp)
    # The station name is not part of the output, so only the values are needed.
    for elements in stations.values():
        for element, measurements in elements.items():
            # Keep just the 'value' field of the element's first measurement.
            content[element].append(measurements[0]['value'])
print(content)
defaultdict(list,
{'dates': ['2022-09-08T15:00:00Z', '2022-09-09T12:00:00Z'],
'DVENTO': ['95.0', '25'],
'TD_MN_C': ['6.0', '3.0']})
pd.DataFrame(content)
dates DVENTO TD_MN_C
0 2022-09-08T15:00:00Z 95.0 6.0
1 2022-09-09T12:00:00Z 25 3.0