I have a pandas dataframe and would like to cast it to a dict.
item settings_id mat_id order
0 a-1 b1 32 1
1 a-1 x1 12 2
2 a-1 y4 3 3
3 a-2 k1 0 1
4 a-2 3a 2 2
5 a-2 x1 94 3
6 b-1 y4 32 1
7 b-1 b1 9 2
to
{'roots': [{'item': 'a-1',
'settings': [{'settings_id': 'b1', 'mat_id': 32, 'order': 1},
{'settings_id': 'x1', 'mat_id': 12, 'order': 2},
{'settings_id': 'y4', 'mat_id': 3, 'order': 3}]},
{'item': 'a-2',
'settings': [{'settings_id': 'k1', 'mat_id': 0, 'order': 1},
{'settings_id': '3a', 'mat_id': 2, 'order': 2},
{'settings_id': 'x1', 'mat_id': 94, 'order': 3}]},
{'item': 'b-1',
'settings': [{'settings_id': 'y4', 'mat_id': 32, 'order': 1},
{'settings_id': 'b1', 'mat_id': 9, 'order': 2}]}]}
The pandas documentation describes the to_dict method, but I couldn't get it to produce the structure I wanted, so I came up with an approach using dataclasses instead.
However, I was wondering if there is a more convenient way?
from typing import List
from typing import Any
from dataclasses import dataclass, asdict
import pandas as pd
@dataclass
class Setting:
    """One settings record belonging to an item.

    Holds the three per-row values (``settings_id``, ``mat_id``, ``order``)
    that end up inside an item's ``settings`` list.
    """

    # Value of the ``settings_id`` column, stored as a string.
    settings_id: str
    # Value of the ``mat_id`` column, stored as an integer.
    mat_id: int
    # Value of the ``order`` column, stored as an integer.
    order: int

    @staticmethod
    def from_dict(obj: Any) -> 'Setting':
        """Build a ``Setting`` from a mapping.

        Args:
            obj: Mapping-like object exposing ``get`` with the keys
                ``"settings_id"``, ``"mat_id"`` and ``"order"``.

        Returns:
            A ``Setting`` whose fields are coerced with ``str``/``int``.

        Raises:
            TypeError: If ``mat_id`` or ``order`` is missing (``int(None)``).
            ValueError: If ``mat_id`` or ``order`` cannot be parsed as an int.
        """
        _settings_id = str(obj.get("settings_id"))
        _mat_id = int(obj.get("mat_id"))
        _order = int(obj.get("order"))
        # The original returned ``Statistic(...)``, a name that is not
        # defined anywhere, so every call failed with NameError.
        return Setting(_settings_id, _mat_id, _order)
@dataclass
class ItemData:
    """An item name paired with the list of its ``Setting`` records."""

    # Item identifier, stored as a string.
    item: str
    # Settings attached to this item.
    settings: List[Setting]

    @staticmethod
    def from_dict(obj: Any) -> 'ItemData':
        """Build an ``ItemData`` from a mapping.

        Args:
            obj: Mapping-like object exposing ``get`` with the keys
                ``"item"`` and ``"settings"``; each element of
                ``"settings"`` is passed to ``Setting.from_dict``.

        Returns:
            The reconstructed ``ItemData``.
        """
        return ItemData(
            str(obj.get("item")),
            [Setting.from_dict(entry) for entry in obj.get("settings")],
        )
@dataclass
class Root:
    """Top-level container holding every ``ItemData`` under ``roots``."""

    # All items; ``asdict`` serialises this field under the key "roots".
    roots: List[ItemData]

    @staticmethod
    def from_dict(obj: Any) -> 'Root':
        """Build a ``Root`` from a mapping.

        Args:
            obj: Mapping-like object exposing ``get``. The item list is read
                from ``"roots"`` (the key ``asdict`` produces for this
                class); the previously used ``"ItemData"`` key is still
                accepted as a fallback.

        Returns:
            The reconstructed ``Root``.

        Raises:
            TypeError: If neither key is present (iterating ``None``).
        """
        # The original only looked up "ItemData", which never matches the
        # dict produced by ``asdict(Root(...))`` and so could not round-trip.
        _entries = obj.get("roots")
        if _entries is None:
            _entries = obj.get("ItemData")
        _roots = [ItemData.from_dict(y) for y in _entries]
        return Root(_roots)
# Sample frame: one row per (item, setting) combination.
df = pd.DataFrame(
    {
        "item": ["a-1", "a-1", "a-1", "a-2", "a-2", "a-2", "b-1", "b-1"],
        "settings_id": ["b1", "x1", "y4", "k1", "3a", "x1", "y4", "b1"],
        "mat_id": [32, 12, 3, 0, 2, 94, 32, 9],
        "order": [1, 2, 3, 1, 2, 3, 1, 2],
    }
)

# Build one ItemData per distinct item, in order of first appearance.
itemsData = []
items = df["item"].unique()
for item in items:
    # Rows belonging to the current item only.
    element = df[df["item"] == item]
    settings = [
        Setting(row["settings_id"], row["mat_id"], row["order"])
        for _, row in element.iterrows()
    ]
    itemsData.append(ItemData(item, settings))

# Wrap everything in the root container and convert to nested dicts/lists.
r = Root(itemsData)
asdict(r)
Thank you in advance
CodePudding user response:
You can use to_dict with the keyword argument orient="records" while looping over df.groupby("item"):
# The snippet calls pprint() but never imported it.
from pprint import pprint

# One record per item: the item name plus its remaining columns as a list
# of row dicts. sort=False keeps the groups in order of first appearance
# (groupby sorts the keys by default), matching the order of the rows.
rec = [
    {
        "item": item,
        "settings": sub_df.drop(columns="item").to_dict(orient="records"),
    }
    for item, sub_df in df.groupby("item", sort=False)
]
pprint(rec)
Output:
[{'item': 'a-1',
'settings': [{'mat_id': 32, 'order': 1, 'settings_id': 'b1'},
{'mat_id': 12, 'order': 2, 'settings_id': 'x1'},
{'mat_id': 3, 'order': 3, 'settings_id': 'y4'}]},
{'item': 'a-2',
'settings': [{'mat_id': 0, 'order': 1, 'settings_id': 'k1'},
{'mat_id': 2, 'order': 2, 'settings_id': '3a'},
{'mat_id': 94, 'order': 3, 'settings_id': 'x1'}]},
{'item': 'b-1',
'settings': [{'mat_id': 32, 'order': 1, 'settings_id': 'y4'},
{'mat_id': 9, 'order': 2, 'settings_id': 'b1'}]}]