Home > Software engineering >  Pandas dataframe to dict using dataclass
Pandas dataframe to dict using dataclass

Time:10-31

I have a pandas DataFrame and would like to convert it to a nested dict.

  item settings_id  mat_id  order
0  a-1          b1      32      1
1  a-1          x1      12      2
2  a-1          y4       3      3
3  a-2          k1       0      1
4  a-2          3a       2      2
5  a-2          x1      94      3
6  b-1          y4      32      1
7  b-1          b1       9      2

to

{'roots': [{'item': 'a-1',
   'settings': [{'settings_id': 'b1', 'mat_id': 32, 'order': 1},
    {'settings_id': 'x1', 'mat_id': 12, 'order': 2},
    {'settings_id': 'y4', 'mat_id': 3, 'order': 3}]},
  {'item': 'a-2',
   'settings': [{'settings_id': 'k1', 'mat_id': 0, 'order': 1},
    {'settings_id': '3a', 'mat_id': 2, 'order': 2},
    {'settings_id': 'x1', 'mat_id': 94, 'order': 3}]},
  {'item': 'b-1',
   'settings': [{'settings_id': 'y4', 'mat_id': 32, 'order': 1},
    {'settings_id': 'b1', 'mat_id': 9, 'order': 2}]}]}

The pandas documentation describes the to_dict method, but I couldn't get it to produce the structure I wanted. Therefore I came up with a solution that uses dataclasses instead.

However, I was wondering if there is a more convenient way?

from typing import List
from typing import Any
from dataclasses import dataclass, asdict
import pandas as pd

@dataclass
class Setting:
    """A single settings entry belonging to an item."""

    # Identifier of the setting (e.g. "b1").
    settings_id: str
    # Value taken from the "mat_id" column.
    mat_id: int
    # Value taken from the "order" column.
    order: int

    @staticmethod
    def from_dict(obj: Any) -> 'Setting':
        """Build a ``Setting`` from a mapping.

        Args:
            obj: Mapping providing the keys ``"settings_id"``, ``"mat_id"``
                and ``"order"`` (anything exposing ``.get``).

        Returns:
            A ``Setting`` whose fields are coerced to ``str``/``int``/``int``.

        Raises:
            TypeError: If ``"mat_id"`` or ``"order"`` is missing, because
                ``int(None)`` is not allowed.
        """
        # NOTE: a missing "settings_id" becomes the literal string "None",
        # since str(None) == "None".
        _settings_id = str(obj.get("settings_id"))
        _mat_id = int(obj.get("mat_id"))
        _order = int(obj.get("order"))
        # Construct this class; the previous code referenced an undefined
        # name (``Statistic``) and raised NameError on every call.
        return Setting(_settings_id, _mat_id, _order)

@dataclass
class ItemData:
    """An item name together with the settings that belong to it."""

    # Name of the item (e.g. "a-1").
    item: str
    # Settings attached to this item, in the order they were supplied.
    settings: List[Setting]

    @staticmethod
    def from_dict(obj: Any) -> 'ItemData':
        """Build an ``ItemData`` from a mapping.

        Args:
            obj: Mapping providing ``"item"`` and ``"settings"``; each entry
                of ``"settings"`` is handed to ``Setting.from_dict``.

        Returns:
            An ``ItemData`` with the item name coerced to ``str``.
        """
        name = str(obj.get("item"))
        parsed = []
        for raw in obj.get("settings"):
            parsed.append(Setting.from_dict(raw))
        return ItemData(name, parsed)

@dataclass
class Root:
    """Top-level container holding every item."""

    # All items; serialised by ``asdict`` under the key "roots".
    roots: List[ItemData]

    @staticmethod
    def from_dict(obj: Any) -> 'Root':
        """Build a ``Root`` from a mapping.

        Args:
            obj: Mapping whose ``"roots"`` entry is an iterable of item
                mappings, i.e. the shape produced by ``asdict(Root(...))``.
                The older key ``"ItemData"`` is still accepted as a
                fallback when ``"roots"`` is absent.

        Returns:
            A ``Root`` with one ``ItemData`` per entry.

        Raises:
            TypeError: If neither key is present (iterating ``None``).
        """
        # The field is named "roots", so that is the key asdict() emits;
        # reading only "ItemData" made a round trip through asdict() fail.
        entries = obj.get("roots")
        if entries is None:
            entries = obj.get("ItemData")
        _roots = [ItemData.from_dict(y) for y in entries]
        return Root(_roots)

# Sample frame: one row per (item, setting) pair.
df = pd.DataFrame(
    {
        "item": ["a-1", "a-1", "a-1", "a-2", "a-2", "a-2", "b-1", "b-1"],
        "settings_id": ["b1", "x1", "y4", "k1", "3a", "x1", "y4", "b1"],
        "mat_id": [32, 12, 3, 0, 2, 94, 32, 9],
        "order": [1, 2, 3, 1, 2, 3, 1, 2],
    }
)

# One ItemData per distinct item value; each collects a Setting for every
# row of the frame whose "item" column matches that value.
itemsData = [
    ItemData(
        name,
        [
            Setting(rec["settings_id"], rec["mat_id"], rec["order"])
            for _, rec in df[df["item"] == name].iterrows()
        ],
    )
    for name in df["item"].unique()
]

# Wrap the items and convert the whole dataclass tree to plain dicts/lists.
r = Root(itemsData)
asdict(r)

Thank you in advance

CodePudding user response:

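You can use to_dict with the keyword argument orient="records" while looping over df.groupby("item"). The result is a plain list of items; wrap it as {"roots": rec} if you need the exact top-level structure from the question: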

# pprint was called without being imported; import it here so the snippet
# runs on its own (it still expects ``df`` from the question).
from pprint import pprint

# One entry per group of rows sharing the same "item" value. The "item"
# column is dropped from each group so it is not repeated inside every
# settings record.
rec = [
    {
        "item": item,
        "settings": sub_df.drop(columns="item").to_dict(orient="records"),
    }
    for item, sub_df in df.groupby("item")
]

pprint(rec)

Output:

[{'item': 'a-1',
  'settings': [{'mat_id': 32, 'order': 1, 'settings_id': 'b1'},
               {'mat_id': 12, 'order': 2, 'settings_id': 'x1'},
               {'mat_id': 3, 'order': 3, 'settings_id': 'y4'}]},
 {'item': 'a-2',
  'settings': [{'mat_id': 0, 'order': 1, 'settings_id': 'k1'},
               {'mat_id': 2, 'order': 2, 'settings_id': '3a'},
               {'mat_id': 94, 'order': 3, 'settings_id': 'x1'}]},
 {'item': 'b-1',
  'settings': [{'mat_id': 32, 'order': 1, 'settings_id': 'y4'},
               {'mat_id': 9, 'order': 2, 'settings_id': 'b1'}]}]
  • Related