i have this code:
df.groupby(by=["Unit","Product","Scheme","Type"]).size()
Which gives:
Unit Product Scheme Type
JBBNB BP KRZPS MRTZN 13
TOPUP 5
ZBPRE 3
HB KRJZS CONST 6
FZNEW 5
..
htrcy HB TNJZS PZTCN 2
TNBTS TOEJZ 2
TU TNTPS BDDFIN 2
TOPUP 1
hwrng BP TGZPS ZBPRE 1
Length: 1632, dtype: int64
I want to convert it into dict() with nested dictionary like below
{
"JBBNB": {"BP": {"KRZPS":{"MRTZN":13,"TOPUP":5,"ZBPRE":3}},
"HB":{"KRJZS":{"CONST":6,"FZNEW":5}}
},
"htrcy": {"HB": {"TNJZS":{"PZTCN":2},
"TNBTS":{"TOEJZ":2}},
"TU":{"TNTPS":{"BDDFIN":2,"TOPUP":1}},
}
}
How can i do it with most efficient way
Edit: heres my data
data = {"Unit": {"31-08": "JBJEB", "22-11": "JBGRB", "06-04": "JBGRB", "29-11": "JBZWI", "15-02": "JBGRB", "28-02": "JBJEB", "23-03": "JBGRB", "13-06": "JBJEB", "30-06": "JBMDR", "20-07": "JBJEB", "23-07": "JBJEB", "25-07": "JBJEB", "23-08": "JBJEB", "30-09": "JBZWR", "22-10": "JBJEB", "27-12": "JBJMD", "31-12": "JBJEB", "25-03": "JBJEB", "29-03": "JBJEB", "29-04": "JBJEB", "29-07": "JBJEB", "18-06": "JBJMD", "30-07": "JBMDR", "10-08": "JBJMR", "24-03": "JBJMD", "19-07": "JBJMD", "26-12": "JBJMD", "31-03": "JBJMR", "08-09": "JBJMR", "07-10": "JBJMR", "10-05": "JBJMR", "28-07": "JBJMR", "27-09": "JBJMR", "27-11": "JBJMR", "30-11": "JBJMR", "22-12": "JBJMR", "24-07": "JBZJB", "23-09": "JBZWI", "29-09": "JBZWI", "07-02": "JBZWI", "14-10": "JBZWR", "23-12": "JBZWR", "29-08": "JBZWR", "18-11": "JBZWR", "21-12": "JBZWR", "03-01": "JBZWR", "28-08": "JBZWR", "20-09": "JBMDR", "02-12": "JBMDR"}, "Product": {"31-08": "TU", "22-11": "TU", "06-04": "TU", "29-11": "TU", "15-02": "TU", "28-02": "TU", "23-03": "TU", "13-06": "TU", "30-06": "TU", "20-07": "TU", "23-07": "TU", "25-07": "TU", "23-08": "TU", "30-09": "TU", "22-10": "TU", "27-12": "TU", "31-12": "TU", "25-03": "TU", "29-03": "TU", "29-04": "TU", "29-07": "TU", "18-06": "TU", "30-07": "TU", "10-08": "TU", "24-03": "TU", "19-07": "TU", "26-12": "TU", "31-03": "TU", "08-09": "BP", "07-10": "TU", "10-05": "TU", "28-07": "TU", "27-09": "TU", "27-11": "TU", "30-11": "TU", "22-12": "TU", "24-07": "TU", "23-09": "TU", "29-09": "TU", "07-02": "TU", "14-10": "BP", "23-12": "BP", "29-08": "TU", "18-11": "TU", "21-12": "TU", "03-01": "TU", "28-08": "TU", "20-09": "TU", "02-12": "TU"}, "Scheme": {"31-08": "GJTPS", "22-11": "UPTPS", "06-04": "UPTPS", "29-11": "UKTPS", "15-02": "UPTPS", "28-02": "GJTPS", "23-03": "UPTPS", "13-06": "GJTPS", "30-06": "MJTPS", "20-07": "GJTPS", "23-07": "GJTPS", "25-07": "GJTPS", "23-08": "GJTPS", "30-09": "RJTPS", "22-10": "GJTPS", "27-12": "GJTPS", "31-12": "GJTPS", "25-03": "GJTPS", "29-03": "GJTPS", "29-04": "GJTPS", "29-07": "GJTPS", "18-06": "GJTPS", "30-07": "MJTPS", "10-08": "RJTPS", "24-03": "GJTPS", "19-07": "GJTPS", "26-12": "GJTPS", "31-03": "RJTPS", "08-09": "RJZPS", "07-10": "RJTPS", "10-05": "RJTPS", "28-07": "RJTPS", "27-09": "RJTPS", "27-11": "RJTPS", "30-11": "RJTPS", "22-12": "RJTPS", "24-07": "UPTPS", "23-09": "UKTPS", "29-09": "UKTPS", "07-02": "UKTPS", "14-10": "RJZPS", "23-12": "RJZPS", "29-08": "RJTPS", "18-11": "RJTPS", "21-12": "RJTPS", "03-01": "RJTPS", "28-08": "RJTPS", "20-09": "MJTPS", "02-12": "MJTPS"}, "Type": {"31-08": "BDDFIN", "22-11": "BDDFIN", "06-04": "BDDFIN", "29-11": "BDDFIN", "15-02": "BDDFIN", "28-02": "BDDFIN", "23-03": "BDDFIN", "13-06": "BDDFIN", "30-06": "BDDFIN", "20-07": "BDDFIN", "23-07": "BDDFIN", "25-07": "BDDFIN", "23-08": "BDDFIN", "30-09": "BDDFIN", "22-10": "BDDFIN", "27-12": "BDDFIN", "31-12": "BDDFIN", "25-03": "BDDFIN", "29-03": "BDDFIN", "29-04": "BDDFIN", "29-07": "BDDFIN", "18-06": "BDDFIN", "30-07": "BDDFIN", "10-08": "BDDFIN", "24-03": "BDDFIN", "19-07": "BDDFIN", "26-12": "BDDFIN", "31-03": "BDDFIN", "08-09": "BDDFIN", "07-10": "BDDFIN", "10-05": "BDDFIN", "28-07": "BDDFIN", "27-09": "BDDFIN", "27-11": "BDDFIN", "30-11": "BDDFIN", "22-12": "BDDFIN", "24-07": "BDDFIN", "23-09": "BDDFIN", "29-09": "BDDFIN", "07-02": "BDDFIN", "14-10": "BDDFIN", "23-12": "BDDFIN", "29-08": "BDDFIN", "18-11": "BDDFIN", "21-12": "BDDFIN", "03-01": "BDDFIN", "28-08": "BDDFIN", "20-09": "BDDFIN", "02-12": "BDDFIN"}}
df = pd.DataFrame(data)
CodePudding user response:
Here The main idea is to convert group by table to list of tuples and loop over it.
Code:
ls = df.groupby(by=["Unit","Product","Scheme","Type"]).size().to_frame().to_records().tolist()
ls output:
[('JBGRB', 'TU', 'UPTPS', 'BDDFIN', 4),
('JBJEB', 'TU', 'GJTPS', 'BDDFIN', 13),
('JBJMD', 'TU', 'GJTPS', 'BDDFIN', 5),
('JBJMR', 'BP', 'RJZPS', 'BDDFIN', 1),
('JBJMR', 'TU', 'RJTPS', 'BDDFIN', 9),
('JBMDR', 'TU', 'MJTPS', 'BDDFIN', 4),
('JBZJB', 'TU', 'UPTPS', 'BDDFIN', 1),
('JBZWI', 'TU', 'UKTPS', 'BDDFIN', 4),
('JBZWR', 'BP', 'RJZPS', 'BDDFIN', 2),
('JBZWR', 'TU', 'RJTPS', 'BDDFIN', 6)]
Converting to dict:
dic = dict()
for k1, k2, k3, k4, val in ls:
if k1 in dic:
if k2 in dic[k1]:
if k3 in dic[k1][k2]:
dic[k1][k2][k3].update({k4:val})
else:
dic[k1][k2].update({k3:{k4:val}})
else:
dic[k1].update({k2:{k3:{k4:val}}})
else:
dic.setdefault(k1, {}).update({k2:{k3:{k4:val}}})
dic
Output:
{'JBGRB': {'TU': {'UPTPS': {'BDDFIN': 4}}},
'JBJEB': {'TU': {'GJTPS': {'BDDFIN': 13}}},
'JBJMD': {'TU': {'GJTPS': {'BDDFIN': 5}}},
'JBJMR': {'BP': {'RJZPS': {'BDDFIN': 1}}, 'TU': {'RJTPS': {'BDDFIN': 9}}},
'JBMDR': {'TU': {'MJTPS': {'BDDFIN': 4}}},
'JBZJB': {'TU': {'UPTPS': {'BDDFIN': 1}}},
'JBZWI': {'TU': {'UKTPS': {'BDDFIN': 4}}},
'JBZWR': {'BP': {'RJZPS': {'BDDFIN': 2}}, 'TU': {'RJTPS': {'BDDFIN': 6}}}}
CodePudding user response:
I had this problem a while ago and came up with this solution. Maybe it can help you. It is supposed to be a universal solution.
You can change the format by passing keys a main key / subkeys (groupby/subkeys) to the function.
In my opinion, this little lambda function: nested_dict = lambda: defaultdict(nested_dict) it the best method for creating a nested dict, since you don't have to worry about key errors. If the key doesn't exist, it will be created.
import operator
from collections import defaultdict
from copy import deepcopy
from functools import reduce
from pprint import pprint
from typing import Union
import pandas as pd
def ds_to_string(df: Union[pd.DataFrame, pd.Series]) -> Union[pd.Series, pd.DataFrame]:
nastring = str(pd.NA)
df2 = df.copy()
if isinstance(df, pd.DataFrame):
for col in df2.columns:
if not df2[col].dtype == "string":
try:
df2[col] = df2[col].fillna(nastring)
except Exception:
pass
df2[col] = df2[col].astype("string")
else:
try:
df2 = df2.fillna(nastring)
except Exception:
pass
df2 = df2.astype("string")
df2 = df2.fillna(nastring).copy()
return df2
def _to_nested_df(df: pd.DataFrame, groupby: str, subkeys: list) -> dict:
nested_dict = lambda: defaultdict(nested_dict)
def qq_ds_drop_duplicates(
df, subset=None, keep="first", inplace=False, ignore_index=False
):
df2 = ds_to_string(df)
df22 = df2.drop_duplicates(
subset=subset, keep=keep, inplace=inplace, ignore_index=ignore_index
)
return df.loc[df22.index].copy()
def getFromDict(dataDict, mapList):
# https://stackoverflow.com/a/14692747/15096247
return reduce(operator.getitem, mapList, dataDict)
nest = nested_dict()
useasindex = groupby
df2 = df.copy()
use_as_index_var = "__useasindex___"
subkeys_ = subkeys.copy()
subkeys_ = [x for x in subkeys_ if x != useasindex]
subkeys_.append(groupby)
subkeys_.append(use_as_index_var)
df2[use_as_index_var] = [useasindex] * len(df2)
withoutduplicates = qq_ds_drop_duplicates(df2, subset=useasindex)
alldictkeys = subkeys_.copy()
asdi = (
withoutduplicates.groupby(useasindex)[subkeys_]
.apply(lambda x: x.set_index(use_as_index_var).to_dict(orient="index"))
.to_dict()
)
kick_when_finished = list(asdi.keys())
successkeys = []
for key, item in asdi.items():
for ini, serie in enumerate(alldictkeys):
if serie == use_as_index_var:
continue
ini2 = 0
for val, val2 in zip(df2[serie].to_list(), df2[useasindex].to_list()):
tempindi = [val2, serie]
getFromDict(nest, tempindi)[ini2] = val
successkeys.append(tempindi.copy())
ini2 = 1
dictionary_without_ugly_lambda = deepcopy(nest)
for keys in successkeys:
allkeys = []
for tempkey in keys:
allkeys.append(tempkey)
value_in_next_key = getFromDict(nest, allkeys)
is_next_value_dict = isinstance(value_in_next_key, dict)
if is_next_value_dict:
getFromDict(dictionary_without_ugly_lambda, allkeys[:-1])[
allkeys[-1]
] = dict(
getFromDict(dictionary_without_ugly_lambda, allkeys[:-1])[
allkeys[-1]
]
)
dictionary_without_ugly_lambda = dict(dictionary_without_ugly_lambda)
for kickitem in kick_when_finished:
del dictionary_without_ugly_lambda[kickitem][useasindex]
return dictionary_without_ugly_lambda.copy()
# I had this problem a while ago and came up with this solution. Maybe it can help you.
#
data = {
"Unit": {
"31-08": "JBJEB",
"22-11": "JBGRB",
"06-04": "JBGRB",
"29-11": "JBZWI",
"15-02": "JBGRB",
"28-02": "JBJEB",
"23-03": "JBGRB",
"13-06": "JBJEB",
"30-06": "JBMDR",
"20-07": "JBJEB",
"23-07": "JBJEB",
"25-07": "JBJEB",
"23-08": "JBJEB",
"30-09": "JBZWR",
"22-10": "JBJEB",
"27-12": "JBJMD",
"31-12": "JBJEB",
"25-03": "JBJEB",
"29-03": "JBJEB",
"29-04": "JBJEB",
"29-07": "JBJEB",
"18-06": "JBJMD",
"30-07": "JBMDR",
"10-08": "JBJMR",
"24-03": "JBJMD",
"19-07": "JBJMD",
"26-12": "JBJMD",
"31-03": "JBJMR",
"08-09": "JBJMR",
"07-10": "JBJMR",
"10-05": "JBJMR",
"28-07": "JBJMR",
"27-09": "JBJMR",
"27-11": "JBJMR",
"30-11": "JBJMR",
"22-12": "JBJMR",
"24-07": "JBZJB",
"23-09": "JBZWI",
"29-09": "JBZWI",
"07-02": "JBZWI",
"14-10": "JBZWR",
"23-12": "JBZWR",
"29-08": "JBZWR",
"18-11": "JBZWR",
"21-12": "JBZWR",
"03-01": "JBZWR",
"28-08": "JBZWR",
"20-09": "JBMDR",
"02-12": "JBMDR",
},
"Product": {
"31-08": "TU",
"22-11": "TU",
"06-04": "TU",
"29-11": "TU",
"15-02": "TU",
"28-02": "TU",
"23-03": "TU",
"13-06": "TU",
"30-06": "TU",
"20-07": "TU",
"23-07": "TU",
"25-07": "TU",
"23-08": "TU",
"30-09": "TU",
"22-10": "TU",
"27-12": "TU",
"31-12": "TU",
"25-03": "TU",
"29-03": "TU",
"29-04": "TU",
"29-07": "TU",
"18-06": "TU",
"30-07": "TU",
"10-08": "TU",
"24-03": "TU",
"19-07": "TU",
"26-12": "TU",
"31-03": "TU",
"08-09": "BP",
"07-10": "TU",
"10-05": "TU",
"28-07": "TU",
"27-09": "TU",
"27-11": "TU",
"30-11": "TU",
"22-12": "TU",
"24-07": "TU",
"23-09": "TU",
"29-09": "TU",
"07-02": "TU",
"14-10": "BP",
"23-12": "BP",
"29-08": "TU",
"18-11": "TU",
"21-12": "TU",
"03-01": "TU",
"28-08": "TU",
"20-09": "TU",
"02-12": "TU",
},
"Scheme": {
"31-08": "GJTPS",
"22-11": "UPTPS",
"06-04": "UPTPS",
"29-11": "UKTPS",
"15-02": "UPTPS",
"28-02": "GJTPS",
"23-03": "UPTPS",
"13-06": "GJTPS",
"30-06": "MJTPS",
"20-07": "GJTPS",
"23-07": "GJTPS",
"25-07": "GJTPS",
"23-08": "GJTPS",
"30-09": "RJTPS",
"22-10": "GJTPS",
"27-12": "GJTPS",
"31-12": "GJTPS",
"25-03": "GJTPS",
"29-03": "GJTPS",
"29-04": "GJTPS",
"29-07": "GJTPS",
"18-06": "GJTPS",
"30-07": "MJTPS",
"10-08": "RJTPS",
"24-03": "GJTPS",
"19-07": "GJTPS",
"26-12": "GJTPS",
"31-03": "RJTPS",
"08-09": "RJZPS",
"07-10": "RJTPS",
"10-05": "RJTPS",
"28-07": "RJTPS",
"27-09": "RJTPS",
"27-11": "RJTPS",
"30-11": "RJTPS",
"22-12": "RJTPS",
"24-07": "UPTPS",
"23-09": "UKTPS",
"29-09": "UKTPS",
"07-02": "UKTPS",
"14-10": "RJZPS",
"23-12": "RJZPS",
"29-08": "RJTPS",
"18-11": "RJTPS",
"21-12": "RJTPS",
"03-01": "RJTPS",
"28-08": "RJTPS",
"20-09": "MJTPS",
"02-12": "MJTPS",
},
"Type": {
"31-08": "BDDFIN",
"22-11": "BDDFIN",
"06-04": "BDDFIN",
"29-11": "BDDFIN",
"15-02": "BDDFIN",
"28-02": "BDDFIN",
"23-03": "BDDFIN",
"13-06": "BDDFIN",
"30-06": "BDDFIN",
"20-07": "BDDFIN",
"23-07": "BDDFIN",
"25-07": "BDDFIN",
"23-08": "BDDFIN",
"30-09": "BDDFIN",
"22-10": "BDDFIN",
"27-12": "BDDFIN",
"31-12": "BDDFIN",
"25-03": "BDDFIN",
"29-03": "BDDFIN",
"29-04": "BDDFIN",
"29-07": "BDDFIN",
"18-06": "BDDFIN",
"30-07": "BDDFIN",
"10-08": "BDDFIN",
"24-03": "BDDFIN",
"19-07": "BDDFIN",
"26-12": "BDDFIN",
"31-03": "BDDFIN",
"08-09": "BDDFIN",
"07-10": "BDDFIN",
"10-05": "BDDFIN",
"28-07": "BDDFIN",
"27-09": "BDDFIN",
"27-11": "BDDFIN",
"30-11": "BDDFIN",
"22-12": "BDDFIN",
"24-07": "BDDFIN",
"23-09": "BDDFIN",
"29-09": "BDDFIN",
"07-02": "BDDFIN",
"14-10": "BDDFIN",
"23-12": "BDDFIN",
"29-08": "BDDFIN",
"18-11": "BDDFIN",
"21-12": "BDDFIN",
"03-01": "BDDFIN",
"28-08": "BDDFIN",
"20-09": "BDDFIN",
"02-12": "BDDFIN",
},
}
dfa = pd.DataFrame(data)
print(dfa)
df = _to_nested_df(dfa, groupby="Unit", subkeys=["Type", "Product", "Scheme"])
pprint(df)
{'JBGRB': {'Product': {1: 'TU', 2: 'TU', 4: 'TU', 6: 'TU'},
'Scheme': {1: 'UPTPS', 2: 'UPTPS', 4: 'UPTPS', 6: 'UPTPS'},
'Type': {1: 'BDDFIN', 2: 'BDDFIN', 4: 'BDDFIN', 6: 'BDDFIN'}},
'JBJEB': {'Product': {0: 'TU',
5: 'TU',
7: 'TU',
9: 'TU',
10: 'TU',
11: 'TU',
12: 'TU',
14: 'TU',
16: 'TU',
17: 'TU',
18: 'TU',
19: 'TU',
20: 'TU'},
'Scheme': {0: 'GJTPS',
5: 'GJTPS',
7: 'GJTPS',
9: 'GJTPS',
10: 'GJTPS',
11: 'GJTPS',
12: 'GJTPS',
14: 'GJTPS',
16: 'GJTPS',
17: 'GJTPS',
18: 'GJTPS',
19: 'GJTPS',
20: 'GJTPS'},
'Type': {0: 'BDDFIN',
5: 'BDDFIN',
7: 'BDDFIN',
9: 'BDDFIN',
10: 'BDDFIN',
11: 'BDDFIN',
12: 'BDDFIN',
14: 'BDDFIN',
16: 'BDDFIN',
17: 'BDDFIN',
18: 'BDDFIN',
19: 'BDDFIN',
20: 'BDDFIN'}},
'JBJMD': {'Product': {15: 'TU', 21: 'TU', 24: 'TU', 25: 'TU', 26: 'TU'},
'Scheme': {15: 'GJTPS',
21: 'GJTPS',
24: 'GJTPS',
25: 'GJTPS',
26: 'GJTPS'},
'Type': {15: 'BDDFIN',
21: 'BDDFIN',
24: 'BDDFIN',
25: 'BDDFIN',
26: 'BDDFIN'}},
'JBJMR': {'Product': {23: 'TU',
27: 'TU',
28: 'BP',
29: 'TU',
30: 'TU',
31: 'TU',
32: 'TU',
33: 'TU',
34: 'TU',
35: 'TU'},
'Scheme': {23: 'RJTPS',
27: 'RJTPS',
28: 'RJZPS',
29: 'RJTPS',
30: 'RJTPS',
31: 'RJTPS',
32: 'RJTPS',
33: 'RJTPS',
34: 'RJTPS',
35: 'RJTPS'},
'Type': {23: 'BDDFIN',
27: 'BDDFIN',
28: 'BDDFIN',
29: 'BDDFIN',
30: 'BDDFIN',
31: 'BDDFIN',
32: 'BDDFIN',
33: 'BDDFIN',
34: 'BDDFIN',
35: 'BDDFIN'}},
'JBMDR': {'Product': {8: 'TU', 22: 'TU', 47: 'TU', 48: 'TU'},
'Scheme': {8: 'MJTPS', 22: 'MJTPS', 47: 'MJTPS', 48: 'MJTPS'},
'Type': {8: 'BDDFIN', 22: 'BDDFIN', 47: 'BDDFIN', 48: 'BDDFIN'}},
'JBZJB': {'Product': {36: 'TU'},
'Scheme': {36: 'UPTPS'},
'Type': {36: 'BDDFIN'}},
'JBZWI': {'Product': {3: 'TU', 37: 'TU', 38: 'TU', 39: 'TU'},
'Scheme': {3: 'UKTPS', 37: 'UKTPS', 38: 'UKTPS', 39: 'UKTPS'},
'Type': {3: 'BDDFIN', 37: 'BDDFIN', 38: 'BDDFIN', 39: 'BDDFIN'}},
'JBZWR': {'Product': {13: 'TU',
40: 'BP',
41: 'BP',
42: 'TU',
43: 'TU',
44: 'TU',
45: 'TU',
46: 'TU'},
'Scheme': {13: 'RJTPS',
40: 'RJZPS',
41: 'RJZPS',
42: 'RJTPS',
43: 'RJTPS',
44: 'RJTPS',
45: 'RJTPS',
46: 'RJTPS'},
'Type': {13: 'BDDFIN',
40: 'BDDFIN',
41: 'BDDFIN',
42: 'BDDFIN',
43: 'BDDFIN',
44: 'BDDFIN',
45: 'BDDFIN',
46: 'BDDFIN'}}}
CodePudding user response:
Lets define a func using some recursion and groupby
def dictify(df, cols):
if not cols:
return len(df)
return {k: dictify(g, cols[1:]) for k, g in df.groupby(cols[0])}
Then call it with the parameters
dictify(df, ["Unit","Product","Scheme","Type"])
And here is the result
{'JBGRB': {'TU': {'UPTPS': {'BDDFIN': 4}}},
'JBJEB': {'TU': {'GJTPS': {'BDDFIN': 13}}},
'JBJMD': {'TU': {'GJTPS': {'BDDFIN': 5}}},
'JBJMR': {'BP': {'RJZPS': {'BDDFIN': 1}}, 'TU': {'RJTPS': {'BDDFIN': 9}}},
'JBMDR': {'TU': {'MJTPS': {'BDDFIN': 4}}},
'JBZJB': {'TU': {'UPTPS': {'BDDFIN': 1}}},
'JBZWI': {'TU': {'UKTPS': {'BDDFIN': 4}}},
'JBZWR': {'BP': {'RJZPS': {'BDDFIN': 2}}, 'TU': {'RJTPS': {'BDDFIN': 6}}}}