I have a pandas dataframe that looks like this:
user_id cat_id prod_id score pref_prod
29762 9 3115 1.000000 335.0
29762 58 1335 1.000000 335.0
234894 58 1335 1.000000 335.0
413276 43 1388 1.000000 335.0
413276 58 1335 1.000000 335.0
413276 73 26 1.000000 335.0
9280593 9 137 1.000000 335.0
9280593 58 1335 1.000000 335.0
9280593 74 160 1.000000 335.0
4554542 66 1612 0.166667 197.0
4554542 66 1406 0.166767 197.0
4554542 66 2021 1.000000 197.0
I want to group this df by user_id
& cat_id
and convert it to json so that it looks something like this:
{
29762: {
'cat_id': {
9: [{
'prod_id': 3115,
'score': 1.0
}],
58: [{
'prod_id': 1335,
'score': 1.0
}]
},
'pref_prod': 335.0
}
234894: {
'cat_id': {
58: [{
'prod_id': 1335,
'score': 1.0
}]
},
'pref_prod': 335.0
}
413276: {
'cat_id': {
43: [{
'prod_id': 1388,
'score': 1.0,
'fav_provider': 335.0
}],
58: [{
'prod_id': 1335,
'score': 1.0,
'fav_provider': 335.0
}],
73: [{
'prod_id': 26,
'score': 1.0,
}]
},
'pref_prod': 335.0
}
4554542: {
'cat_id': {
66: [{
'prod_id': 1612,
'score': 0.166
}, {
'prod_id': 1406,
'score': 0.16
}, {
'prod_id': 2021,
'score': 1.0,
}]
},
'pref_prod': 197.0
}
}
As of now I can do
gb = df.groupby(['user_id', 'cat_id']).apply(lambda g: g.drop(['user_id', 'cat_id'], axis=1).to_dict(orient='records')).to_dict()
which gives me user_id and cat_id in tuple keys:
{
(29762, 9): [{
'prod_id': 3115,
'score': 1.0,
'pref_prod': 335.0
}],
(29762, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(234894, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(413276, 43): [{
'prod_id': 1388,
'score': 1.0,
'pref_prod': 335.0
}],
(413276, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(413276, 73): [{
'prod_id': 26,
'score': 1.0,
'pref_prod': 335.0
}],
(9280593, 9): [{
'prod_id': 137,
'score': 1.0,
'pref_prod': 335.0
}],
(9280593, 58): [{
'prod_id': 1335,
'score': 1.0,
'pref_prod': 335.0
}],
(9280593, 74): [{
'prod_id': 160,
'score': 1.0,
'pref_prod': 335.0
}],
(4554542,
66): [{
'prod_id': 1612,
'score': 0.16666666666666666,
'pref_prod': 197.0
}, {
'prod_id': 1406,
'score': 0.16676666666666665,
'pref_prod': 197.0
}, {
'prod_id': 2021,
'score': 1.0,
'pref_prod': 197.0
}]
}
How can I get the json in the desired format
CodePudding user response:
You could either reparse your current groupby, or try groupby with just user_id, and then write a custom function and apply it to each group. Since the user_id would be the highest index, you identify groups based on that and then create the sub dictionary.
CodePudding user response:
I can't think of any direct way to do it with pandas
only. But you can construct a new dictionary with the desired format based on gb
, using a defaultdict
from collections import defaultdict
import json # just to prettyprint the resulting dictionary
gb = df.groupby(['user_id', 'cat_id']).apply(lambda g: g.drop(['user_id', 'cat_id'], axis=1).to_dict(orient='records')).to_dict()
d = defaultdict(lambda: {'cat_id':{}} )
for (user_id, cat_id), records in gb.items():
for record in records:
# drop 'pref_prod' key of each record
# I'm assuming its unique for each (user_id, cat_id) group
pref_prod = record.pop('pref_prod')
d[user_id]['cat_id'][cat_id] = records
d[user_id]['pref_prod'] = pref_prod
>>> print(json.dumps(d, indent=4))
{
"29762": {
"cat_id": {
"9": [
{
"prod_id": 3115,
"score": 1.0
}
],
"58": [
{
"prod_id": 1335,
"score": 1.0
}
]
},
"pref_prod": 335.0
},
"234894": {
"cat_id": {
"58": [
{
"prod_id": 1335,
"score": 1.0
}
]
},
"pref_prod": 335.0
},
"413276": {
"cat_id": {
"43": [
{
"prod_id": 1388,
"score": 1.0
}
],
"58": [
{
"prod_id": 1335,
"score": 1.0
}
],
"73": [
{
"prod_id": 26,
"score": 1.0
}
]
},
"pref_prod": 335.0
},
"4554542": {
"cat_id": {
"66": [
{
"prod_id": 1612,
"score": 0.166667
},
{
"prod_id": 1406,
"score": 0.166767
},
{
"prod_id": 2021,
"score": 1.0
}
]
},
"pref_prod": 197.0
},
"9280593": {
"cat_id": {
"9": [
{
"prod_id": 137,
"score": 1.0
}
],
"58": [
{
"prod_id": 1335,
"score": 1.0
}
],
"74": [
{
"prod_id": 160,
"score": 1.0
}
]
},
"pref_prod": 335.0
}
}
CodePudding user response:
I used a namedtuple from a dataframe conversion to create the json tree. if the tree has more than one level than I would use recursion to build it. the dataframe did not contain lists of list so recursion was not required.
from io import StringIO
import io
from collections import namedtuple
data="""user_id,cat_id,prod_id,score,pref_prod
29762,9,3115,1.000000,335.0
29762,58,1335,1.000000,335.0
234894,58,1335,1.000000,335.0
413276,43,1388,1.000000,335.0
413276,58,335,1.000000,335.0
413276,73,26,1.000000,335.0
9280593,9,137,1.000000,335.0
9280593,58,1335,1.000000,335.0
9280593,74,160,1.000000,335.0
4554542,66,1612,0.166667,197.0
4554542,66,1406,0.166767,197.0
4554542,66,2021,1.000000,197.0"""
df = pd.read_csv(io.StringIO(data), sep=',')
Record=namedtuple('Generic',['user_id','cat_id','prod_id','score','pref_prod'])
def map_to_record(row):
return Record(row.user_id, row.cat_id, row.prod_id,row.score,row.pref_prod)
my_list = list(map(map_to_record, df.itertuples()))
def named_tuple_to_json(named_tuple):
"""
convert a named tuple to a json tree structure
"""
json_string="records:["
for record in named_tuple:
json_string ="{"
json_string ="'user_id': {},'cat_id': {},'prod_id': {},'score': {},'pref_prod': {},".format(
record.user_id,record.cat_id,record.prod_id,record.score,record.pref_prod)
json_string ="},"
json_string ="]"
return json_string
# convert the list of named tuples to a json tree structure
json_tree = named_tuple_to_json(my_list)
print(json_tree)
output
records:[{'user_id': 29762,'cat_id': 9,'prod_id': 3115,'score': 1.0,'pref_prod': 335.0,},{'user_id': 29762,'cat_id': 58,'prod_id': 1335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 234894,'cat_id': 58,'prod_id': 1335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 413276,'cat_id': 43,'prod_id': 1388,'score': 1.0,'pref_prod': 335.0,},{'user_id': 413276,'cat_id': 58,'prod_id': 335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 413276,'cat_id': 73,'prod_id': 26,'score': 1.0,'pref_prod': 335.0,},{'user_id': 9280593,'cat_id': 9,'prod_id': 137,'score': 1.0,'pref_prod': 335.0,},{'user_id': 9280593,'cat_id': 58,'prod_id': 1335,'score': 1.0,'pref_prod': 335.0,},{'user_id': 9280593,'cat_id': 74,'prod_id': 160,'score': 1.0,'pref_prod': 335.0,},{'user_id': 4554542,'cat_id': 66,'prod_id': 1612,'score': 0.166667,'pref_prod': 197.0,},{'user_id': 4554542,'cat_id': 66,'prod_id': 1406,'score': 0.166767,'pref_prod': 197.0,},{'user_id': 4554542,'cat_id': 66,'prod_id': 2021,'score': 1.0,'pref_prod': 197.0,},]