Let's say I've got a nested JSON file as below. If I want to print the courses that each instructor teaches, how do I do that?
{
"info":{
"source_objects":[
{
"type":"sub-category",
"id":277438897,
}
],
"item_type":"course",
"items":[
{
"_class":"course",
"id":156173119,
"is_paid":null,
"trainer":[
{
"id":257585701,
"url":"/user/tania_guerra/",
}
],
{
"_class":"course",
"id":12456,
"is_paid":null,
"trainer":[
{
"id":257585701,
"url":"/user/tania_guerra/",
}
],
}
*************and more data in the same format****************
}
}
I'm not sure if there's any simple trick that I'm missing. So far, I've tried the following and it prints the course id and trainer id. But then how do I add all the courses that this trainer trains?
import json

# alljson is a directory where multiple json files exist
# NOTE(review): open() reads a single file, not a directory -- presumably
# `alljson` has to be the path of one JSON file here (loop over the directory
# to handle several files); confirm against how `alljson` is set.
with open(alljson, 'r') as stream:  # not named `json`: that would shadow the module
    read_json = json.load(stream)

# Iterate over the items directly instead of indexing with range(len(...)).
for item in read_json['info']['items']:
    cid = item['id']  # gets the course id
    for trainer in item['trainer']:
        trainer_id = trainer['id']  # gets the trainer id
        # then how do I get course id added to trainer id. for example
        # 12456---123456***123457***123454***12454
        # trainer id--- all the courses that this instructor teaches adding ***
CodePudding user response:
Assuming each trainer has a unique id, you can create a dict of lists, where the keys are trainer ids and the values are lists of course ids:
import json
import os

rootdir = 'tmp/test1'
output = 'trainers.txt'


def collect_trainers(rootdir):
    """Group course ids by trainer for every ``.json`` file under *rootdir*.

    Walks *rootdir* recursively, loads each file whose extension is exactly
    ``.json`` and reads ``data['info']['items']``. Each item contributes its
    ``id`` (the course id) to every entry of its ``trainer`` list.

    Returns a dict mapping ``(trainer id, trainer url)`` tuples to a list of
    course ids (as strings), in the order they were encountered.

    Raises ``KeyError`` if a file lacks one of the expected keys and
    ``json.JSONDecodeError`` if a ``.json`` file is not valid JSON.
    """
    trainers = {}
    for root, _dirs, files in os.walk(rootdir):
        for filename in files:
            if os.path.splitext(filename)[1] != '.json':
                continue
            filepath = os.path.join(root, filename)
            # JSON text is UTF-8 by specification; don't rely on the locale default.
            with open(filepath, encoding='utf-8') as stream:
                data = json.load(stream)
            for item in data['info']['items']:
                cid = item['id']
                for trainer in item['trainer']:
                    key = (trainer['id'], trainer['url'])
                    # Course ids are stored as strings so they can be joined later.
                    trainers.setdefault(key, []).append(str(cid))
    return trainers


def write_trainers(trainers, output):
    """Write one ``id---url---cid;;;cid...`` line per trainer to *output*.

    Lines are sorted by the ``(trainer id, trainer url)`` key.
    """
    with open(output, 'w', encoding='utf-8') as stream:
        for (tid, url), cids in sorted(trainers.items()):
            stream.write('%s---%s---%s\n' % (tid, url, ';;;'.join(cids)))


if __name__ == '__main__':
    trainers = collect_trainers(rootdir)
    write_trainers(trainers, output)
Result:
257585701---/user/tania_guerra/---12456;;;7992450;;;7812756;;;156173119;;;562456
918585703---/user/tania_guerra/---7867833;;;14473169;;;156173119
test.json:
{
"info": {
"source_objects": [
{
"type": "sub-category",
"id": 277438897
}
],
"item_type": "course",
"items": [
{
"_class": "course",
"id": 156173119,
"is_paid": null,
"trainer": [
{
"id": 257585701,
"url": "/user/tania_guerra/"
}
]
},
{
"_class": "course",
"id": 12456,
"is_paid": null,
"trainer": [
{
"id": 257585701,
"url": "/user/tania_guerra/"
}
]
}
]
}
}
CodePudding user response:
I think it's easiest to use a dict, or better, a defaultdict that maps each trainer id (int) to the list of course ids (List[int]) that the trainer teaches.
Something like:
import json
from collections import defaultdict

# NOTE(review): assumes `alljson` is the path of a single JSON file -- confirm.
with open(alljson, "r", encoding="utf-8") as stream:
    # Parse the file first: the open file object itself cannot be indexed,
    # and naming it `json` would shadow the json module.
    items = json.load(stream)["info"]["items"]

# trainer id -> list of ids of the courses that list this trainer
trainer_course_mapping = defaultdict(list)
for item in items:
    for trainer in item["trainer"]:
        trainer_course_mapping[trainer["id"]].append(item["id"])