I'm storing data from an API. For the responses of some countries, some of the subsets are not provided, so when I try to access such a subset and call the .get() method on it, I get a KeyError.
I'm wondering how I can ignore these variables and return a null value when the data is not provided by the API response.
Below are the JSON responses for two different countries: businessreport_ES.json
"salesAndTrafficByAsin": [{
"parentAsin": "AX0003",
"childAsin": "AXC0001",
"sku": "AXC1",
"salesByAsin": {
"unitsOrdered": 1,
"orderedProductSales": {
"amount": 31.06,
"currencyCode": "EUR"
},
"totalOrderItems": 1
},
"trafficByAsin": {
"browserSessions": 0,
"mobileAppSessions": 1,
"sessions": 1,
"browserSessionPercentage": 0.0,
"mobileAppSessionPercentage": 50.0,
"sessionPercentage": 14.29,
"browserPageViews": 0,
"mobileAppPageViews": 9,
"pageViews": 9,
"browserPageViewsPercentage": 0.0,
"mobileAppPageViewsPercentage": 90.0,
"pageViewsPercentage": 56.25,
"buyBoxPercentage": 100.0,
"unitSessionPercentage": 100.0
}
}]
businessreport_UK.json
"salesAndTrafficByAsin": [{
"parentAsin": "AX0003",
"childAsin": "AXC0001",
"sku": "AXC1",
"salesByAsin": {
"unitsOrdered": 0,
"unitsOrderedB2B": 0,
"orderedProductSales": {
"amount": 0.0,
"currencyCode": "GBP"
},
"orderedProductSalesB2B": {
"amount": 0.0,
"currencyCode": "GBP"
},
"totalOrderItems": 0,
"totalOrderItemsB2B": 0
},
"trafficByAsin": {
"browserSessions": 3,
"browserSessionsB2B": 0,
"mobileAppSessions": 12,
"mobileAppSessionsB2B": 0,
"sessions": 15,
"sessionsB2B": 0,
"browserSessionPercentage": 0.16,
"browserSessionPercentageB2B": 0.0,
"mobileAppSessionPercentage": 0.47,
"mobileAppSessionPercentageB2B": 0.0,
"sessionPercentage": 0.34,
"sessionPercentageB2B": 0.0,
"browserPageViews": 3,
"browserPageViewsB2B": 0,
"mobileAppPageViews": 15,
"mobileAppPageViewsB2B": 0,
"pageViews": 18,
"pageViewsB2B": 0,
"browserPageViewsPercentage": 0.12,
"browserPageViewsPercentageB2B": 0.0,
"mobileAppPageViewsPercentage": 0.46,
"mobileAppPageViewsPercentageB2B": 0.0,
"pageViewsPercentage": 0.31,
"pageViewsPercentageB2B": 0.0,
"buyBoxPercentage": 0.0,
"buyBoxPercentageB2B": 0.0,
"unitSessionPercentage": 0.0,
"unitSessionPercentageB2B": 0.0
}
}]
Below is my code:
# Load the saved report for the current marketplace and flatten every entry of
# 'salesAndTrafficByAsin' into one flat dict per ASIN.
# NOTE(review): `marketplace`, `datef`, `json` and `pd` are defined outside this
# snippet; `datef` is presumably the report date -- confirm against the caller.
# NOTE(review): the file handle `f` is never closed in this snippet.
f = open(f'./responses/businessreport_{marketplace}.json')
jsondata = json.load(f)
salesAndTrafficByAsin = []
for item in jsondata['salesAndTrafficByAsin']:
salesAndTrafficByAsin.append({
"date": pd.to_datetime(datef),
"parentAsin": item.get('parentAsin'),
"childAsin": item.get('childAsin'),
# .get() on the "salesByAsin" dict yields None for keys that are absent
# (e.g. 'unitsOrderedB2B' in businessreport_ES.json).
'unitsOrdered': item["salesByAsin"].get('unitsOrdered'),
'unitsOrderedB2B': item["salesByAsin"].get('unitsOrderedB2B'),
'orderedProductSales': item["salesByAsin"]['orderedProductSales'].get('amount'),
'currencyCode': item["salesByAsin"]['orderedProductSales'].get('currencyCode'),
# The next two lines index ['orderedProductSalesB2B'] directly, so a KeyError
# is raised before .get() is ever called when the response has no B2B subset
# (as in businessreport_ES.json).
'orderedProductSales_B2B': item["salesByAsin"]['orderedProductSalesB2B'].get('amount'),
'currencyCode_B2B': item["salesByAsin"]['orderedProductSalesB2B'].get('currencyCode'),
# Traffic metrics: "trafficByAsin" is indexed directly, then each metric is
# read with .get(), so missing B2B metrics become None.
'browserSessions': item["trafficByAsin"].get('browserSessions'),
"browserSessionsB2B": item["trafficByAsin"].get('browserSessionsB2B'),
"mobileAppSessions": item["trafficByAsin"].get('mobileAppSessions'),
"mobileAppSessionsB2B": item["trafficByAsin"].get('mobileAppSessionsB2B'),
"sessions": item["trafficByAsin"].get('sessions'),
"sessionsB2B": item["trafficByAsin"].get('sessionsB2B'),
"browserSessionPercentage": item["trafficByAsin"].get('browserSessionPercentage'),
"browserSessionPercentageB2B": item["trafficByAsin"].get('browserSessionPercentageB2B'),
"mobileAppSessionPercentage": item["trafficByAsin"].get('mobileAppSessionPercentage'),
"mobileAppSessionPercentageB2B": item["trafficByAsin"].get('mobileAppSessionPercentageB2B'),
"sessionPercentage": item["trafficByAsin"].get('sessionPercentage'),
"sessionPercentageB2B": item["trafficByAsin"].get('sessionPercentageB2B'),
"browserPageViews": item["trafficByAsin"].get('browserPageViews'),
"browserPageViewsB2B": item["trafficByAsin"].get('browserPageViewsB2B'),
"mobileAppPageViews": item["trafficByAsin"].get('mobileAppPageViews'),
"mobileAppPageViewsB2B": item["trafficByAsin"].get('mobileAppPageViewsB2B'),
"pageViews": item["trafficByAsin"].get('pageViews'),
"pageViewsB2B": item["trafficByAsin"].get('pageViewsB2B'),
"browserPageViewsPercentage": item["trafficByAsin"].get('browserPageViewsPercentage'),
"browserPageViewsPercentageB2B": item["trafficByAsin"].get('browserPageViewsPercentageB2B'),
"mobileAppPageViewsPercentage": item["trafficByAsin"].get('mobileAppPageViewsPercentage'),
"mobileAppPageViewsPercentageB2B": item["trafficByAsin"].get('mobileAppPageViewsPercentageB2B'),
"pageViewsPercentage": item["trafficByAsin"].get('pageViewsPercentage'),
"pageViewsPercentageB2B": item["trafficByAsin"].get('pageViewsPercentageB2B'),
"buyBoxPercentage": item["trafficByAsin"].get('buyBoxPercentage'),
"buyBoxPercentageB2B": item["trafficByAsin"].get('buyBoxPercentageB2B'),
})
So the difference here is that businessreport_UK.json has the orderedProductSalesB2B subset while businessreport_ES.json doesn't. As a result, the code raises the error shown below when looping over the businessreport_ES.json data:
46 salesAndTrafficByDate = []
47 for item in jsondata['salesAndTrafficByDate']:
48 salesAndTrafficByDate.append({
49 "date": pd.to_datetime(item.get('date')),
50 "orderedProductSales": item["salesByDate"]['orderedProductSales'].get('amount'),
51 "CurrencyCode": item["salesByDate"]['orderedProductSales'].get('currencyCode'),
---> 52 "orderedProductSales_B2B": item["salesByDate"]['orderedProductSalesB2B'].get('amount'),
53 "CurrencyCode_B2B": item["salesByDate"]['orderedProductSalesB2B'].get('currencyCode'),
54 "unitsOrdered": item["salesByDate"].get("unitsOrdered"),
55 "unitsOrdered_B2B": item["salesByDate"].get("unitsOrderedB2B"),
56 "totalOrderItems": item["salesByDate"].get("totalOrderItems"),
57 "totalOrderItems_B2B": item["salesByDate"].get("totalOrderItemsB2B"),
58 "averageSalesPerOrderItem": item["salesByDate"]["averageSalesPerOrderItem"].get("amount"),
59 "averageSalesPerOrderItem_B2B": item["salesByDate"]["averageSalesPerOrderItemB2B"].get("amount"),
60 "averageUnitsPerOrderItem": item["salesByDate"].get("averageUnitsPerOrderItem"),
61 "averageUnitsPerOrderItem_B2B": item["salesByDate"].get("averageUnitsPerOrderItem"),
62 "averageSellingPrice": item["salesByDate"]["averageSellingPrice"].get("amount"),
63 "averageSellingPrice_B2B": item["salesByDate"]["averageSellingPriceB2B"].get("amount"),
64 "unitsRefunded": item["salesByDate"].get("unitsRefunded"),
65 "refundRate": item["salesByDate"].get("refundRate"),
66 "claimsGranted": item["salesByDate"].get("claimsGranted"),
67 "claimsAmount": item["salesByDate"]["claimsAmount"].get("amount"),
...
71 })
72 salesAndTrafficByAsin = []
74 for item in jsondata['salesAndTrafficByAsin']:
KeyError: 'orderedProductSalesB2B'
CodePudding user response:
IMHO you should chain multiple `.get()` calls, because that is (among other things) what they were designed for, e.g. `item["salesByAsin"].get("orderedProductSalesB2B", {}).get("amount")`.
Alternatively (though, to be honest, I wouldn't recommend it) you can define a function to "safely" get values from a nested dictionary:
- loop over the nested keys
- reassign the intermediate result at each level
- return the final result, OR the default if a key is not present at some level of nesting
def nested_get(d, *keys, default=None):
    """Return a value from a nested container, or ``default`` if the path breaks.

    ``nested_get(d, "a", "b")`` is equivalent to ``d["a"]["b"]``, except that
    a failed lookup at any level returns ``default`` instead of raising.

    Args:
        d: The outermost container (typically a dict parsed from JSON).
        *keys: Keys (or sequence indices) to follow, outermost first. With no
            keys, ``d`` itself is returned.
        default: Value returned when any step of the path cannot be resolved.

    Returns:
        The value found at the end of the path, or ``default``.

    >>> nested_get({"w": {"d": 5}}, "w", "d")
    5
    >>> nested_get({"w": {"d": 5}}, "w", "missing", default=0)
    0
    """
    for k in keys:
        try:
            d = d[k]
        except (LookupError, TypeError, ValueError):
            # LookupError covers KeyError (missing dict key) and IndexError
            # (sequence index out of range) -- the "key not present" case.
            # TypeError: the current level is not subscriptable (e.g. an int
            # or None) or the key has an unsuitable type.
            # ValueError: retained so existing callers see no new exceptions.
            return default
    return d
# Two sample nested dictionaries to query.
a = {"w": {"d": 5}}
b = {"x": {"D": 5}}

# Every key on the path exists, so the innermost value comes back.
value = nested_get(b, "x", "D")
print(value)  # -> 5

# The path runs past the leaf value 5, which cannot be indexed, so the
# default (None) is returned.
value = nested_get(a, "w", "d", "t")
print(value)  # -> None

# Same over-long path, this time with an explicit fallback value.
value = nested_get(a, "w", "d", "t", default=11)
print(value)  # -> 11