I am having a very hard time finding a proper way to print my result from the JSON that I have.
I searched for many hours but was not successful in finding the answer.
Here is the JSNO that I have:
json = \
{
"Envelope": {
"Body": {
"GetCTProductsResponse": {
"GetCTProductsResult": {
"CTPRODUCT": [
{
"CODE": "TESLAAIR3",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Tesla Air purifier AIR 3",
"MANUFACTURER": "Tesla",
"MANUFACTURERCODE": "TESLA",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "69,9000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "24M",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": ""
},
{
"CODE": "SKV4140GL",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Xiaomi MI SMART Antibacterial humidifier",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "39,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216482.jpg"
},
{
"CODE": "SKV4140GL",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Xiaomi MI SMART Antibacterial humidifier",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "39,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216098.jpg"
},
{
"CODE": "SKV4140GL",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Xiaomi MI SMART Antibacterial humidifier",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "39,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093215238.jpg"
},
{
"CODE": "BHR4802GL",
"PRODUCTGROUPCODE": "ZVUCNICI",
"NAME": "Xiaomi Mi Portable Bluetooth Speaker Grey",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "17,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/BHR4802GL_1.jpg"
}
]
}
}
}
}
}
Using Python, I want to access the CODE variable ("CODE": "TESLAAIR3"
)
I tried many things, dumps, load, loads etc. but nothing solved my problem.
Thank you.
CodePudding user response:
You can try:
a["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][0]["CODE"]
CodePudding user response:
This will print out all the codes:
for code in test["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"]:
print(code["CODE"])
CodePudding user response:
Another way of doing this is the following:
data = {
"Envelope": {
"Body": {
"GetCTProductsResponse": {
"GetCTProductsResult": {
"CTPRODUCT": [
{
"CODE": "TESLAAIR3",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Tesla Air purifier AIR 3",
"MANUFACTURER": "Tesla",
"MANUFACTURERCODE": "TESLA",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "69,9000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "24M",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": ""
},
{
"CODE": "SKV4140GL",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Xiaomi MI SMART Antibacterial humidifier",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "39,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216482.jpg"
},
{
"CODE": "SKV4140GL",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Xiaomi MI SMART Antibacterial humidifier",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "39,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216098.jpg"
},
{
"CODE": "SKV4140GL",
"PRODUCTGROUPCODE": "AIRPURIF",
"NAME": "Xiaomi MI SMART Antibacterial humidifier",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "39,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093215238.jpg"
},
{
"CODE": "BHR4802GL",
"PRODUCTGROUPCODE": "ZVUCNICI",
"NAME": "Xiaomi Mi Portable Bluetooth Speaker Grey",
"MANUFACTURER": "Xiaomi",
"MANUFACTURERCODE": "XIAOMI",
"QTTYINSTOCK": ">20",
"TAX": 21,
"PRICE": "17,0000",
"RETAILPRICE": 0,
"SHORT_DESCRIPTION": "",
"WARRANTY": "2G",
"EUR_ExchangeRate": "0,00",
"BARCODE": "",
"IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/BHR4802GL_1.jpg"
}
]
}
}
}
}
}
and
import pandas as pd
import json
json_object = json.dumps(data)
results = pd.json_normalize(data)
Now, define the following function:
def flatten_nested_json_df(df):
df = df.reset_index()
s = (df.applymap(type) == list).all()
list_columns = s[s].index.tolist()
s = (df.applymap(type) == dict).all()
dict_columns = s[s].index.tolist()
while len(list_columns) > 0 or len(dict_columns) > 0:
new_columns = []
for col in dict_columns:
horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
horiz_exploded.index = df.index
df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
new_columns.extend(horiz_exploded.columns) # inplace
for col in list_columns:
#print(f"exploding: {col}")
df = df.drop(columns=[col]).join(df[col].explode().to_frame())
new_columns.append(col)
s = (df[new_columns].applymap(type) == list).all()
list_columns = s[s].index.tolist()
s = (df[new_columns].applymap(type) == dict).all()
dict_columns = s[s].index.tolist()
return df
and do this:
results = pd.json_normalize(data)
flatten_nested_json_df(results)
which returns a df from which you can pick anything you whish:
index \
0 0
0 0
0 0
0 0
0 0
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.CODE \
0 TESLAAIR3
0 SKV4140GL
0 SKV4140GL
0 SKV4140GL
0 BHR4802GL
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.PRODUCTGROUPCODE \
0 AIRPURIF
0 AIRPURIF
0 AIRPURIF
0 AIRPURIF
0 ZVUCNICI
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.NAME \
0 Tesla Air purifier AIR 3
0 Xiaomi MI SMART Antibacterial humidifier
0 Xiaomi MI SMART Antibacterial humidifier
0 Xiaomi MI SMART Antibacterial humidifier
0 Xiaomi Mi Portable Bluetooth Speaker Grey
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.MANUFACTURER \
0 Tesla
0 Xiaomi
0 Xiaomi
0 Xiaomi
0 Xiaomi
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.MANUFACTURERCODE \
0 TESLA
0 XIAOMI
0 XIAOMI
0 XIAOMI
0 XIAOMI
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.QTTYINSTOCK \
0 >20
0 >20
0 >20
0 >20
0 >20
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.TAX \
0 21
0 21
0 21
0 21
0 21
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.PRICE \
0 69,9000
0 39,0000
0 39,0000
0 39,0000
0 17,0000
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.RETAILPRICE \
0 0
0 0
0 0
0 0
0 0
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.SHORT_DESCRIPTION \
0
0
0
0
0
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.WARRANTY \
0 24M
0 2G
0 2G
0 2G
0 2G
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.EUR_ExchangeRate \
0 0,00
0 0,00
0 0,00
0 0,00
0 0,00
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.BARCODE \
0
0
0
0
0
Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.IMAGE_URL
0
0 http://www.ct4partners.ba/UploadDownload/Produ...
0 http://www.ct4partners.ba/UploadDownload/Produ...
0 http://www.ct4partners.ba/UploadDownload/Produ...
0 http://www.ct4partners.ba/UploadDownload/Produ...
It also has the advantage of showing you the path to what you want in the column name, so that:
data['Envelope']['Body']['GetCTProductsResponse']['GetCTProductsResult']['CTPRODUCT'][0]['CODE']
CodePudding user response:
I'd suggest thinking about the structure of the object. This is somewhat overdetermined code, but it will help you to identify any bugs more quickly:
import json
json_string = {} # Your whole json file from the question
ctproducts = None
try:
ct_products_object = json.loads(json_string)
ct_envelope = ct_products_object.get("Envelope") # See below
ct_body = ct_envelope.get("Body")
get_ct_products = ct_body.get("GetCTProductsResponse")
get_ct_result = get_ct_products.get("GetCTProductsResult")
ctproducts = get_ct_result.get("CTPRODUCT")
except json.JSONDecodeError as e:
print(f"This is not a valid JSON file because {str(e)}")
# Not a json file, handle that
except AttributeError as e:
print(str(e))
# You got one of the keys wrong, handle that.
if ctproducts:
for ctproduct in ctproducts:
code = ctproduct.get("CODE")
print(f"CODE: {code}")
It looks as though you're not actually getting the root of the JSON file in your code, so you probable want to skip over the ct_envelope and use ct_body = ct_products_object.get("Body")
. If that throws an AttributeError, try get_ct_products = ct_products_object.get("GetCTProductsResponse")
etc. until you find which part of the object is the root of your current object.
CodePudding user response:
You could write a function find_json
which gives you the Path to the first occurrence of the key 'CODE' and its value:
def find_json(key, json, acc=[]):
if type(json) == list:
for i, x in enumerate(json):
if type(x) == list or type(x) == dict:
return find_json(key, x, acc [i])
elif type(json) == dict:
for k, v in json.items():
if k == key:
return acc [k], v
elif type(v) == list or type(v) == dict:
return find_json(key, json[k], acc [k])
which you run by:
find_json('CODE', json)
# returning the path to the first 'CODE' in `json` and the value:
(['Envelope',
'Body',
'GetCTProductsResponse',
'GetCTProductsResult',
'CTPRODUCT',
0,
'CODE'],
'TESLAAIR3')
We can improve the output by generating the code for the call to the first CODE
by:
from functools import reduce
def path_to_code(lst, dict_name="json"):
return reduce(lambda x,y: f"{x}[\"{y}\"]" if type(y) == str else f"{x}[{y}]", lst, dict_name)
def find_json(key, json, acc=[]):
if type(json) == list:
for i, x in enumerate(json):
if type(x) == list or type(x) == dict:
return find_json(key, x, acc [i])
elif type(json) == dict:
for k, v in json.items():
if k == key:
return path_to_code(acc [k], "json"), v
elif type(v) == list or type(v) == dict:
return find_json(key, json[k], acc [k])
find_json('CODE', json)
# which outputs:
('json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][0]["CODE"]',
'TESLAAIR3')
And finally, to find all paths to 'CODE' key in this json:
from functools import reduce
def path_to_code(lst, dict_name="json"):
return reduce(lambda x,y: f"{x}[\"{y}\"]" if type(y) == str else f"{x}[{y}]", lst, dict_name)
def find_all_json(key, json):
res = []
def _find_all_json(key, json, acc=[]):
nonlocal res
if type(json) == list:
for i, x in enumerate(json):
if type(x) == list or type(x) == dict:
_find_all_json(key, x, acc [i])
elif type(json) == dict:
for k, v in json.items():
if k == key:
res = res [{"cmd": path_to_code(acc [k], "json"), "val": v}]
if type(v) == list or type(v) == dict:
_find_all_json(key, v, acc [k])
_find_all_json(key, json)
return res
find_all_json('CODE', json)
# [{'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][0]["CODE"]',
# 'val': 'TESLAAIR3'},
# {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][1]["CODE"]',
# 'val': 'SKV4140GL'},
# {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][2]["CODE"]',
# 'val': 'SKV4140GL'},
# {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][3]["CODE"]',
# 'val': 'SKV4140GL'},
# {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][4]["CODE"]',
# 'val': 'BHR4802GL'}]