Home > Software engineering >  Accessing nested JSON using python
Accessing nested JSON using python

Time:12-22

I am having a very hard time finding a proper way to print my result from the JSON that I have.

I searched for many hours but was not successful in finding the answer.

Here is the JSNO that I have:

json = \
{
  "Envelope": {
    "Body": {
      "GetCTProductsResponse": {
        "GetCTProductsResult": {
          "CTPRODUCT": [
            {
              "CODE": "TESLAAIR3",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Tesla Air purifier AIR 3",
              "MANUFACTURER": "Tesla",
              "MANUFACTURERCODE": "TESLA",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "69,9000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "24M",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": ""
            },
            {
              "CODE": "SKV4140GL",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Xiaomi MI SMART Antibacterial humidifier",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "39,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216482.jpg"
            },
            {
              "CODE": "SKV4140GL",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Xiaomi MI SMART Antibacterial humidifier",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "39,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216098.jpg"
            },
            {
              "CODE": "SKV4140GL",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Xiaomi MI SMART Antibacterial humidifier",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "39,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093215238.jpg"
            },
            {
              "CODE": "BHR4802GL",
              "PRODUCTGROUPCODE": "ZVUCNICI",
              "NAME": "Xiaomi Mi Portable Bluetooth Speaker Grey",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "17,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/BHR4802GL_1.jpg"
            }
          ]
        }
      }
    }
  }
}

Using Python, I want to access the CODE variable ("CODE": "TESLAAIR3")

I tried many things, dumps, load, loads etc. but nothing solved my problem.

Thank you.

CodePudding user response:

You can try:

a["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][0]["CODE"]

CodePudding user response:

This will print out all the codes:

for code in test["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"]:
    print(code["CODE"])

CodePudding user response:

Another way of doing this is the following:

data = {
  "Envelope": {
    "Body": {
      "GetCTProductsResponse": {
        "GetCTProductsResult": {
          "CTPRODUCT": [
            {
              "CODE": "TESLAAIR3",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Tesla Air purifier AIR 3",
              "MANUFACTURER": "Tesla",
              "MANUFACTURERCODE": "TESLA",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "69,9000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "24M",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": ""
            },
            {
              "CODE": "SKV4140GL",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Xiaomi MI SMART Antibacterial humidifier",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "39,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216482.jpg"
            },
            {
              "CODE": "SKV4140GL",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Xiaomi MI SMART Antibacterial humidifier",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "39,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093216098.jpg"
            },
            {
              "CODE": "SKV4140GL",
              "PRODUCTGROUPCODE": "AIRPURIF",
              "NAME": "Xiaomi MI SMART Antibacterial humidifier",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "39,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/SKV4140GL_201117093215238.jpg"
            },
            {
              "CODE": "BHR4802GL",
              "PRODUCTGROUPCODE": "ZVUCNICI",
              "NAME": "Xiaomi Mi Portable Bluetooth Speaker Grey",
              "MANUFACTURER": "Xiaomi",
              "MANUFACTURERCODE": "XIAOMI",
              "QTTYINSTOCK": ">20",
              "TAX": 21,
              "PRICE": "17,0000",
              "RETAILPRICE": 0,
              "SHORT_DESCRIPTION": "",
              "WARRANTY": "2G",
              "EUR_ExchangeRate": "0,00",
              "BARCODE": "",
              "IMAGE_URL": "http://www.ct4partners.ba/UploadDownload/ProductImages/BHR4802GL_1.jpg"
            }
          ]
        }
      }
    }
  }
}

and

import pandas as pd
import json
json_object = json.dumps(data)
results = pd.json_normalize(data)

Now, define the following function:

def flatten_nested_json_df(df):
    df = df.reset_index()
    s = (df.applymap(type) == list).all()
    list_columns = s[s].index.tolist()
    
    s = (df.applymap(type) == dict).all()
    dict_columns = s[s].index.tolist()

    
    while len(list_columns) > 0 or len(dict_columns) > 0:
        new_columns = []

        for col in dict_columns:
            horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
            horiz_exploded.index = df.index
            df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
            new_columns.extend(horiz_exploded.columns) # inplace

        for col in list_columns:
            #print(f"exploding: {col}")
            df = df.drop(columns=[col]).join(df[col].explode().to_frame())
            new_columns.append(col)

        s = (df[new_columns].applymap(type) == list).all()
        list_columns = s[s].index.tolist()

        s = (df[new_columns].applymap(type) == dict).all()
        dict_columns = s[s].index.tolist()
    return df

and do this:

results = pd.json_normalize(data)
flatten_nested_json_df(results)

which returns a df from which you can pick anything you whish:

 index  \
0      0   
0      0   
0      0   
0      0   
0      0   

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.CODE  \
0                                          TESLAAIR3                       
0                                          SKV4140GL                       
0                                          SKV4140GL                       
0                                          SKV4140GL                       
0                                          BHR4802GL                       

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.PRODUCTGROUPCODE  \
0                                           AIRPURIF                                   
0                                           AIRPURIF                                   
0                                           AIRPURIF                                   
0                                           AIRPURIF                                   
0                                           ZVUCNICI                                   

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.NAME  \
0                           Tesla Air purifier AIR 3                       
0           Xiaomi MI SMART Antibacterial humidifier                       
0           Xiaomi MI SMART Antibacterial humidifier                       
0           Xiaomi MI SMART Antibacterial humidifier                       
0          Xiaomi Mi Portable Bluetooth Speaker Grey                       

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.MANUFACTURER  \
0                                              Tesla                               
0                                             Xiaomi                               
0                                             Xiaomi                               
0                                             Xiaomi                               
0                                             Xiaomi                               

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.MANUFACTURERCODE  \
0                                              TESLA                                   
0                                             XIAOMI                                   
0                                             XIAOMI                                   
0                                             XIAOMI                                   
0                                             XIAOMI                                   

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.QTTYINSTOCK  \
0                                             >20                              
0                                             >20                              
0                                             >20                              
0                                             >20                              
0                                             >20                              

   Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.TAX  \
0                                                 21                       
0                                                 21                       
0                                                 21                       
0                                                 21                       
0                                                 21                       

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.PRICE  \
0                                            69,9000                        
0                                            39,0000                        
0                                            39,0000                        
0                                            39,0000                        
0                                            17,0000                        

   Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.RETAILPRICE  \
0                                                  0                               
0                                                  0                               
0                                                  0                               
0                                                  0                               
0                                                  0                               

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.SHORT_DESCRIPTION  \
0                                                                                       
0                                                                                       
0                                                                                       
0                                                                                       
0                                                                                       

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.WARRANTY  \
0                                                24M                           
0                                                 2G                           
0                                                 2G                           
0                                                 2G                           
0                                                 2G                           

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.EUR_ExchangeRate  \
0                                               0,00                                   
0                                               0,00                                   
0                                               0,00                                   
0                                               0,00                                   
0                                               0,00                                   

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.BARCODE  \
0                                                                             
0                                                                             
0                                                                             
0                                                                             
0                                                                             

  Envelope.Body.GetCTProductsResponse.GetCTProductsResult.CTPRODUCT.IMAGE_URL  
0                                                                              
0  http://www.ct4partners.ba/UploadDownload/Produ...                           
0  http://www.ct4partners.ba/UploadDownload/Produ...                           
0  http://www.ct4partners.ba/UploadDownload/Produ...                           
0  http://www.ct4partners.ba/UploadDownload/Produ...         

It also has the advantage of showing you the path to what you want in the column name, so that:

 data['Envelope']['Body']['GetCTProductsResponse']['GetCTProductsResult']['CTPRODUCT'][0]['CODE']             

CodePudding user response:

I'd suggest thinking about the structure of the object. This is somewhat overdetermined code, but it will help you to identify any bugs more quickly:

import json
json_string = {} # Your whole json file from the question
ctproducts = None
try:
    ct_products_object = json.loads(json_string)
    ct_envelope = ct_products_object.get("Envelope") # See below
    ct_body = ct_envelope.get("Body")
    get_ct_products = ct_body.get("GetCTProductsResponse")
    get_ct_result = get_ct_products.get("GetCTProductsResult")
    ctproducts = get_ct_result.get("CTPRODUCT")
except json.JSONDecodeError as e:
    print(f"This is not a valid JSON file because {str(e)}")
    # Not a json file, handle that
except AttributeError as e:
    print(str(e))
    # You got one of the keys wrong, handle that.
if ctproducts:
    for ctproduct in ctproducts:
        code = ctproduct.get("CODE")
        print(f"CODE: {code}")

It looks as though you're not actually getting the root of the JSON file in your code, so you probable want to skip over the ct_envelope and use ct_body = ct_products_object.get("Body"). If that throws an AttributeError, try get_ct_products = ct_products_object.get("GetCTProductsResponse") etc. until you find which part of the object is the root of your current object.

CodePudding user response:

You could write a function find_json which gives you the Path to the first occurrence of the key 'CODE' and its value:

def find_json(key, json, acc=[]):
    if type(json) == list:
        for i, x in enumerate(json):
            if type(x) == list or type(x) == dict:
                return find_json(key, x, acc   [i])
    elif type(json) == dict:
        for k, v in json.items():
            if k == key:
                return acc   [k], v
            elif type(v) == list or type(v) == dict:
                return find_json(key, json[k], acc   [k])

which you run by:

find_json('CODE', json)
# returning the path to the first 'CODE' in `json` and the value:
(['Envelope',
  'Body',
  'GetCTProductsResponse',
  'GetCTProductsResult',
  'CTPRODUCT',
  0,
  'CODE'],
 'TESLAAIR3')

We can improve the output by generating the code for the call to the first CODE by:

from functools import reduce

def path_to_code(lst, dict_name="json"): 
    return reduce(lambda x,y: f"{x}[\"{y}\"]" if type(y) == str else f"{x}[{y}]", lst, dict_name)

def find_json(key, json, acc=[]):
    if type(json) == list:
        for i, x in enumerate(json):
            if type(x) == list or type(x) == dict:
                return find_json(key, x, acc   [i])
    elif type(json) == dict:
        for k, v in json.items():
            if k == key:
                return path_to_code(acc   [k], "json"), v
            elif type(v) == list or type(v) == dict:
                return find_json(key, json[k], acc   [k])
        
find_json('CODE', json)

# which outputs:
('json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][0]["CODE"]',
 'TESLAAIR3')

And finally, to find all paths to 'CODE' key in this json:

from functools import reduce

def path_to_code(lst, dict_name="json"): 
    return reduce(lambda x,y: f"{x}[\"{y}\"]" if type(y) == str else f"{x}[{y}]", lst, dict_name)

def find_all_json(key, json):
    res = []
    def _find_all_json(key, json, acc=[]):
        nonlocal res
        if type(json) == list:
            for i, x in enumerate(json):
                if type(x) == list or type(x) == dict:
                    _find_all_json(key, x, acc   [i])
        elif type(json) == dict:
            for k, v in json.items():
                if k == key:
                    res = res   [{"cmd": path_to_code(acc   [k], "json"), "val": v}]
                if type(v) == list or type(v) == dict:
                    _find_all_json(key, v, acc   [k])
    _find_all_json(key, json)
    return res
        
find_all_json('CODE', json)
# [{'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][0]["CODE"]',
#   'val': 'TESLAAIR3'},
#  {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][1]["CODE"]',
#   'val': 'SKV4140GL'},
#  {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][2]["CODE"]',
#   'val': 'SKV4140GL'},
#  {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][3]["CODE"]',
#   'val': 'SKV4140GL'},
#  {'cmd': 'json["Envelope"]["Body"]["GetCTProductsResponse"]["GetCTProductsResult"]["CTPRODUCT"][4]["CODE"]',
#   'val': 'BHR4802GL'}]

  • Related