Home > database >  How to parse and get specific data from a huge json file to implement search in python
How to parse and get specific data from a huge json file to implement search in python

Time:11-21

I have a json file with lot of information so I'm trying to just extract specific data where there is a position and I need to get the immediate name data, also trying to implement search in python. I'm uploading a part of sample json data from the file ex.json

`

{
  "storables": [
    {
      "columns": [
        {
          "position": 0,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "CAT",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",
          "sageOutputColumnId": "",
          "defaultAggrType": "NONE",
          "ownerName": "",
          "ownerType": "WORKSHEET",
          "entityCategory": "DEFAULT",
          "spotiqPreference": "DEFAULT",
          "isAdditive": false,
          "indexType": "DEFAULT",
          "indexPriority": 1,
          "sources": [
            {
              "tableId": "",
              "tableName": "",
              "columnId": "",
              "columnName": "CATASTROPHE"
            }
          ],
          "synonyms": [],
          "injectedInlineValues": [],
          "precision": -1,
          "scale": 0,
          "isPrimaryKey": false,
          "isAttributionDimension": true,
          "derivationExpr": {
            "exprType": "LOGICAL_COLUMN_REFERENCE",
            "logicalColumn": {
              "header": {
                "id": "",
                "indexVersion": 35499,
                "generationNum": 35499,
                "name": "CATASTROPHE",
                "author": "",
                "created": 1630716505804,
                "modified": 1668211006637,
                "modifiedBy": "",
                "owner": "",
                "isDeleted": false,
                "isHidden": false,
                "schemaStripe": "",
                "databaseStripe": "",
                "tags": [],
                "isExternal": false,
                "isDeprecated": false
              }
            },
            "joinPaths": [
              {
                "joins": [
                  {
                    "sourceTable": "",
                    "destinationTable": "",
                    "content": {
                      "relationships": [
                        {
                          "sourceColumn": "",
                          "destinationColumn": ""
                        }
                      ],
                      "weight": 1
                    },
                    "joinType": "INNER",
                    "type": "USER_DEFINED",
                    "isOneToOneJoin": false,
                    "header": {
                      "id": "",
                      "indexVersion": 35499,
                      "generationNum": 35499,
                      "name": "",
                      "description": "",
                      "author": "",
                      "created": 1650658367043,
                      "modified": 1668211006686,
                      "modifiedBy": "",
                      "owner": "",
                      "isDeleted": false,
                      "isHidden": false,
                      "tags": [],
                      "type": "USER_DEFINED",
                      "isExternal": false,
                      "isDeprecated": false
                    },
                    "complete": true,
                    "incompleteDetail": [],
                    "sourceColumns": [
                      ""
                    ],
                    "targetColumns": [
                      ""
                    ]
                  }
                ]
              }
            ]
          }
        },
        {
          "position": 1,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "Peril",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",
          "sageOutputColumnId": "",
          "defaultAggrType": "NONE",
          "ownerName": "",
          "ownerType": "WORKSHEET",
          "entityCategory": "DEFAULT",
          "spotiqPreference": "DEFAULT",
          "isAdditive": false,
          "indexType": "DEFAULT",
          "indexPriority": 1,
          "sources": [
            {
              "tableId": "",
              "tableName": "",
              "columnId": "",
              "columnName": "TYPE_OF"
            }
          ],
          "synonyms": [],
          "injectedInlineValues": [],
          "precision": -1,
          "scale": 0,
          "isPrimaryKey": false,
          "isAttributionDimension": true,
          "derivationExpr": {
            "exprType": "LOGICAL_COLUMN_REFERENCE",
            "logicalColumn": {
              "header": {
                "id": "",
                "indexVersion": 35499,
                "generationNum": 35499,
                "name": "TYPE_OF",
                "author": "",
                "created": 1630716505804,
                "modified": 1668211006637,
                "modifiedBy": "",
                "owner": "",
                "isDeleted": false,
                "isHidden": false,
                "schemaStripe": "",
                "databaseStripe": "",
                "tags": [],
                "isExternal": false,
                "isDeprecated": false
              }
            },
            "joinPaths": [
              {
                "joins": [
                  {
                    "sourceTable": "",
                    "destinationTable": "",
                    "content": {
                      "relationships": [
                        {
                          "sourceColumn": "",
                          "destinationColumn": ""
                        }
                      ],
                      "weight": 1
                    },
                    "joinType": "INNER",
                    "type": "USER_DEFINED",
                    "isOneToOneJoin": false,
                    "header": {
                      "id": "",
                      "indexVersion": 35499,
                      "generationNum": 35499,
                      "name": "",
                      "description": "Copy of user table relationship",
                      "author": "",
                      "created": 1650658367043,
                      "modified": 1668211006686,
                      "modifiedBy": "",
                      "owner": "",
                      "isDeleted": false,
                      "isHidden": false,
                      "tags": [],
                      "type": "USER_DEFINED",
                      "isExternal": false,
                      "isDeprecated": false
                    },
                    "complete": true,
                    "incompleteDetail": [],
                    "sourceColumns": [
                      ""
                    ],
                    "targetColumns": [
                      ""
                    ]
                  }
                ]
              }
            ]
          }
        },
        {
          "position": 2,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "Job",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",
          "sageOutputColumnId": "",
          "defaultAggrType": "NONE",
          "ownerName": "",
          "ownerType": "WORKSHEET",
          "entityCategory": "DEFAULT",
          "spotiqPreference": "DEFAULT",
          "isAdditive": false,
          "indexType": "DEFAULT",
          "indexPriority": 1,
          "sources": [
            {
              "tableId": "",
              "tableName": "",
              "columnId": "",
              "columnName": ""
            }
          ],
          "synonyms": [],
          "injectedInlineValues": [],
          "precision": -1,
          "scale": 0,
          "isPrimaryKey": false,
          "isAttributionDimension": true,
          "derivationExpr": {
            "exprType": "LOGICAL_COLUMN_REFERENCE",
            "logicalColumn": {
              "header": {
                "id": "",
                "indexVersion": 35499,
                "generationNum": 35499,
                "name": "ROTATION_TRADE",
                "author": "",
                "created": 1630716505804,
                "modified": 1668211006637,
                "modifiedBy": "",
                "owner": "",
                "isDeleted": false,
                "isHidden": false,
                "schemaStripe": "",
                "databaseStripe": "",
                "tags": [],
                "isExternal": false,
                "isDeprecated": false
              }
            },
            "joinPaths": [
              {
                "joins": [
                  {
                    "sourceTable": "",
                    "destinationTable": "",
                    "content": {
                      "relationships": [
                        {
                          "sourceColumn": "",
                          "destinationColumn": ""
                        }
                      ],
                      "weight": 1
                    },
                    "joinType": "INNER",
                    "type": "USER_DEFINED",
                    "isOneToOneJoin": false,
                    "header": {
                      "id": "",
                      "indexVersion": 35499,
                      "generationNum": 35499,
                      "name": "",
                      "description": "Copy of user table relationship",
                      "author": "",
                      "created": 1650658367043,
                      "modified": 1668211006686,
                      "modifiedBy": "",
                      "owner": "",
                      "isDeleted": false,
                      "isHidden": false,
                      "tags": [],
                      "type": "USER_DEFINED",
                      "isExternal": false,
                      "isDeprecated": false
                    },
                    "complete": true,
                    "incompleteDetail": [],
                    "sourceColumns": [
                      ""
                    ],
                    "targetColumns": [
                      ""
                    ]
                  }
                ]
              }
            ]
          }
        },
        {
          "position": 3,
          "header": {
            "id": "",
            "indexVersion": 35643,
            "generationNum": 35643,
            "name": "Job Lenghth",
            "author": "",
            "created": 1620247188226,
            "modified": 1668544812673,
            "modifiedBy": "",
            "owner": "",
            "isDeleted": false,
            "isHidden": false,
            "tags": [],
            "isExternal": false,
            "isDeprecated": false
          },
          "complete": true,
          "incompleteDetail": [],
          "isDerived": true,
          "dataType": "VARCHAR",
          "type": "ATTRIBUTE",

`

`

with open('ex.json', 'r') as f:
    for line in f:
        if 'position' in line:
            for line in f: 
                if ' name: ' in line:
                    print(line)

` I tried this python piece of code but it din't work. I'm not sure how to return just the immediate name after the position. There are multiple name instances in the file but I need just the one after position...

CodePudding user response:

import json
with open('ex.json', 'r') as f:
    data = json.load(f)

Now you can access all json items just like you access any dictionary/object in python from data variable

CodePudding user response:

Your code may works, but you need to change the logic a little bit. Here is fast sketch of the solution:

prevWasPosition = False
with open('ex.json', 'r') as f:
    for line in f:
        if '"position":' in line:
            prevWasPosition = True
            continue

        if prevWasPosition and '"name":' in line:
            print(line)
        prevWasPosition = False

Beware that this solution based on the assumption that json file is properly formatted. If it is not, you may get unexpected results. Stronger solution would be to use read file chunk by chunk and parse it as json, but it's beyond the scope of this answer.

  • Related