I have a json file with lot of information so I'm trying to just extract specific data where there is a position and I need to get the immediate name data, also trying to implement search in python. I'm uploading a part of sample json data from the file ex.json
`
{
"storables": [
{
"columns": [
{
"position": 0,
"header": {
"id": "",
"indexVersion": 35643,
"generationNum": 35643,
"name": "CAT",
"author": "",
"created": 1620247188226,
"modified": 1668544812673,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"isDerived": true,
"dataType": "VARCHAR",
"type": "ATTRIBUTE",
"sageOutputColumnId": "",
"defaultAggrType": "NONE",
"ownerName": "",
"ownerType": "WORKSHEET",
"entityCategory": "DEFAULT",
"spotiqPreference": "DEFAULT",
"isAdditive": false,
"indexType": "DEFAULT",
"indexPriority": 1,
"sources": [
{
"tableId": "",
"tableName": "",
"columnId": "",
"columnName": "CATASTROPHE"
}
],
"synonyms": [],
"injectedInlineValues": [],
"precision": -1,
"scale": 0,
"isPrimaryKey": false,
"isAttributionDimension": true,
"derivationExpr": {
"exprType": "LOGICAL_COLUMN_REFERENCE",
"logicalColumn": {
"header": {
"id": "",
"indexVersion": 35499,
"generationNum": 35499,
"name": "CATASTROPHE",
"author": "",
"created": 1630716505804,
"modified": 1668211006637,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"schemaStripe": "",
"databaseStripe": "",
"tags": [],
"isExternal": false,
"isDeprecated": false
}
},
"joinPaths": [
{
"joins": [
{
"sourceTable": "",
"destinationTable": "",
"content": {
"relationships": [
{
"sourceColumn": "",
"destinationColumn": ""
}
],
"weight": 1
},
"joinType": "INNER",
"type": "USER_DEFINED",
"isOneToOneJoin": false,
"header": {
"id": "",
"indexVersion": 35499,
"generationNum": 35499,
"name": "",
"description": "",
"author": "",
"created": 1650658367043,
"modified": 1668211006686,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"type": "USER_DEFINED",
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"sourceColumns": [
""
],
"targetColumns": [
""
]
}
]
}
]
}
},
{
"position": 1,
"header": {
"id": "",
"indexVersion": 35643,
"generationNum": 35643,
"name": "Peril",
"author": "",
"created": 1620247188226,
"modified": 1668544812673,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"isDerived": true,
"dataType": "VARCHAR",
"type": "ATTRIBUTE",
"sageOutputColumnId": "",
"defaultAggrType": "NONE",
"ownerName": "",
"ownerType": "WORKSHEET",
"entityCategory": "DEFAULT",
"spotiqPreference": "DEFAULT",
"isAdditive": false,
"indexType": "DEFAULT",
"indexPriority": 1,
"sources": [
{
"tableId": "",
"tableName": "",
"columnId": "",
"columnName": "TYPE_OF"
}
],
"synonyms": [],
"injectedInlineValues": [],
"precision": -1,
"scale": 0,
"isPrimaryKey": false,
"isAttributionDimension": true,
"derivationExpr": {
"exprType": "LOGICAL_COLUMN_REFERENCE",
"logicalColumn": {
"header": {
"id": "",
"indexVersion": 35499,
"generationNum": 35499,
"name": "TYPE_OF",
"author": "",
"created": 1630716505804,
"modified": 1668211006637,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"schemaStripe": "",
"databaseStripe": "",
"tags": [],
"isExternal": false,
"isDeprecated": false
}
},
"joinPaths": [
{
"joins": [
{
"sourceTable": "",
"destinationTable": "",
"content": {
"relationships": [
{
"sourceColumn": "",
"destinationColumn": ""
}
],
"weight": 1
},
"joinType": "INNER",
"type": "USER_DEFINED",
"isOneToOneJoin": false,
"header": {
"id": "",
"indexVersion": 35499,
"generationNum": 35499,
"name": "",
"description": "Copy of user table relationship",
"author": "",
"created": 1650658367043,
"modified": 1668211006686,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"type": "USER_DEFINED",
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"sourceColumns": [
""
],
"targetColumns": [
""
]
}
]
}
]
}
},
{
"position": 2,
"header": {
"id": "",
"indexVersion": 35643,
"generationNum": 35643,
"name": "Job",
"author": "",
"created": 1620247188226,
"modified": 1668544812673,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"isDerived": true,
"dataType": "VARCHAR",
"type": "ATTRIBUTE",
"sageOutputColumnId": "",
"defaultAggrType": "NONE",
"ownerName": "",
"ownerType": "WORKSHEET",
"entityCategory": "DEFAULT",
"spotiqPreference": "DEFAULT",
"isAdditive": false,
"indexType": "DEFAULT",
"indexPriority": 1,
"sources": [
{
"tableId": "",
"tableName": "",
"columnId": "",
"columnName": ""
}
],
"synonyms": [],
"injectedInlineValues": [],
"precision": -1,
"scale": 0,
"isPrimaryKey": false,
"isAttributionDimension": true,
"derivationExpr": {
"exprType": "LOGICAL_COLUMN_REFERENCE",
"logicalColumn": {
"header": {
"id": "",
"indexVersion": 35499,
"generationNum": 35499,
"name": "ROTATION_TRADE",
"author": "",
"created": 1630716505804,
"modified": 1668211006637,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"schemaStripe": "",
"databaseStripe": "",
"tags": [],
"isExternal": false,
"isDeprecated": false
}
},
"joinPaths": [
{
"joins": [
{
"sourceTable": "",
"destinationTable": "",
"content": {
"relationships": [
{
"sourceColumn": "",
"destinationColumn": ""
}
],
"weight": 1
},
"joinType": "INNER",
"type": "USER_DEFINED",
"isOneToOneJoin": false,
"header": {
"id": "",
"indexVersion": 35499,
"generationNum": 35499,
"name": "",
"description": "Copy of user table relationship",
"author": "",
"created": 1650658367043,
"modified": 1668211006686,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"type": "USER_DEFINED",
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"sourceColumns": [
""
],
"targetColumns": [
""
]
}
]
}
]
}
},
{
"position": 3,
"header": {
"id": "",
"indexVersion": 35643,
"generationNum": 35643,
"name": "Job Lenghth",
"author": "",
"created": 1620247188226,
"modified": 1668544812673,
"modifiedBy": "",
"owner": "",
"isDeleted": false,
"isHidden": false,
"tags": [],
"isExternal": false,
"isDeprecated": false
},
"complete": true,
"incompleteDetail": [],
"isDerived": true,
"dataType": "VARCHAR",
"type": "ATTRIBUTE",
`
`
with open('ex.json', 'r') as f:
for line in f:
if 'position' in line:
for line in f:
if ' name: ' in line:
print(line)
` I tried this python piece of code but it din't work. I'm not sure how to return just the immediate name after the position. There are multiple name instances in the file but I need just the one after position...
CodePudding user response:
import json
with open('ex.json', 'r') as f:
data = json.load(f)
Now you can access all json items just like you access any dictionary/object in python from data variable
CodePudding user response:
Your code may works, but you need to change the logic a little bit. Here is fast sketch of the solution:
prevWasPosition = False
with open('ex.json', 'r') as f:
for line in f:
if '"position":' in line:
prevWasPosition = True
continue
if prevWasPosition and '"name":' in line:
print(line)
prevWasPosition = False
Beware that this solution based on the assumption that json file is properly formatted. If it is not, you may get unexpected results. Stronger solution would be to use read file chunk by chunk and parse it as json, but it's beyond the scope of this answer.