I'm trying to read this local JSON file using pandas, but when I use the JSON NORMALIZE library to extract data from the JSON it throws me errors, I've tried many ways to do it but I don't know if it's a code problem or a JSON structure problem
I need the transportEvents part of this file to save and upload to bigquery project
[
{
"event": {
"eventPublishTime": 0,
"eventSubmissionTime": 0,
"correlationId": "string",
"eventName": "string",
"senderOrgName": "string",
"senderOrgTypes": [
"string"
],
"originatorId": "string",
"eventOccurrenceTime": 1589574600000,
"eventOccurrenceTime8601": "2020-05-15T15:30:00.000-05:00",
"fromOceanAggregator": "false",
"originatorName": "string",
"containerTransportId": "string",
"physicalId": "string",
"eventSubmissionGpsLocation": "string",
"action": "string",
"doc": {
"description": "string",
"contentUrl": "string",
"docId": "string",
"docVersion": 0,
"docType": "string",
"docReferences": [
{
"referenceName": "string",
"referenceValue": "string"
}
]
},
"destinationOrgName": "string",
"documentType": "string",
"vehicleId": "string",
"vehicleName": "string",
"voyageId": "string",
"transportationPhase": "Import",
"eventTransactionId": "string",
"shipmentRef": "string",
"carrierBookingNumber": "string",
"containerTransportRef": "string",
"firstPortOfEntryCountry": "AD",
"cargoRemainingOnboardCountries": [
"AD"
],
"transitImportCountries": [
"AD"
],
"transitExportCountries": [
"AD"
],
"transshipmentPorts": [
"string"
],
"transshipmentPortDescriptions": [
"string"
],
"entryPort": "string",
"exitPort": "string",
"terminal": "string",
"terminalDescription": "string",
"consignmentId": "string",
"consignmentRef": "string",
"transportEquipmentId": "string",
"transportEquipmentRef": "string",
"equipmentNumber": "string",
"equipmentType": "string",
"transportPlanIssuanceTime8601": "2020-05-15T15:30:00.000-05:00",
"transportPlanIssuanceTime": 1589574600000,
"transportPlanSequenceNumber": 0,
"transportPlanTotalNumber": 0,
"subscriptionId": "string",
"documentId": "string",
"documentVersion": 0,
"dangerousGoodsType": "string",
"packingListNumber": "string",
"billOfLadingNumber": "string",
"consigneeId": "string",
"consigneeIdType": "string",
"shipperId": "string",
"shipperIdType": "string",
"operatorCode": "string",
"operatorName": "string",
"sealType": "Carrier",
"sealNumber": "string",
"seals": [
{
"sealNumber": "string",
"sealType": "Carrier"
}
],
"reasonForSurvey": "string",
"verifiedGrossMass": "string",
"netWeight": "string",
"certificateType": "string",
"waypointId": "string",
"newSlotNumber": "string",
"fullStatus": "Full",
"equipmentNumbers": [
"string"
],
"reasonCode": "string",
"cutOffType": "Cargo",
"cutOffDateTime8601": "2020-05-15T15:30:00.000-05:00",
"cutOffDateTime": 1589574600000,
"cutOffEventType": "string",
"harmonizedCodes": [
"string"
],
"eventType": "string",
"associatedConsignmentIds": [
"string"
],
"associatedCarrierBookingNumbers": [
"string"
],
"associatedBillOfLadingNumbers": [
"string"
],
"eBLProvider": "string",
"consignmentsToAccess": [
{
"transactionRef": {
"reference": "string",
"type": "string"
},
"carrierIdentifier": "string",
"consignmentId": "string",
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"equipmentNumbers": [
"string"
]
}
],
"data": [
{
"type": "Barge",
"value": "string"
}
],
"document": {
"description": "string",
"uri": "string",
"hash": "string",
"hashEncodingType": "string",
"platformDocumentId": "string",
"platformDocumentVersion": 0
},
"documents": [
{
"description": "string",
"uri": "string",
"hash": "string",
"hashEncodingType": "string",
"platformDocumentId": "string",
"platformDocumentVersion": 0
}
],
"location": {
"unlocode": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
},
"smdgTerminal": "string",
"geoCoord": {
"latitude": 0,
"longitude": 0
},
"splc": "string",
"gln": "string",
"type": "UN/Locode",
"value": "string",
"description": "string",
"locationKey": "string"
},
"bookingData": {
"originLocation": {
"unlocode": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
},
"smdgTerminal": "string",
"geoCoord": {
"latitude": 0,
"longitude": 0
},
"splc": "string",
"gln": "string",
"type": "UN/Locode",
"value": "string",
"description": "string",
"locationKey": "string"
},
"destinationLocation": {
"unlocode": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
},
"smdgTerminal": "string",
"geoCoord": {
"latitude": 0,
"longitude": 0
},
"splc": "string",
"gln": "string",
"type": "UN/Locode",
"value": "string",
"description": "string",
"locationKey": "string"
},
"contractCarriageCondition": "SD_SD",
"departureTime8601": "2020-05-15T15:30:00.000-05:00",
"departureDateEpoch": 1589574600000,
"vehicleId": "string",
"vehicleName": "string",
"voyageId": "string",
"transportEquipmentDetails": [
{
"equipmentType": "string",
"equipmentQuantity": 0,
"description": "string"
}
],
"commodities": [
{
"itemNumber": 0,
"commodityDescription": "string",
"commodityHarmonizedCode": "string",
"commodityQuantity": 0,
"commodityWeight": "string"
}
]
},
"subcontractId": "string",
"subcontractCBN": "string",
"subcontractBOL": "string",
"messages": [
"string"
],
"consignmentStatus": "Active",
"transferFromConsignment": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"partLoadWithConsignment": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"transferFromEquipment": "string",
"splitFromConsignment": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"combinedCarrierBookingNumbers": [
"string"
],
"partBill": "false",
"partBillOf": {
"carrierBookingNumber": "string",
"billOfLadingNumber": "string",
"consignmentId": "string"
},
"shipmentIdentifier": {
"reference": "string",
"type": "string",
"label": "string"
},
"plannedCompletionDate": 1589574600000,
"plannedCompletionDate8601": "2020-05-15T15:30:00.000-05:00",
"plannedDeliveryDate": 1589574600000,
"plannedDeliveryDate8601": "2020-05-15T15:30:00.000-05:00",
"consignmentIdentifiers": [
{
"consignmentId": "string",
"carrierBookingNumber": "string",
"billOfLadingNumber": "string"
}
],
"references": [
{
"reference": "string",
"type": "string",
"label": "string"
}
],
"goodsDescription": "string",
"issuanceTime8601": "2020-05-15T15:30:00.000-05:00",
"callersPartyRole": "SELLER",
"parties": [
{
"partyRef": "string",
"partyRole": "BUYER"
}
],
"consignments": [
"string"
],
"transportEquipment": [
"string"
],
"shipments": [
"string"
],
"transportEquipmentDetails": [
{
"equipmentType": "string",
"equipmentQuantity": 0
}
],
"equipmentPickUpComments": "string",
"minimumTemperature": 0,
"maximumTemperature": 0,
"unit": "degreeCelsius",
"temperatureInstructions": "string",
"eventClassifier": "Planned",
"transportMode": "Vessel",
"eventAction": "Arrival",
"facilityType": "PortTerminal",
"emptyIndicator": "Empty",
"eventPriority": 0,
"releaseParty": {
"partyRef": "string",
"orgName": "string",
"address": {
"address1": "string",
"address2": "string",
"city": "string",
"stateProvince": "string",
"zipPostal": "string",
"country": "US"
}
},
"controlBy": "CUSTOMS",
"controlEventAction": "CONTROL_START",
"nationalControlCode": "string",
"procedure": "IMPORTATION",
"conditionalRelease": "string",
"releaseObject": "FULL_CONTAINER",
"releasedBy": "CUSTOMS",
"declarationRef": "string",
"nationalProcedureDetails": {
"procedureCode": "string",
"procedureDescription": "string",
"procedureType": "string",
"procedureStatus": "string",
"procedureNumber": "string",
"createTime8601": "2018-03-10T11:30:00.000-05:00",
"closeTime8601": "2018-03-10T11:30:00.000-05:00",
"carrierIdentification": "string",
"procedureLocation": "string"
},
"transportEvents": [
{
"eventAction": "Arrival",
"transportMode": "Rail",
"eventOccurrenceTime8601": "2018-03-13T11:30:00.000-05:00",
"location": {
"unlocode": "NLRTM"
},
"transportPlanSequenceNumber": 1,
"transportationPhase": "Import",
"vehicleId": "JEV4568",
"vehicleName": "Vehicle Name",
"voyageId": "1234",
"emptyIndicator": "Laden"
}
]
}
}
]
and this is the python code I am using
import pandas as pd
import json
from pandas.io.json import json_normalize
ruta_archivo_json = 'response_1660310720193.json'
data = pd.read_json(ruta_archivo_json)
df = pd.json_normalize(data, 'transportEvents')
print(df)
df.head(10)
CodePudding user response:
I am not sure what is the expected result, but from what I understood, the below code should help:
import pandas as pd
import json
from pandas.io.json import json_normalize
with open('response_1660310720193.json','r') as f:
data = json.loads(f.read())
ruta_archivo_json = 'response_1660310720193.json'
df = pd.json_normalize(data, ["event","transportEvents"])
print(df)
df.head(10)
CodePudding user response:
I'm not sure what you would like to achieve in the final outcome. But if you run below code you should get something in return
import pandas as pd
import json
# from pandas.io.json import json_normalize #no need this
pd.options.display.max_colwidth = 100 #default=50
pd.options.display.max_rows = 250 #default=60
data = pd.read_json('response_1660310720193.json')
df = pd.json_normalize(data['event']).T
print(df)
Output:
eventPublishTime 0
eventSubmissionTime 0
correlationId string
eventName string
senderOrgName string
senderOrgTypes [string]
originatorId string
eventOccurrenceTime 1589574600000
eventOccurrenceTime8601 2020-05-15T15:30:00.000-05:00
fromOceanAggregator false
originatorName string
<truncated>
declarationRef string
transportEvents [{'eventAction': 'Arrival', 'transpo...
doc.description string
<truncated>
You should see 'transportEvents' in the df
, so if you drill deeper you can get the data
df1 = pd.json_normalize(df.T['transportEvents'].explode().tolist()).T
df1.columns = ['transportEvents']
print(df1)
transportEvents
eventAction Arrival
transportMode Rail
eventOccurrenceTime8601 2018-03-13T11:30:00.000-05:00
transportPlanSequenceNumber 1
transportationPhase Import
vehicleId JEV4568
vehicleName Vehicle Name
voyageId 1234
emptyIndicator Laden
location.unlocode NLRTM