I have a JSON file that is received from a REST API. An example of the return is like this:
{
"d": {
"results": [
{
"__metadata": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')",
"type": "EmpEmployment"
},
"personIdExternal": "60000033",
"userId": "60000033",
"hiringNotCompleted": false,
"isECRecord": true,
"lastModifiedDateTime": "/Date(1642917586000 0000)/",
"endDate": "/Date(1675123200000)/",
"createdDateTime": "/Date(1641473919000 0000)/",
"createdOn": "/Date(1641473919000)/",
"originalStartDate": "/Date(1501545600000)/",
"customDate1": "/Date(1501545600000)/",
"customString17": null,
"customString18": null,
"customString19": null,
"assignmentClass": "ST",
"lastModifiedBy": "This Dude",
"okToRehire": true,
"customString4": null,
"customString3": "3",
"customString2": null,
"assignmentIdExternal": "60000033",
"customString16": null,
"lastModifiedOn": "/Date(1642917586000)/",
"customString1": null,
"createdBy": "This Dudette",
"seniorityDate": "/Date(1501545600000)/",
"startDate": "/Date(1659398400000)/",
"customString16Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString16Nav"
}
},
"customString1Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString1Nav"
}
},
"customString18Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString18Nav"
}
},
"customString3Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString3Nav"
}
},
"paymentInformationNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/paymentInformationNav"
}
},
"empJobRelationshipNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/empJobRelationshipNav"
}
},
"personNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/personNav"
}
},
"empWorkPermitNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/empWorkPermitNav"
}
},
"photoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/photoNav"
}
},
"compInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/compInfoNav"
}
},
"userNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/userNav"
}
},
"customString2Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString2Nav"
}
},
"customString19Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString19Nav"
}
},
"jobInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/jobInfoNav"
}
},
"wfRequestNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/wfRequestNav"
}
},
"costDistributionNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/costDistributionNav"
}
},
"empPayCompNonRecurringNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/empPayCompNonRecurringNav"
}
}
},
{
"__metadata": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')",
"type": "EmpEmployment"
},
"personIdExternal": "100003",
"userId": "100003",
"hiringNotCompleted": false,
"isECRecord": true,
"lastModifiedDateTime": "/Date(1638051713000 0000)/",
"endDate": null,
"createdDateTime": "/Date(1638051713000 0000)/",
"createdOn": "/Date(1638051713000)/",
"originalStartDate": "/Date(1635724800000)/",
"customDate1": null,
"customString17": null,
"customString18": null,
"customString19": null,
"assignmentClass": "ST",
"lastModifiedBy": "This Dudette",
"okToRehire": null,
"customString4": null,
"customString3": null,
"customString2": null,
"assignmentIdExternal": "100003",
"customString16": null,
"lastModifiedOn": "/Date(1638051713000)/",
"customString1": null,
"createdBy": "This Dude",
"seniorityDate": "/Date(1635724800000)/",
"startDate": "/Date(1635724800000)/",
"customString16Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString16Nav"
}
},
"customString1Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString1Nav"
}
},
"customString18Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString18Nav"
}
},
"customString3Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString3Nav"
}
},
"paymentInformationNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/paymentInformationNav"
}
},
"empJobRelationshipNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/empJobRelationshipNav"
}
},
"personNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/personNav"
}
},
"empWorkPermitNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/empWorkPermitNav"
}
},
"photoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/photoNav"
}
},
"compInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/compInfoNav"
}
},
"userNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/userNav"
}
},
"customString2Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString2Nav"
}
},
"customString19Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString19Nav"
}
},
"jobInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/jobInfoNav"
}
},
"wfRequestNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/wfRequestNav"
}
},
"costDistributionNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/costDistributionNav"
}
},
"empPayCompNonRecurringNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/empPayCompNonRecurringNav"
}
}
}
]
}
}
I'm at present just looking to pull the userId & startDate from the JSON. I've tried using the explode command as shown in this example.
Some help in being pointed in the right direction would be great please. Am I better just building a custom schema and trying to parse the JSON into that?
All I'm looking to do is return the result as per the image but each userId and startDate on on its own row due to them relating to each other.
CodePudding user response:
Explode results
to get them into row
df = spark.read.json("./sample.json", multiLine=True)
df2 = df.withColumn('d', explode(col('d.results')))
df2.select(df2.d.userId, df2.d.startDate).show(10,False)
-------- ---------------------
|d.userId|d.startDate |
-------- ---------------------
|60000033|/Date(1659398400000)/|
|100003 |/Date(1635724800000)/|
-------- ---------------------
You can add as many attributes as required e.g
df.select(explode(col('d.results'))).\
selectExpr("col.userId","col.startDate","col.lastModifiedBy").\
show(10,False)
-------- --------------------- --------------
|userId |startDate |lastModifiedBy|
-------- --------------------- --------------
|60000033|/Date(1659398400000)/|This Dude |
|100003 |/Date(1635724800000)/|This Dudette |
-------- --------------------- --------------