Home > Enterprise >  How to convert a complex nested JSON file to CSV in Python?
How to convert a complex nested JSON file to CSV in Python?

Time:11-09

I'm trying to convert a complex JSON file to CSV using Python.



{
"commits": [
{
    "repository": "https://code.google.com/p/closure-compiler/",
    "sha1": "1f5edbcd2b5b09ec59151137e643d9ce75ef1055",
    "url": "https://code.google.com/p/closure-compiler/1f5edbcd2b5b09ec59151137e643d9ce75ef1055",
    "refactorings": [{
    "type": "Add Method Annotation",
    "description": "Add Method Annotation @Override in method public getPreciserScopeKnowingConditionOutcome(condition Node, blindScope FlowScope, outcome boolean) : FlowScope from class com.google.javascript.jscomp.ClosureReverseAbstractInterpreter",
    "leftSideLocations": [{
        "filePath": "src/com/google/javascript/jscomp/ClosureReverseAbstractInterpreter.java",
        "startLine": 191,
        "endLine": 215,
        "startColumn": 3,
        "endColumn": 4,
        "codeElementType": "METHOD_DECLARATION",
        "description": "original method declaration",
        "codeElement": "public getPreciserScopeKnowingConditionOutcome(condition Node, blindScope FlowScope, outcome boolean) : FlowScope"
}],
    "rightSideLocations": [{
        "filePath": "src/com/google/javascript/jscomp/ClosureReverseAbstractInterpreter.java",
        "startLine": 200,
        "endLine": 200,
        "startColumn": 3,
        "endColumn": 12,
        "codeElementType": "ANNOTATION",
        "description": "added annotation",
        "codeElement": "@Override"
}, {
        "filePath": "src/com/google/javascript/jscomp/ClosureReverseAbstractInterpreter.java",
        "startLine": 200,
        "endLine": 223,
        "startColumn": 3,
        "endColumn": 4,
        "codeElementType": "METHOD_DECLARATION",
        "description": "method declaration with added annotation",
        "codeElement": "public getPreciserScopeKnowingConditionOutcome(condition Node, blindScope FlowScope, outcome boolean) : FlowScope"
}]
},
{
    "type": "Extract And Move Method",
    "description": "Extract And Move Method private getNativeTypeForTypeOf(value String) : JSType extracted from protected caseTopType(topType JSType) : JSType in class com.google.javascript.jscomp.ChainableReverseAbstractInterpreter.RestrictByOneTypeOfResultVisitor & moved to class com.google.javascript.jscomp.ChainableReverseAbstractInterpreter",
    "leftSideLocations": [{
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 485,
        "endLine": 501,
        "startColumn": 5,
        "endColumn": 6,
        "codeElementType": "METHOD_DECLARATION",
        "description": "source method declaration before extraction",
        "codeElement": "protected caseTopType(topType JSType) : JSType"
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 489,
        "endLine": 489,
        "startColumn": 11,
        "endColumn": 45,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 491,
        "endLine": 491,
        "startColumn": 11,
        "endColumn": 46,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 493,
        "endLine": 493,
        "startColumn": 11,
        "endColumn": 45,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 495,
        "endLine": 495,
        "startColumn": 11,
        "endColumn": 43,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 497,
        "endLine": 497,
        "startColumn": 11,
        "endColumn": 54,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 500,
        "endLine": 500,
        "startColumn": 7,
        "endColumn": 22,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 496,
        "endLine": 498,
        "startColumn": 16,
        "endColumn": 10,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 494,
        "endLine": 498,
        "startColumn": 16,
        "endColumn": 10,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 492,
        "endLine": 498,
        "startColumn": 16,
        "endColumn": 10,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 490,
        "endLine": 498,
        "startColumn": 16,
        "endColumn": 10,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 488,
        "endLine": 498,
        "startColumn": 9,
        "endColumn": 10,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 494,
        "endLine": 496,
        "startColumn": 47,
        "endColumn": 10,
        "codeElementType": "BLOCK",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 492,
        "endLine": 494,
        "startColumn": 44,
        "endColumn": 10,
        "codeElementType": "BLOCK",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 490,
        "endLine": 492,
        "startColumn": 45,
        "endColumn": 10,
        "codeElementType": "BLOCK",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 488,
        "endLine": 490,
        "startColumn": 37,
        "endColumn": 10,
        "codeElementType": "BLOCK",
        "description": "extracted code from source method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 496,
        "endLine": 498,
        "startColumn": 46,
        "endColumn": 10,
        "codeElementType": "BLOCK",
        "description": "extracted code from source method declaration",
        "codeElement": null
}],
    "rightSideLocations": [{
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 595,
        "endLine": 618,
        "startColumn": 3,
        "endColumn": 4,
        "codeElementType": "METHOD_DECLARATION",
        "description": "extracted method declaration",
        "codeElement": "private getNativeTypeForTypeOf(value String) : JSType"
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 606,
        "endLine": 606,
        "startColumn": 7,
        "endColumn": 41,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 608,
        "endLine": 608,
        "startColumn": 7,
        "endColumn": 42,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 610,
        "endLine": 610,
        "startColumn": 7,
        "endColumn": 41,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 612,
        "endLine": 612,
        "startColumn": 7,
        "endColumn": 39,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 614,
        "endLine": 614,
        "startColumn": 7,
        "endColumn": 50,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 616,
        "endLine": 616,
        "startColumn": 7,
        "endColumn": 19,
        "codeElementType": "RETURN_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 613,
        "endLine": 617,
        "startColumn": 12,
        "endColumn": 6,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 611,
        "endLine": 617,
        "startColumn": 12,
        "endColumn": 6,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 609,
        "endLine": 617,
        "startColumn": 12,
        "endColumn": 6,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 607,
        "endLine": 617,
        "startColumn": 12,
        "endColumn": 6,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 605,
        "endLine": 617,
        "startColumn": 5,
        "endColumn": 6,
        "codeElementType": "IF_STATEMENT",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 611,
        "endLine": 613,
        "startColumn": 43,
        "endColumn": 6,
        "codeElementType": "BLOCK",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 609,
        "endLine": 611,
        "startColumn": 40,
        "endColumn": 6,
        "codeElementType": "BLOCK",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 607,
        "endLine": 609,
        "startColumn": 41,
        "endColumn": 6,
        "codeElementType": "BLOCK",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 605,
        "endLine": 607,
        "startColumn": 33,
        "endColumn": 6,
        "codeElementType": "BLOCK",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 613,
        "endLine": 615,
        "startColumn": 42,
        "endColumn": 6,
        "codeElementType": "BLOCK",
        "description": "extracted code to extracted method declaration",
        "codeElement": null
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 486,
        "endLine": 496,
        "startColumn": 5,
        "endColumn": 6,
        "codeElementType": "METHOD_DECLARATION",
        "description": "source method declaration after extraction",
        "codeElement": "protected caseTopType(topType JSType) : JSType"
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 490,
        "endLine": 490,
        "startColumn": 29,
        "endColumn": 58,
        "codeElementType": "METHOD_INVOCATION",
        "description": "extracted method invocation",
        "codeElement": "getNativeTypeForTypeOf(value)"
}, {
        "filePath": "src/com/google/javascript/jscomp/ChainableReverseAbstractInterpreter.java",
        "startLine": 615,
        "endLine": 617,
        "startColumn": 12,
        "endColumn": 6,
        "codeElementType": "BLOCK",
        "description": "added statement in extracted method declaration",
        "codeElement": null
}]
}
]
}]
}

I tried different approaches. However, none of them works perfectly. In particular, I used the solution reported in this post: How to convert a nested JSON file into a Pandas dataframe?

But this solution only partially works, and only if I manually remove the first line (the "commits" line):

{
"commits":
.
.
}

However, even after removing this line, the generated CSV does not include all the fields, only a subset of them. I also tried using the json_normalize function, but in that case the generated CSV file had more than one field packed into a single column.

How can I fix it?

CodePudding user response:

Using json_normalize()

# Flatten the nested commit data into one row per code location and write it
# to CSV. Assumes `pd` is pandas and `data` is the already-parsed JSON document
# shown above (a dict with a top-level "commits" list) -- neither is defined in
# this snippet.

# Commit- and refactoring-level fields repeated on every location row.
_META_FIELDS = [
    "repository",
    "sha1",
    "url",
    ["refactorings", "type"],
    ["refactorings", "description"],
]


def _flatten_locations(commits, side_key, side_label):
    """Return one row per entry of refactorings[*][side_key], tagged with side_label.

    commits    -- list of commit dicts (the value of data["commits"]).
    side_key   -- "leftSideLocations" or "rightSideLocations".
    side_label -- value stored in the leading "Location" column.
    """
    frame = pd.json_normalize(
        data=commits,
        meta=_META_FIELDS,
        record_path=["refactorings", side_key],
    )
    # json_normalize names the nested meta columns "refactorings.type" and
    # "refactorings.description". Keeping only the text after the last "."
    # would yield two columns both called "description" (the location's and
    # the refactoring's), so the prefix is kept and only the separator changes.
    frame.columns = frame.columns.str.replace(".", "_", regex=False)
    frame.insert(loc=0, column="Location", value=side_label)
    return frame


left_df = _flatten_locations(data["commits"], "leftSideLocations", "left_side")
right_df = _flatten_locations(data["commits"], "rightSideLocations", "right_side")

# Stack both sides and renumber the rows before writing the file.
pd.concat([left_df, right_df]).reset_index(drop=True).to_csv("/path/to/file/un-nested.csv")
  • Related