Home > Back-end >  JOLT: Merge specific data from JSON array using id key and leave other arrays untouched
JOLT: Merge specific data from JSON array using id key and leave other arrays untouched

Time:11-17

I previously had an issue with merging data from one object into another to avoid duplicates and produce a cleaner version of the JSON. I got a solution here that worked like a charm for a while, but after more array-valued information was added to the JSON, things got a little tricky.

I have this input (an object containing a "clubhouse" array):

{
  "clubhouse": [
    {
      "id": "01",
      "statusId": "ok",
      "stateid": "2",
      "nationalities": [
        {
          "nationalityid": "1"
        },
        {
          "nationalityid": "2"
        },
        {
          "nationalityid": "3"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "1234",
            "gender": "01"
          },
          "inamount": "1500000",
          "ratio": "12"
        }
      ]
    },
    {
      "id": "01",
      "statusId": "ok",
      "stateid": "2",
      "nationalities": [
        {
          "nationalityid": "1"
        },
        {
          "nationalityid": "2"
        },
        {
          "nationalityid": "3"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "4321",
            "gender": "02"
          },
          "inamount": "1700000",
          "ratio": "12"
        }
      ]
    },
    {
      "id": "02",
      "statusId": "ok",
      "stateid": "2",
      "nationalities": [
        {
          "nationalityid": "3"
        },
        {
          "nationalityid": "4"
        },
        {
          "nationalityid": "5"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "1333",
            "gender": "01"
          },
          "inamount": "1500000",
          "ratio": "12"
        }
      ]
    },
    {
      "id": "03",
      "statusId": "ok",
      "stateid": "5",
      "nationalities": [
        {
          "nationalityid": "3"
        },
        {
          "nationalityid": "4"
        },
        {
          "nationalityid": "5"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "", 
            "gender": ""
          },
          "inamount": "",
          "ratio": ""
        }
      ]
    },
    {
      "id": "02",
      "statusId": "ok",
      "stateid": "2",
      "nationalities": [
        {
          "nationalityid": "3"
        },
        {
          "nationalityid": "4"
        },
        {
          "nationalityid": "5"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "1334",
            "gender": "02"
          },
          "inamount": "1900000",
          "ratio": "12"
        }
      ]
    }
  ]
}

I was using this JOLT spec, but it doesn't work with the nationalities, since it loses the array they are in.

[
  {
    // Step 1: group the elements of the "clubhouse" array by their "id" value,
    // producing one top-level object per distinct id.
    "operation": "shift",
    "spec": {
      "*": {
        "*": {
          // Every attribute not matched by a more specific key below is copied
          // under the element's own "id" value, keeping its original key (&).
          // NOTE: "nationalities" is handled by this generic rule as well; per the
          // question, this is where its array structure gets lost.
          "*": "@(1,id).&",
          "investors": {
            "*": {
              "*": {
                // @(4,id) -> go 4 levels up and read the "id" value of the clubhouse element
                // &3      -> go 3 levels up to grab the literal "investors"
                // [&4]    -> go 4 levels up the tree to reach the index within the "clubhouse" array
                // &       -> replicate the leaf key of the current key-value pair
                "@": "@(4,id).&3[&4].&"
              }
            }
          }
        }
      }
    }
  },
  {
    // Step 2: get rid of the "null" values left over from the previous step
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": "=recursivelySquashNulls"
    }
  },
  {
    // Step 3: pick only the first component from the repeated values populated
    // within the arrays ("ONE"), while keeping "investors" as an array ("MANY")
    "operation": "cardinality",
    "spec": {
      "*": {
        "*": "ONE",
        "investors": "MANY"
      }
    }
  },
  {
    // Step 4: get rid of the object labels (the id values used as grouping keys)
    // NOTE(review): the desired output in the question is wrapped in a "clubhouse"
    // array, while this target path is empty - confirm the intended output root.
    "operation": "shift",
    "spec": {
      "*": ""
    }
  }
]

What I need to get is something like this:

{
  "clubhouse": [
    {
      "id": "01",
      "statusId": "ok",
      "stateid": "2",
      "nationalities": [
        {
          "nationalityid": "1"
        },
        {
          "nationalityid": "2"
        },
        {
          "nationalityid": "3"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "1234",
            "gender": "01"
          },
          "inamount": "1500000",
          "ratio": "12"
        },
        {
          "investor": {
            "id": "4321",
            "gender": "02"
          },
          "inamount": "1700000",
          "ratio": "12"
        }
      ]
    },
    {
      "id": "02",
      "statusId": "ok",
      "stateid": "2",
      "nationalities": [
        {
          "nationalityid": "3"
        },
        {
          "nationalityid": "4"
        },
        {
          "nationalityid": "5"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "1333",
            "gender": "01"
          },
          "inamount": "1500000",
          "ratio": "12"
        },
        {
          "investor": {
            "id": "1334",
            "gender": "02"
          },
          "inamount": "1900000",
          "ratio": "12"
        }
      ]
    },
    {
      "id": "03",
      "statusId": "ok",
      "stateid": "5",
      "nationalities": [
        {
          "nationalityid": "3"
        },
        {
          "nationalityid": "4"
        },
        {
          "nationalityid": "5"
        }
      ],
      "TypeId": "3",
      "investors": [
        {
          "investor": {
            "id": "",
            "gender": ""
          },
          "inamount": "",
          "ratio": ""
        }
      ]
    }
  ]
}

CodePudding user response:

You can extend the first shift transformation by adding a new object keyed "nationalities", whose identifiers go one level less deep than those of the existing "investors" object. If the remaining specs are kept as they are, the existing cardinality transformation will then pick only the first of the repeated, identical "nationalities" arrays. For example:

[
  {
    // Group the elements of the "clubhouse" array by their "id" value.
    "operation": "shift",
    "spec": {
      "*": {
        "*": {
          // Generic rule: copy each remaining attribute under the element's "id" value.
          "*": "@(1,id).&",
          // New rule: same pattern as "investors" below, but one level shallower.
          // Each "nationalities" element is copied whole ("@"); by analogy with the
          // "investors" rule, &2 is the literal "nationalities" and [&3] is the
          // index within the "clubhouse" array.
          "nationalities": {
            "*": {
              "@": "@(3,id).&2[&3]"
            }
          },
          // Existing rule: &3 is the literal "investors", [&4] is the index within
          // the "clubhouse" array, and the trailing & is the current leaf key.
          "investors": {
            "*": {
              "*": {
                "@": "@(4,id).&3[&4].&"
              }
            }
          }
        }
      }
    }
  },
  // Placeholder: the remaining specs are kept exactly as they are in the question.
  ...
]
  • Related