Home > Back-end >  Jolt Transform to "tabular format"
Jolt Transform to "tabular format"

Time:09-02

I'm using Nifi to pull in some Meteomatics API data and store it in our SQL Datawarehouse.

Nifi architecture is mostly set-up, but I would like to convert the JSON response of the API call to row/column format. I get the feeling this should be possible with a single shift operation, but I'm struggling to get multiple levels of data into my output.

See sample input (single set of coordinates, and same three parameters can be assumed):

{
  "version": "3.0",
  "user": "USERNAME",
  "dateGenerated": "2022-09-01T07:52:43Z",
  "status": "OK",
  "data": [
    {
      "parameter": "precip_24h:mm",
      "coordinates": [
        {
          "lat": 123,
          "lon": 456,
          "dates": [
            {
              "date": "2022-09-01T00:00:00Z",
              "value": 11.6
            },
            {
              "date": "2022-09-02T00:00:00Z",
              "value": 4.49
            },
            {
              "date": "2022-09-03T00:00:00Z",
              "value": 7.79
            },
            {
              "date": "2022-09-04T00:00:00Z",
              "value": 6.6
            },
            {
              "date": "2022-09-05T00:00:00Z",
              "value": 12.7
            },
            {
              "date": "2022-09-06T00:00:00Z",
              "value": 2.01
            }
          ]
        }
      ]
    },
    {
      "parameter": "heavy_rain_warning_24h:idx",
      "coordinates": [
        {
          "lat": 123,
          "lon": 456,
          "dates": [
            {
              "date": "2022-09-01T00:00:00Z",
              "value": 0
            },
            {
              "date": "2022-09-02T00:00:00Z",
              "value": 0
            },
            {
              "date": "2022-09-03T00:00:00Z",
              "value": 0
            },
            {
              "date": "2022-09-04T00:00:00Z",
              "value": 0
            },
            {
              "date": "2022-09-05T00:00:00Z",
              "value": 0
            },
            {
              "date": "2022-09-06T00:00:00Z",
              "value": 0
            }
          ]
        }
      ]
    },
    {
      "parameter": "t_0m:C",
      "coordinates": [
        {
          "lat": 123,
          "lon": 456,
          "dates": [
            {
              "date": "2022-09-01T00:00:00Z",
              "value": 27.2
            },
            {
              "date": "2022-09-02T00:00:00Z",
              "value": 27.3
            },
            {
              "date": "2022-09-03T00:00:00Z",
              "value": 27.3
            },
            {
              "date": "2022-09-04T00:00:00Z",
              "value": 27.3
            },
            {
              "date": "2022-09-05T00:00:00Z",
              "value": 27.3
            },
            {
              "date": "2022-09-06T00:00:00Z",
              "value": 28.2
            }
          ]
        }
      ]
    }
  ]
}

Desired Output:

[
  {
    "date": "2022-09-01T00:00:00Z",
    "value": 11.6,
    "parameter": "precip_24h:mm",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-02T00:00:00Z",
    "value": 4.49,
    "parameter": "precip_24h:mm",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-03T00:00:00Z",
    "value": 7.79,
    "parameter": "precip_24h:mm",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-04T00:00:00Z",
    "value": 6.6,
    "parameter": "precip_24h:mm",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-05T00:00:00Z",
    "value": 12.7,
    "parameter": "precip_24h:mm",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-06T00:00:00Z",
    "value": 2.01,
    "parameter": "precip_24h:mm",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-01T00:00:00Z",
    "value": 0,
    "parameter": "heavy_rain_warning_24h:idx",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-02T00:00:00Z",
    "value": 0,
    "parameter": "heavy_rain_warning_24h:idx",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"
  },
  {
    "date": "2022-09-03T00:00:00Z",
    "value": 0,
    "parameter": "heavy_rain_warning_24h:idx",
    "lat": "123",
    "lon": "456",
    "dateGenerated": "2022-09-01T07:52:43Z"  
  etc...

This way I can put in a simple avro converter and push directly to a SQL table with columns "date,value,parameter,lat,lon,dateGenerated".

Currently, I've gotten to this response:

[
  {
    "date": "2022-09-01T00:00:00Z",
    "value": 11.6
  },
  {
    "date": "2022-09-02T00:00:00Z",
    "value": 4.49
  },
  {
    "date": "2022-09-03T00:00:00Z",
    "value": 7.79
  }
 etc...

Using the following spec (loop through at the lowest level, return the data and put in a big array):

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "*": {
          "coordinates": {
            "*": {
              "dates": {
                "*": {
                  "@": "[]"
                }
              }
            }
          }
        }
      }
    }
  }
]

Including dateGenerated/other fields from further up the tree is giving me difficulty. I wanted to include something like this (traverse 5 levels up, get the DataGenerated Field and map it to DateGen?). This doesn't work, and I'm a little lost:

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "*": {
          "coordinates": {
            "*": {
              "dates": {
                "*": {
                  "@": "[]",
                  "@5.DateGenerated":"[].DateGen"
                }
              }
            }
          }
        }
      }
    }
  }
]

CodePudding user response:

You can use two succesive shift transformations such as

[
  {
   // distinguish each objects by some common values
    "operation": "shift",
    "spec": {
      "data": {
        "*": {
          "coo*": {
            "*": {
              "da*": {
                "*": {
                  "@": "@(5,parameter)[&1]", // separate the object by their presumed id values(parameter) after going five levels up the tree to reach its level
                  "@(2,lat)": "@(5,parameter)[&1].lat", // go two levels up the tree to reach the level of "lat" and "lon"
                  "@(2,lon)": "@(5,parameter)[&1].lon",
                  "@(4,parameter)": "@(5,parameter)[&1].parameter",
                  "@(6,dateGenerated)": "@(5,parameter)[&1].dateGenerated"
                }
              }
            }
          }
        }
      }
    }
  },
  {
   // get rid of labels and square brackets
    "operation": "shift",
    "spec": {
      "*": {
        "*": ""
      }
    }
  }
]
  • Related