Groovy script (NiFi) - xml select dynamically attributes of child node-CodePudding

I have an xml structure like this:

 <?xml version="1.0" encoding="ISO-8859-1"?>
 <Document>
 <ExportData>
    <Site name="name" f="">
        <Kapta id1="id1">
            <Infos>
                <Info>
                    <EndPoint foo="value-name" />
                </Info>
            </Infos>
            <Samples>
                <Sample date="date" attribute1="5.44" attribute2="234" attribute3="8.45"/>
                <Sample date="date" attribute1="7.45" attribute5="8.45"/>
            </Samples>
        </Kapta>
        <Kapta id2="id2">
            <Infos>
                <Info>
                    <EndPoint foo="value-name" />
                </Info>
            </Infos>
            <Samples>
                <Sample date="date" attribute1="5.44" attribute2="234" attribute3="8.45"/>
                <Sample date="date" attribute1="7.45" attribute5="8.45" attribute6="7.45" attribute7="8.45"/>
            </Samples>
        </Kapta>
    </Site>
 </ExportData>
 </Document>

The desired output is like this:

 {"time":"date1","name":"id1_attribute1","value":5.44}
 {"time":"date1","name":"id1_attribute2","value":234}
 {"time":"date1","name":"id1_attribute3","value":8.45}
 {"time":"date2","name":"id1_attribute1","value":7.45}
 {"time":"date2","name":"id1_attribute5","value":8.45}
 {"time":"date3","name":"id2_attribute1","value":5.44}
 .
 .
 .

I get the files through the list and fetch FTP processors in NiFi, but I'm not able to print my desired output.

I am trying to get my desired output with the code from this related question, but I'm not sure how to change it in order to get it right.

So the code is this one below:

import org.apache.nifi.flowfile.FlowFile;
import org.apache.commons.io.IOUtils
import org.apache.nifi.processor.io.InputStreamCallback
import org.apache.nifi.processor.io.StreamCallback
import java.nio.charset.StandardCharsets
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import groovy.xml.dom.DOMCategory
import groovy.json.JsonGenerator

// Script body for a NiFi scripting processor: reads the incoming flow file as
// XML and replaces its content with newline-delimited JSON records.
// `session`, `log`, REL_SUCCESS and REL_FAILURE are not defined here; they are
// expected to be supplied by the processor that runs this script.
def flowFile

try {
    flowFile = session.get()
    // Nothing queued: leave without touching the session.
    if (flowFile == null) {
        return
    }

    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance()
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder()
    Document doc = null

    // Parse the whole flow file content into a DOM document.
    session.read(flowFile, { inputStream ->
        doc = dBuilder.parse(inputStream)
    } as InputStreamCallback)

    def root = doc.documentElement
    def sb = new StringBuilder()
    def jsonGenerator = new JsonGenerator.Options().disableUnicodeEscaping().build()

    // get a specific attribute
    use(DOMCategory) {
        // One JSON line per child element of Site, carrying its `id1` attribute.
        root['ExportData']['Site']['*'].each { node ->
            def data = new LinkedHashMap()
            data.id = node['@id1']
            sb.append(jsonGenerator.toJson(data))
            sb.append('\n')
        }
    }

    // get all attributes of Sample under Samples
    use(DOMCategory) {
        root['ExportData']['Site']['Kapta']['Samples']['*'].each { node ->
            def data = new LinkedHashMap()
            data.NodeName = node.name()
            def attributesMap = node.attributes()
            // One JSON line per attribute; the loop must advance x or it never ends.
            for (int x = 0; x < attributesMap.getLength(); x++) {
                data.AttrName = attributesMap.item(x).getNodeName()
                data.AttrValue = attributesMap.item(x).getNodeValue()
                sb.append(jsonGenerator.toJson(data))
                sb.append('\n')
            }
        }
    }

    // Overwrite the flow file content with the accumulated JSON lines.
    flowFile = session.write(flowFile, { inputStream, outputStream ->
        outputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8))
    } as StreamCallback)

    session.transfer(flowFile, REL_SUCCESS)

} catch (Exception e) {
    log.error('', e)
    // session.get() itself may have failed, leaving no flow file to route.
    if (flowFile != null) {
        session.transfer(flowFile, REL_FAILURE)
    }
}

This code outputs an id attribute and then, dynamically, all the sample attributes. What I want is to print, for each id, its sample attributes, as described above.

Thanks a lot for your time and effort!

CodePudding user response：

code for ExecuteGroovyScript processor

import groovy.json.JsonBuilder

// Take the next flow file; stop when there is none.
def ff = session.get()
if (!ff) {
    return
}

// Replace the XML content with a JSON array holding one object per Sample
// attribute other than `date`.
ff.write { streamIn, streamOut ->
    def document = new XmlParser().parse(streamIn)
    def records = []

    for (sample in document.ExportData.Site.Kapta.Samples.Sample) {
        def attr = sample.attributes()
        // Removing `date` keeps it out of the per-attribute records below.
        def date = attr.remove('date')

        // Sample -> Samples -> Kapta: the grandparent carries the id attribute.
        def kapta = sample.parent().parent()
        // use regexp to find id attribute by prefix `id`
        def idEntry = kapta.attributes().find { k, v -> k =~ "^id.*" }
        def id = idEntry.value

        attr.each { k, v ->
            def record = [
                time : date,
                name : "${id}_${k}",
                value: new BigDecimal(v),
            ]
            records << record
        }
    }

    streamOut.withWriter("UTF-8") { writer ->
        new JsonBuilder(records).writeTo(writer)
    }
}

// Set the mime.type attribute to match the new content, then hand the flow file to REL_SUCCESS.
ff."mime.type" = "application/json"
REL_SUCCESS << ff

output:

[
    {
        "time": "date1",
        "name": "id1_attribute1",
        "value": 5.44
    },
    {
        "time": "date1",
        "name": "id1_attribute2",
        "value": 234
    },
    {
        "time": "date1",
        "name": "id1_attribute3",
        "value": 8.45
    },
    {
        "time": "date2",
        "name": "id1_attribute1",
        "value": 7.45
    },
    {
        "time": "date2",
        "name": "id1_attribute5",
        "value": 8.45
    },
    {
        "time": "date3",
        "name": "id2_attribute1",
        "value": 5.44
    },
    {
        "time": "date3",
        "name": "id2_attribute2",
        "value": 234
    },
    {
        "time": "date3",
        "name": "id2_attribute3",
        "value": 8.45
    },
    {
        "time": "date4",
        "name": "id2_attribute1",
        "value": 7.45
    },
    {
        "time": "date4",
        "name": "id2_attribute5",
        "value": 8.45
    },
    {
        "time": "date4",
        "name": "id2_attribute6",
        "value": 7.45
    },
    {
        "time": "date4",
        "name": "id2_attribute7",
        "value": 8.45
    }
]