I have an xml structure like this:
<?xml version="1.0" encoding="ISO-8859-1"?>
<Document>
<ExportData>
<Site name="name" f="">
<Kapta id1="id1">
<Infos>
<Info>
<EndPoint foo="value-name" />
</Info>
</Infos>
<Samples>
<Sample date="date" attribute1="5.44" attribute2="234" attribute3="8.45"/>
<Sample date="date" attribute1="7.45" attribute5="8.45"/>
</Samples>
</Kapta>
<Kapta id2="id2">
<Infos>
<Info>
<EndPoint foo="value-name" />
</Info>
</Infos>
<Samples>
<Sample date="date" attribute1="5.44" attribute2="234" attribute3="8.45"/>
<Sample date="date" attribute1="7.45" attribute5="8.45" attribute6="7.45" attribute7="8.45"/>
</Samples>
</Kapta>
</Site>
</ExportData>
</Document>
The desired output is like this:
{"time":"date1","name":"id1_attribute1","value":5.44}
{"time":"date1","name":"id1_attribute2","value":234}
{"time":"date1","name":"id1_attribute3","value":8.45}
{"time":"date2","name":"id1_attribute1","value":7.45}
{"time":"date2","name":"id1_attribute5","value":8.45}
{"time":"date3","name":"id2_attribute1","value":5.44}
.
.
.
I get the files through the list and fetch FTP processors in NiFi, but I'm not able to print my desired output.
I am trying to get my desired output with the code from this related question, but I'm not sure how to change it to get it right.
The code is the one below:
import org.apache.nifi.flowfile.FlowFile;
import org.apache.commons.io.IOUtils
import org.apache.nifi.processor.io.InputStreamCallback
import org.apache.nifi.processor.io.StreamCallback
import java.nio.charset.StandardCharsets
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import groovy.xml.dom.DOMCategory
import groovy.json.JsonGenerator
// NiFi script body: parses the incoming flow file as XML and replaces its
// content with newline-delimited JSON. It first emits one {"id": ...} line per
// child element of <Site>, then one line per attribute of every element found
// under <Kapta>/<Samples>.
def flowFile = session.get()
// session.get() returns null when nothing is queued; bail out before touching
// the session so the catch block never tries to transfer a null flow file.
if (!flowFile) {
    return
}
try {
    // NOTE(review): if the input can come from an untrusted source, consider
    // disabling DTDs/external entities on the factory before parsing.
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance()
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder()
    Document doc = null
    session.read(flowFile, { inputStream ->
        doc = dBuilder.parse(inputStream)
    } as InputStreamCallback)

    def root = doc.documentElement
    def sb = new StringBuilder()
    def jsonGenerator = new JsonGenerator.Options().disableUnicodeEscaping().build()

    // get a specific attribute
    use(DOMCategory) {
        root['ExportData']['Site']['*'].each { node ->
            def data = new LinkedHashMap()
            data.id = node['@id1']
            sb.append(jsonGenerator.toJson(data))
            sb.append('\n')
        }
    }

    // get all attributes of Sample under Samples
    use(DOMCategory) {
        root['ExportData']['Site']['Kapta']['Samples']['*'].each { node ->
            def data = new LinkedHashMap()
            data.NodeName = node.name()
            def attributesMap = node.attributes()
            // The increment was missing from the original loop header, which
            // made the script fail to compile.
            for (int x = 0; x < attributesMap.getLength(); x++) {
                // data is reused per attribute: AttrName/AttrValue are
                // overwritten and the map is serialized on every iteration.
                data.AttrName = attributesMap.item(x).getNodeName()
                data.AttrValue = attributesMap.item(x).getNodeValue()
                sb.append(jsonGenerator.toJson(data))
                sb.append('\n')
            }
        }
    }

    // Overwrite the flow file content with the accumulated JSON lines.
    flowFile = session.write(flowFile, { inputStream, outputStream ->
        outputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8))
    } as StreamCallback)
    session.transfer(flowFile, REL_SUCCESS)
} catch (Exception e) {
    log.error('', e)
    session.transfer(flowFile, REL_FAILURE)
}
This code outputs an id attribute and then, dynamically, all sample attributes. I want to print the output as described above: for each id, its sample attributes.
Thanks a lot for your time and effort!
CodePudding user response:
code for ExecuteGroovyScript
processor
import groovy.json.JsonBuilder
// ExecuteGroovyScript body: rewrites the flow file content from XML into a
// JSON array with one {time, name, value} object per Sample attribute.
def flowFile = session.get()
if (!flowFile) {
    return
}
flowFile.write { input, output ->
    def document = new XmlParser().parse(input)
    def records = []
    document.ExportData.Site.Kapta.Samples.Sample.each { sampleNode ->
        def sampleAttributes = sampleNode.attributes()
        // Pull 'date' out of the map so that only the value attributes remain.
        def timestamp = sampleAttributes.remove('date')
        // Sample -> Samples -> Kapta: the grandparent carries the id attribute.
        def kaptaAttributes = sampleNode.parent().parent().attributes()
        // use regexp to find id attribute by prefix `id`
        def kaptaId = kaptaAttributes.find { key, value -> key =~ "^id.*" }.value
        sampleAttributes.each { key, value ->
            records << [
                time: timestamp,
                name: "${kaptaId}_${key}",
                value: new BigDecimal(value),
            ]
        }
    }
    output.withWriter("UTF-8") { writer -> new JsonBuilder(records).writeTo(writer) }
}
flowFile."mime.type" = "application/json"
REL_SUCCESS << flowFile
output:
[
{
"time": "date1",
"name": "id1_attribute1",
"value": 5.44
},
{
"time": "date1",
"name": "id1_attribute2",
"value": 234
},
{
"time": "date1",
"name": "id1_attribute3",
"value": 8.45
},
{
"time": "date2",
"name": "id1_attribute1",
"value": 7.45
},
{
"time": "date2",
"name": "id1_attribute5",
"value": 8.45
},
{
"time": "date3",
"name": "id2_attribute1",
"value": 5.44
},
{
"time": "date3",
"name": "id2_attribute2",
"value": 234
},
{
"time": "date3",
"name": "id2_attribute3",
"value": 8.45
},
{
"time": "date4",
"name": "id2_attribute1",
"value": 7.45
},
{
"time": "date4",
"name": "id2_attribute5",
"value": 8.45
},
{
"time": "date4",
"name": "id2_attribute6",
"value": 7.45
},
{
"time": "date4",
"name": "id2_attribute7",
"value": 8.45
}
]