Home > database >  csv to xml properties using java
csv to xml properties using java

Time:11-17

I have a set of CSV data that I need to convert to an XML file using Java.

My issue is that I need to convert it into a special format like this (XML properties format, i.e. each value stored as an attribute of the row):

<?xml version="1.0"?>
<root>
    <row col1="All" Col2="0" Col3="" Col4="0" Col5="0"></row>
    <row col1="All" Col2="935" Col3="231" Col4="0" Col5="30"></row>
    <row col1="None" Col2="1011" Col3="257" Col4="0" Col5="30"></row>
    .
    .
    ...
</root>

The function to convert my CSV dataset to XML is:

/**
 * Converts a delimited text file into an XML document and writes it to disk.
 *
 * <p>The first line of the input supplies the field names. Every following
 * line becomes a {@code <row>} element under {@code <root>}, with one child
 * element per field, named after the matching header and holding the field's
 * text.
 *
 * <p>Exceptions are caught and printed to {@code System.err}; they are not
 * propagated to the caller.
 *
 * @param csvFileName path of the CSV file to read
 * @param xmlFileName path of the XML file to write
 * @param delimiter   delimiter characters handed to {@link StringTokenizer};
 *                    each character of the string acts as a separator
 */
public void convertFile(String csvFileName, String xmlFileName,
        String delimiter) {

    try {
        Document newDoc = domBuilder.newDocument();
        // Root element
        Element rootElement = newDoc.createElement("root");
        newDoc.appendChild(rootElement);

        // Read csv file; try-with-resources closes the reader even when
        // parsing fails part-way through.
        try (BufferedReader csvReader = new BufferedReader(new FileReader(csvFileName))) {
            String[] csvFields = null;

            // The first line carries the column names.
            String curLine = csvReader.readLine();
            if (curLine != null) {
                // NOTE: StringTokenizer collapses consecutive delimiters, so an
                // empty field ("a,,b") is skipped instead of being returned as "".
                StringTokenizer stringTokenizer = new StringTokenizer(curLine, delimiter);
                int fieldCount = stringTokenizer.countTokens();
                if (fieldCount > 0) {
                    csvFields = new String[fieldCount];
                    int i = 0;
                    while (stringTokenizer.hasMoreElements()) {
                        // The index must advance, otherwise every header
                        // overwrites slot 0.
                        csvFields[i++] = String.valueOf(stringTokenizer.nextElement());
                    }
                }
            }

            // Every remaining line becomes one <row> element.
            while ((curLine = csvReader.readLine()) != null) {
                StringTokenizer stringTokenizer = new StringTokenizer(curLine, delimiter);
                if (stringTokenizer.countTokens() > 0) {
                    Element rowElement = newDoc.createElement("row");
                    int i = 0;
                    while (stringTokenizer.hasMoreElements()) {
                        String curValue = String.valueOf(stringTokenizer.nextElement());
                        try {
                            Element curElement = newDoc.createElement(csvFields[i++]);
                            curElement.appendChild(newDoc.createTextNode(curValue));
                            rowElement.appendChild(curElement);
                        } catch (RuntimeException exp) {
                            // Best effort: a field without a usable header (no
                            // header line, more values than headers, or a header
                            // that is not a legal element name) is dropped, but
                            // the rest of the row is still converted.
                            System.err.println("Skipping field '" + curValue + "': " + exp);
                        }
                    }
                    rootElement.appendChild(rowElement);
                }
            }
        }

        // Save the document to the disk file
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();
        Source src = new DOMSource(newDoc);
        Result result = new StreamResult(new File(xmlFileName));
        aTransformer.transform(src, result);

        // Output to console for testing
        // Result result = new StreamResult(System.out);

    } catch (IOException exp) {
        System.err.println(exp.toString());
    } catch (Exception exp) {
        System.err.println(exp.toString());
    }

}

But the generated XML file looks like this, which is not the format that I'm looking for:

<dataset>
    <row>
        <Col1>All</Col1>
        <Col2>0</Col2>
        <Col3></Col3>
        <Col4>0</Col4>
        <Col5>0</Col5>
    </row>
    <row>
        <Col1>All</Col1>
        <Col2>935</Col2>
        <Col3>231</Col3>
        <Col4>0</Col4>
        <Col5>30</Col5>
    </row>
        <row>
        <Col1>None</Col1>
        <Col2>1011</Col2>
        <Col3>257</Col3>
        <Col4>0</Col4>
        <Col5>30</Col5>
    </row>
</dataset>

Could you please help me?

CodePudding user response:

An easy way to handle this is to parse the CSV into objects of a POJO class. Using JAXB's Marshaller in Java, you can then convert those objects to XML directly.

Yes, I agree your problem is you are getting results as elements instead of attributes. It can be easily configured in the POJO class as below.

@XmlAttribute(name = "") @XmlRootElement(name = "") @XmlElement(name = "")

Reference - https://howtodoinjava.com/jaxb/marshaller-example/

CodePudding user response:

I would prefer an OO approach too, as in - create a POJO and then process it to xml, but if you want a pure file to file approach, you could do something like this:

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Converts a delimited text file into XML in which every data line becomes a
 * {@code <row>} element whose attributes are named after the header line:
 *
 * <pre>{@code
 * <root>
 *     <row Col1="All" Col2="0" Col3=""/>
 * </root>
 * }</pre>
 *
 * <p>Command-line usage: {@code java Csv2Xml <input.csv> <output.xml>}
 */
public class Csv2Xml {

    /** Characters removed from header names before they are used as attribute names. */
    private static final Pattern BAD_CHARS = Pattern.compile("[<>&'\",#~@;^$£()\\[\\]=]");

    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java Csv2Xml <input.csv> <output.xml>");
            return;
        }
        try {
            // Fields are separated by commas, with optional surrounding whitespace.
            new Csv2Xml().convertFile(args[0], args[1], "\\s*,\\s*");
        } catch (Throwable t) {
            t.printStackTrace();
        }
    }

    /**
     * Reads the CSV file and writes the attribute-style XML file.
     *
     * <p>Exceptions raised during the conversion are caught and their stack
     * trace is printed; they are not propagated to the caller.
     *
     * @param args exactly three values are used, in this order: the path of
     *             the CSV file to read, the path of the XML file to write, and
     *             the regular expression that separates fields on a line
     */
    public void convertFile(String... args) {
        if (args == null || args.length < 3) {
            System.err.println("convertFile expects: <csvFile> <xmlFile> <delimiterRegex>");
            return;
        }
        try {
            Document newDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            // Root element
            Element rootElement = newDoc.createElement("root");
            newDoc.appendChild(rootElement);

            // Compile the delimiter once instead of once per line.
            Pattern delimiter = Pattern.compile(args[2]);
            List<String> lines = Files.readAllLines(Path.of(args[0]));

            // An empty input file simply yields an empty <root/>.
            if (!lines.isEmpty()) {
                // The first line holds the column names; clean them once up front.
                String[] attributeNames = Stream.of(delimiter.split(lines.get(0)))
                        .map(this::sanitizeAttributeName)
                        .toArray(String[]::new);
                for (int i = 1; i < lines.size(); i++) {
                    String[] values = delimiter.split(lines.get(i));
                    // i + 1 is the 1-based line number used in diagnostics.
                    rootElement.appendChild(toRow(newDoc, attributeNames, values, i + 1));
                }
            }

            // Save the document to the disk file
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(new File(args[1]));
            aTransformer.transform(src, result);

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds one {@code <row>} element, pairing each value with the attribute
     * name at the same position.
     *
     * @param newDoc         document used to create the element
     * @param attributeNames sanitized header names, in column order
     * @param values         the fields of one data line, in column order
     * @param lineNumber     1-based line number, used only in the warning message
     * @return the populated row element (not yet attached to the document)
     */
    private Element toRow(Document newDoc, String[] attributeNames, String[] values, int lineNumber) {
        Element row = newDoc.createElement("row");
        if (values.length > attributeNames.length) {
            // A surplus field has no header to name it; report it instead of
            // failing the whole conversion with an index error.
            System.err.println("Line " + lineNumber + ": " + values.length + " fields but only "
                    + attributeNames.length + " headers; surplus fields ignored");
        }
        int count = Math.min(values.length, attributeNames.length);
        for (int i = 0; i < count; i++) {
            row.setAttribute(attributeNames[i], values[i]);
        }
        return row;
    }

    /**
     * Turns a CSV header into a string usable as an XML attribute name.
     *
     * <p>Spaces become underscores and the characters in {@link #BAD_CHARS}
     * are removed. If the result is empty or starts with an ASCII digit,
     * {@code '-'} or {@code '.'} (none of which may begin an XML name), an
     * underscore is prepended.
     *
     * @param attName raw header text
     * @return the cleaned attribute name
     */
    protected String sanitizeAttributeName(String attName) {
        String cleaned = BAD_CHARS.matcher(attName.replace(' ', '_')).replaceAll("");
        if (cleaned.isEmpty() || isInvalidNameStart(cleaned.charAt(0))) {
            cleaned = "_" + cleaned;
        }
        return cleaned;
    }

    /** Returns true for characters that are legal inside an XML name but not as its first character. */
    private static boolean isInvalidNameStart(char c) {
        return (c >= '0' && c <= '9') || c == '-' || c == '.';
    }

}
  • Related