Home > Software design >  How to extract content from specific tag to tag in xml using java
How to extract content from specific tag to tag in xml using java

Time:01-20

I want to iterate through an XML document and extract specific content from it using Java. Every solution I have found uses XPath, but XPath selects nodes by tag name, and in my case I do not know in advance which tags will appear: the tag names can vary dynamically. Below is the XML:

<Employees>  
    <Employee>
        <id>1</id>  
        <username>user1</username>
    </Employee>

    <Detail>
        <greet>Hi</greet>
    </Detail>

    <Employee>  
        <id>2</id>  
        <username>user2</username>
    </Employee>

    <Note>
        <date>2023-01-20</date>
        <heading>reminder</heading>
    </Note>

    <Detail>
        <greet>Hello</greet>
    </Detail>

    <Employee>
        <id>3</id>  
        <username>user3</username>
    </Employee>
</Employees> 

I want to extract each Employee tag together with the tags that follow it, up to the next Employee tag. I don't want to iterate through the child tags of an Employee. For example, first I should get:

<Employee>
    <id>1</id>  
    <username>user1</username>
</Employee>

<Detail>
    <greet>Hi</greet>
</Detail>

and next I should get :

<Employee>  
    <id>2</id>  
    <username>user2</username>
</Employee>

<Note>
    <date>2023-01-20</date>
    <heading>reminder</heading>
</Note>

<Detail>
    <greet>Hello</greet>
</Detail>

and next I should get :

<Employee>
    <id>3</id>  
    <username>user3</username>
</Employee>

Can anyone please help me with how to implement this?

CodePudding user response:

You can use XPath to extract all root node's child elements, then iterate over these elements and use Element.getTagName() to check for Employee elements.

Here is a complete example:

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;

/**
 * Splits the child elements of an {@code <Employees>} root into parts, where each part
 * starts at an {@code <Employee>} element and contains every sibling element that follows
 * it up to (but not including) the next {@code <Employee>} element.
 *
 * <p>The names of the elements between two {@code <Employee>} elements do not need to be
 * known in advance; only the tag name that starts a new part is fixed.
 */
public class ExtractEmployee {

  /** Tag name of the element that opens a new part. */
  private static final String PART_START_TAG = "Employee";

  /** Sample document processed by {@link #main(String[])}. */
  private static final String XML = "<Employees>  \n"
    + "  <Employee>\n"
    + "    <id>1</id>  \n"
    + "    <username>user1</username>\n"
    + "  </Employee>\n"
    + "  <Detail>\n"
    + "    <greet>Hi</greet>\n"
    + "  </Detail>\n"
    + "  <Employee>  \n"
    + "    <id>2</id>  \n"
    + "    <username>user2</username>\n"
    + "  </Employee>\n"
    + "  <Note>\n"
    + "    <date>2023-01-20</date>\n"
    + "    <heading>reminder</heading>\n"
    + "  </Note>\n"
    + "  <Detail>\n"
    + "    <greet>Hello</greet>\n"
    + "  </Detail>\n"
    + "  <Employee>\n"
    + "    <id>3</id>  \n"
    + "    <username>user3</username>\n"
    + "  </Employee>\n"
    + "</Employees> \n";

  /**
   * Parses the sample document, groups the root's child elements into parts and prints
   * each part, preceded by a separator line and its 1-based part number, to standard output.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      DocumentBuilder builder = factory.newDocumentBuilder();
      Document document = builder.parse(new InputSource(new StringReader(XML)));

      // "/Employees/*" selects every child ELEMENT of the root, whatever its tag name is.
      XPath xpath = XPathFactory.newInstance().newXPath();
      NodeList children =
          (NodeList) xpath.evaluate("/Employees/*", document, XPathConstants.NODESET);

      List<List<Element>> result = groupByEmployee(children);

      int partNr = 1;
      for (List<Element> part : result) {
        System.out.println("--------------------------------------------------------------------------------");
        System.out.println(String.format("Part number: %d", partNr++));
        for (Element el : part) {
          System.out.println(element2String(el));
        }
      }
    } catch (ParserConfigurationException | SAXException | IOException
        | XPathExpressionException | TransformerException e) {
      e.printStackTrace();
    }
  }

  /**
   * Groups the given elements into parts that each begin with an {@code <Employee>} element.
   *
   * <p>Elements that appear before the first {@code <Employee>} element belong to no part
   * and are dropped.
   *
   * @param list nodes to group, in document order; every node must be an {@link Element}
   * @return the parts in document order; empty if there is no {@code <Employee>} element
   */
  private static List<List<Element>> groupByEmployee(NodeList list) {
    List<List<Element>> result = new ArrayList<>();
    List<Element> current = null;

    for (int i = 0; i < list.getLength(); i++) {
      // Safe cast: the caller passes the result of an XPath "*" step, which yields elements only.
      Element el = (Element) list.item(i);
      if (PART_START_TAG.equals(el.getTagName())) {
        // A new Employee closes the part collected so far and opens the next one.
        if (current != null) {
          result.add(current);
        }
        current = new ArrayList<>();
        current.add(el);
      } else if (current != null) {
        current.add(el);
      }
    }
    // The last part has no following Employee to close it, so add it here.
    if (current != null) {
      result.add(current);
    }
    return result;
  }

  /**
   * Serializes an element, including its content, to XML text without an XML declaration.
   *
   * @param element the element to serialize
   * @return the XML text of {@code element}
   * @throws TransformerException if the transformer cannot be created or serialization fails
   */
  private static String element2String(Element element) throws TransformerException {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    StringWriter writer = new StringWriter();
    transformer.transform(new DOMSource(element), new StreamResult(writer));
    return writer.toString();
  }

}

Output is:

--------------------------------------------------------------------------------
Part number: 1
<Employee>
    <id>1</id>  
    <username>user1</username>
  </Employee>
<Detail>
    <greet>Hi</greet>
  </Detail>
--------------------------------------------------------------------------------
Part number: 2
<Employee>  
    <id>2</id>  
    <username>user2</username>
  </Employee>
<Note>
    <date>2023-01-20</date>
    <heading>reminder</heading>
  </Note>
<Detail>
    <greet>Hello</greet>
  </Detail>
--------------------------------------------------------------------------------
Part number: 3
<Employee>
    <id>3</id>  
    <username>user3</username>
  </Employee>
  • Related