Home > OS >  XSLT to covert XML file to csv with no duplicate records
XSLT to covert XML file to csv with no duplicate records

Time:02-04

We have a requirement to covert XML file to csv with total record count added in trailer and add some columns from xml file into one column. By using below XSLT I am able to achieve but input xml file has duplicate and same getting added to result csv file. Please refer below example for more detail.

Sample input xml file

<?xml version="1.0" encoding="UTF-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
   <soap:Body>
      <ns1:getDocumentByKeyResponse xmlns:ns1="http://www.taleo.com/ws/integration/toolkit/2005/07">
         <Document xmlns="http://www.taleo.com/ws/integration/toolkit/2005/07">
   
            <Content>
               <ExportXML>
                  <record>
                     <field name="position_id">1110684</field>
                     <field name="status">Approved</field>
                     <field name="number_of_openings">14</field>
                     <field name="Internal_Description1">test</field>
                     <field name="Internal_Description2">test1</field>
                     <field name="Internal_Description3">test2</field>
                     <field name="Internal_Description4" />
                     <field name="Internal_Description5" />
                     <field name="Description1">test5</field>
                     <field name="Description2">test6</field>
                     <field name="Description3">test7</field>
                     <field name="Description4" />
                     <field name="Description5" />
                     <field name="Req_Creation_Date">2011-08-31 11:54:07</field>
                     <field name="Req_Last_Modified_Date">2022-09-02 13:44:07</field>
                  </record>
                  <record>
                     <field name="position_id">1110684</field>
                     <field name="status">Approved</field>
                     <field name="number_of_openings">14</field>
                     <field name="Internal_Description1">test</field>
                     <field name="Internal_Description2">test1</field>
                     <field name="Internal_Description3">test2</field>
                     <field name="Internal_Description4" />
                     <field name="Internal_Description5" />
                     <field name="Description1">test5</field>
                     <field name="Description2">test6</field>
                     <field name="Description3">test7</field>
                     <field name="Description4" />
                     <field name="Description5" />
                     <field name="Req_Creation_Date">2011-08-31 11:54:07</field>
                     <field name="Req_Last_Modified_Date">2022-09-02 13:44:07</field>
                  </record>
               </ExportXML>
            </Content>
         </Document>
      </ns1:getDocumentByKeyResponse>
   </soap:Body>
</soap:Envelope>

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
                xmlns:fct="http://www.taleo.com/xsl_functions" 
                xmlns:itk="http://www.taleo.com/ws/integration/toolkit/2005/07"
                xmlns:quer="http://www.taleo.com/ws/integration/query" 
                version="2.0">
   <xsl:output method="text" encoding="UTF-8" />
   <xsl:param name="csvDelimiter">~</xsl:param>
   <xsl:param name="csvQuoteCharacter">"</xsl:param>
   <xsl:param name="requestFile" />
   <xsl:param name="lineBreak"><xsl:text>&#xa;</xsl:text></xsl:param>

   <!-- ======================================= -->
   <!-- Root template. -->
   <!-- ======================================= -->
   <xsl:template match="/">

      <xsl:for-each-group select="//itk:field" group-by="replace(@name, '[0-9]', '')">
         <xsl:value-of select="current-grouping-key()" />
         <xsl:value-of select="if (position() != last()) then $csvDelimiter else $lineBreak" />
      </xsl:for-each-group>
      
      <xsl:text />
      <xsl:apply-templates select="//itk:record" />
      <!-- Build trailer record. -->
      <xsl:value-of select="$csvQuoteCharacter" />
      <xsl:text>T</xsl:text>
      <xsl:value-of select="$csvQuoteCharacter" />
      <xsl:value-of select="$csvDelimiter" />
      <xsl:value-of select="$csvQuoteCharacter" />
      <xsl:value-of select="format-number(count(//itk:record), '000000000')" />
      <xsl:value-of select="$csvQuoteCharacter" />
      <xsl:text />
   </xsl:template>

   <!-- ======================================= -->
   <!-- Template matching each record. -->
   <!-- ======================================= -->
   <xsl:template match="itk:record">
      <xsl:for-each-group select="itk:field" group-by="replace(@name, '[0-9]', '')">
        <!-- String join all group values -->
         <xsl:variable name="val">
           <xsl:value-of select="current-group()" separator=" "/>
         </xsl:variable>
  
         <xsl:value-of select="$csvQuoteCharacter"/>
           <xsl:value-of select="fct:quote($val)" />
           <xsl:value-of select="$csvQuoteCharacter"/>
         <xsl:value-of select="if (position() != last()) then $csvDelimiter else $lineBreak" />
      </xsl:for-each-group>
      <xsl:text />
   </xsl:template>

   <!-- ======================================= -->
   <!-- Quote a value if it contains the csvDelimiter or the csvQuoteCharacter. -->
   <!-- ======================================= -->
   <xsl:function name="fct:quote">
      <xsl:param name="value" />
      <xsl:choose>
         <xsl:when test="contains($value, $csvDelimiter) or contains($value, $csvQuoteCharacter)">
            <xsl:value-of select="replace($value, $csvQuoteCharacter, concat($csvQuoteCharacter, $csvQuoteCharacter))" />
         </xsl:when>
         <xsl:otherwise>
            <xsl:value-of select="$value" />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:function>

   <xsl:function name="fct:nvl">
      <xsl:param name="value" />
      <xsl:param name="replace-with" />
      <xsl:choose>
         <xsl:when test="string-length($value) &gt; 0">
            <xsl:value-of select="$value" />
         </xsl:when>
         <xsl:otherwise>
            <xsl:value-of select="$replace-with" />
         </xsl:otherwise>
      </xsl:choose>
   </xsl:function>

</xsl:stylesheet>

Below is the result

position_id~status~number_of_openings~Internal_Description~Description~Req_Creation_Date~Req_Last_Modified_Date
1110684~Approved~14~test test1 test3~test5 test6 test7~2011-08-31 11:54:07~2022-09-02 13:44:07
1110684~Approved~14~test test1 test3~test5 test6 test7~2011-08-31 11:54:07~2022-09-02 13:44:07
T~2

Expected output

position_id~status~number_of_openings~Internal_Description~Description~Req_Creation_Date~Req_Last_Modified_Date
1110684~Approved~14~test test1 test3~test5 test6 test7~2011-08-31 11:54:07~2022-09-02 13:44:07
T~1

Result file should not have duplicate record

CodePudding user response:

Using Powershell

using assembly System.Xml
using assembly System.Xml.Linq 

$input_filename = "c:\temp\test.xml"
$output_filename = "c:\temp\test.csv"

$xDoc = [System.Xml.Linq.XDocument]::Load($input_filename)
$records = $xDoc.Descendants().Where( {$_.Name.LocalName -eq "record"})

$table = @()
foreach($record in $records)
{
   Write-Host "Record"
   $newRow = New-Object -TypeName psobject
   foreach($field in $record.Elements().Where({$_.Name.LocalName -eq "field"}))
   {
      $name = $field.Attribute("name").Value
      $value = $field.Value
      $newRow | Add-Member -NotePropertyName $name -NotePropertyValue $value
      
   }

   $table  = $newRow
}
$table | Format-Table
$uniqueTable = $table | Sort-Object -Property @{Expression={$_.position_id.Trim()}} -Unique
$uniqueTable | Export-Csv -Path $output_filename -NoTypeInformation

CodePudding user response:

Not sure what makes a record a duplicate but if it's that all of the values are the same, you could group the record elements by joining the field values too.

So instead of:

<xsl:apply-templates select="//itk:record" />

you could do:

<xsl:for-each-group select="//itk:record" group-by="string-join(*,'~')">
    <xsl:copy-of select="current-group()[1]"/>
</xsl:for-each-group>

You could also store that in a variable and use that in your count() to get the total:

<xsl:template match="/">
    
    <xsl:for-each-group select="//itk:field" group-by="replace(@name, '[0-9]', '')">
        <xsl:value-of select="current-grouping-key()" />
        <xsl:value-of select="if (position() != last()) then $csvDelimiter else $lineBreak" />
    </xsl:for-each-group>
    
    <xsl:text />
    <xsl:variable name="unique_records" as="element()*">
        <xsl:for-each-group select="//itk:record" group-by="string-join(*,'~')">
            <xsl:copy-of select="current-group()[1]"/>
        </xsl:for-each-group>            
    </xsl:variable>
    <xsl:apply-templates select="$unique_records"/>
    <!-- Build trailer record. -->
    <xsl:value-of select="$csvQuoteCharacter" />
    <xsl:text>T</xsl:text>
    <xsl:value-of select="$csvQuoteCharacter" />
    <xsl:value-of select="$csvDelimiter" />
    <xsl:value-of select="$csvQuoteCharacter" />
    <xsl:value-of select="format-number(count($unique_records), '000000000')" />
    <xsl:value-of select="$csvQuoteCharacter" />
    <xsl:text />
</xsl:template>

Fiddle: http://xsltfiddle.liberty-development.net/3Nqk35d

  • Related