We have a requirement to covert XML file to csv with total record count added in trailer and add some columns from xml file into one column. By using below XSLT I am able to achieve but input xml file has duplicate and same getting added to result csv file. Please refer below example for more detail.
Sample input xml file
<?xml version="1.0" encoding="UTF-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<ns1:getDocumentByKeyResponse xmlns:ns1="http://www.taleo.com/ws/integration/toolkit/2005/07">
<Document xmlns="http://www.taleo.com/ws/integration/toolkit/2005/07">
<Content>
<ExportXML>
<record>
<field name="position_id">1110684</field>
<field name="status">Approved</field>
<field name="number_of_openings">14</field>
<field name="Internal_Description1">test</field>
<field name="Internal_Description2">test1</field>
<field name="Internal_Description3">test2</field>
<field name="Internal_Description4" />
<field name="Internal_Description5" />
<field name="Description1">test5</field>
<field name="Description2">test6</field>
<field name="Description3">test7</field>
<field name="Description4" />
<field name="Description5" />
<field name="Req_Creation_Date">2011-08-31 11:54:07</field>
<field name="Req_Last_Modified_Date">2022-09-02 13:44:07</field>
</record>
<record>
<field name="position_id">1110684</field>
<field name="status">Approved</field>
<field name="number_of_openings">14</field>
<field name="Internal_Description1">test</field>
<field name="Internal_Description2">test1</field>
<field name="Internal_Description3">test2</field>
<field name="Internal_Description4" />
<field name="Internal_Description5" />
<field name="Description1">test5</field>
<field name="Description2">test6</field>
<field name="Description3">test7</field>
<field name="Description4" />
<field name="Description5" />
<field name="Req_Creation_Date">2011-08-31 11:54:07</field>
<field name="Req_Last_Modified_Date">2022-09-02 13:44:07</field>
</record>
</ExportXML>
</Content>
</Document>
</ns1:getDocumentByKeyResponse>
</soap:Body>
</soap:Envelope>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:fct="http://www.taleo.com/xsl_functions"
xmlns:itk="http://www.taleo.com/ws/integration/toolkit/2005/07"
xmlns:quer="http://www.taleo.com/ws/integration/query"
version="2.0">
<xsl:output method="text" encoding="UTF-8" />
<xsl:param name="csvDelimiter">~</xsl:param>
<xsl:param name="csvQuoteCharacter">"</xsl:param>
<xsl:param name="requestFile" />
<xsl:param name="lineBreak"><xsl:text>
</xsl:text></xsl:param>
<!-- ======================================= -->
<!-- Root template. -->
<!-- ======================================= -->
<xsl:template match="/">
<xsl:for-each-group select="//itk:field" group-by="replace(@name, '[0-9]', '')">
<xsl:value-of select="current-grouping-key()" />
<xsl:value-of select="if (position() != last()) then $csvDelimiter else $lineBreak" />
</xsl:for-each-group>
<xsl:text />
<xsl:apply-templates select="//itk:record" />
<!-- Build trailer record. -->
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:text>T</xsl:text>
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:value-of select="$csvDelimiter" />
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:value-of select="format-number(count(//itk:record), '000000000')" />
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:text />
</xsl:template>
<!-- ======================================= -->
<!-- Template matching each record. -->
<!-- ======================================= -->
<xsl:template match="itk:record">
<xsl:for-each-group select="itk:field" group-by="replace(@name, '[0-9]', '')">
<!-- String join all group values -->
<xsl:variable name="val">
<xsl:value-of select="current-group()" separator=" "/>
</xsl:variable>
<xsl:value-of select="$csvQuoteCharacter"/>
<xsl:value-of select="fct:quote($val)" />
<xsl:value-of select="$csvQuoteCharacter"/>
<xsl:value-of select="if (position() != last()) then $csvDelimiter else $lineBreak" />
</xsl:for-each-group>
<xsl:text />
</xsl:template>
<!-- ======================================= -->
<!-- Quote a value if it contains the csvDelimiter or the csvQuoteCharacter. -->
<!-- ======================================= -->
<xsl:function name="fct:quote">
<xsl:param name="value" />
<xsl:choose>
<xsl:when test="contains($value, $csvDelimiter) or contains($value, $csvQuoteCharacter)">
<xsl:value-of select="replace($value, $csvQuoteCharacter, concat($csvQuoteCharacter, $csvQuoteCharacter))" />
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$value" />
</xsl:otherwise>
</xsl:choose>
</xsl:function>
<xsl:function name="fct:nvl">
<xsl:param name="value" />
<xsl:param name="replace-with" />
<xsl:choose>
<xsl:when test="string-length($value) > 0">
<xsl:value-of select="$value" />
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$replace-with" />
</xsl:otherwise>
</xsl:choose>
</xsl:function>
</xsl:stylesheet>
Below is the result
position_id~status~number_of_openings~Internal_Description~Description~Req_Creation_Date~Req_Last_Modified_Date
1110684~Approved~14~test test1 test3~test5 test6 test7~2011-08-31 11:54:07~2022-09-02 13:44:07
1110684~Approved~14~test test1 test3~test5 test6 test7~2011-08-31 11:54:07~2022-09-02 13:44:07
T~2
Expected output
position_id~status~number_of_openings~Internal_Description~Description~Req_Creation_Date~Req_Last_Modified_Date
1110684~Approved~14~test test1 test3~test5 test6 test7~2011-08-31 11:54:07~2022-09-02 13:44:07
T~1
Result file should not have duplicate record
CodePudding user response:
Using Powershell
using assembly System.Xml
using assembly System.Xml.Linq
$input_filename = "c:\temp\test.xml"
$output_filename = "c:\temp\test.csv"
$xDoc = [System.Xml.Linq.XDocument]::Load($input_filename)
$records = $xDoc.Descendants().Where( {$_.Name.LocalName -eq "record"})
$table = @()
foreach($record in $records)
{
Write-Host "Record"
$newRow = New-Object -TypeName psobject
foreach($field in $record.Elements().Where({$_.Name.LocalName -eq "field"}))
{
$name = $field.Attribute("name").Value
$value = $field.Value
$newRow | Add-Member -NotePropertyName $name -NotePropertyValue $value
}
$table = $newRow
}
$table | Format-Table
$uniqueTable = $table | Sort-Object -Property @{Expression={$_.position_id.Trim()}} -Unique
$uniqueTable | Export-Csv -Path $output_filename -NoTypeInformation
CodePudding user response:
Not sure what makes a record a duplicate but if it's that all of the values are the same, you could group the record
elements by joining the field
values too.
So instead of:
<xsl:apply-templates select="//itk:record" />
you could do:
<xsl:for-each-group select="//itk:record" group-by="string-join(*,'~')">
<xsl:copy-of select="current-group()[1]"/>
</xsl:for-each-group>
You could also store that in a variable and use that in your count()
to get the total:
<xsl:template match="/">
<xsl:for-each-group select="//itk:field" group-by="replace(@name, '[0-9]', '')">
<xsl:value-of select="current-grouping-key()" />
<xsl:value-of select="if (position() != last()) then $csvDelimiter else $lineBreak" />
</xsl:for-each-group>
<xsl:text />
<xsl:variable name="unique_records" as="element()*">
<xsl:for-each-group select="//itk:record" group-by="string-join(*,'~')">
<xsl:copy-of select="current-group()[1]"/>
</xsl:for-each-group>
</xsl:variable>
<xsl:apply-templates select="$unique_records"/>
<!-- Build trailer record. -->
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:text>T</xsl:text>
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:value-of select="$csvDelimiter" />
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:value-of select="format-number(count($unique_records), '000000000')" />
<xsl:value-of select="$csvQuoteCharacter" />
<xsl:text />
</xsl:template>