We have an ongoing project where XSLT is used to convert OpenXML to a custom XML format. Until now the content always had a single level of sections, but now some nested sections are coming in. I have tried many things with for-each-group to get the result I need, but I am stuck on the nesting part. I am looking for help understanding how grouping works here. Here is what I have tried so far: http://xsltransform.net/pPqteBi
Sample XML:
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<w:body xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:p>
<w:pPr>
<w:pStyle w:val="TRH1"/>
</w:pPr>
<w:r>
<w:t>Heading 1 Title</w:t>
</w:r>
</w:p>
<w:p>
<w:pPr>
<w:pStyle w:val="TRSubtitle1"/>
</w:pPr>
<w:r>
<w:t>section heading 1</w:t>
</w:r>
</w:p>
<w:p>
<w:pPr>
<w:pStyle w:val="TRNormal"/>
</w:pPr>
<w:r>
<w:t>section 1 First para</w:t>
</w:r>
</w:p>
<w:p>
<w:pPr>
<w:pStyle w:val="TRNormal"/>
</w:pPr>
<w:r>
<w:t>section 1 2 para</w:t>
</w:r>
</w:p>
<w:p>
<w:pPr>
<w:pStyle w:val="TRSubtitle2"/>
</w:pPr>
<w:r>
<w:t>Section title 1.1</w:t>
</w:r>
</w:p>
<w:p>
<w:pPr>
<w:pStyle w:val="TRNormal"/>
</w:pPr>
<w:r>
<w:t>First para of section 1.1 </w:t>
</w:r>
</w:p>
<w:table>table</w:table>
<w:p>
<w:pPr>
<w:pStyle w:val="TRSubtitle1"/>
</w:pPr>
<w:r>
<w:t>section title 2</w:t>
</w:r>
</w:p>
<w:p>
<w:pPr>
<w:pStyle w:val="TRNormal"/>
</w:pPr>
<w:r>
<w:t>section 2 First para</w:t>
</w:r>
</w:p>
</w:body>
Modified XSL:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:pkg="http://schemas.microsoft.com/office/2006/xmlPackage"
xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas"
xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex"
xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex"
xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex"
xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex"
xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex"
xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
xmlns:w10="urn:schemas-microsoft-com:office:word"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"
xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"
xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk"
xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape"
xmlns:random="java:java.util.Random" xmlns:mf="http://example.com/mf"
exclude-result-prefixes="#all" version="2.0">
<xsl:output indent="yes" method="xml" encoding="UTF-8"/>
<xsl:strip-space elements="*"/>
<!-- Suppress every package part that does not contain the main w:document; the empty template emits nothing for it. -->
<xsl:template match="pkg:part[not(descendant::w:document)]"/>
<!-- Hyperlinks are dropped on purpose (agreed in the discussion with Srikanth) because footnote numbers were being printed inside these tags. -->
<xsl:template match="w:hyperlink"/>
<!--
  Builds the <chapter-poc> result from the Word body.

  The non-empty child elements are split into groups, each group starting at a
  paragraph styled 'TRSubtitle1':
    * a group that contains the 'TRH1' paragraph becomes <sub-chapter>/<sctitle>,
      filled from the children of the first element of that group;
    * every other group becomes one flat <section>, in which 'TRSubtitle1' and
      'TRSubtitle2' paragraphs become <stitle>, all other paragraphs become
      numbered <para> elements, and any other element (for example w:table)
      is copied.
-->
<xsl:template match="w:body">
  <chapter-poc>
    <ctitle>XXX XXX</ctitle>
    <xsl:for-each-group select="*[normalize-space()]"
                        group-starting-with="w:p[w:pPr/w:pStyle[@w:val='TRSubtitle1']]">
      <xsl:choose>
        <xsl:when test="current-group()[self::w:p[w:pPr/w:pStyle[@w:val='TRH1']]]">
          <sub-chapter>
            <sctitle>
              <!-- The context item is the first element of the current group. -->
              <xsl:apply-templates/>
            </sctitle>
          </sub-chapter>
        </xsl:when>
        <xsl:otherwise>
          <section>
            <xsl:for-each select="current-group()">
              <xsl:choose>
                <xsl:when test="self::w:p[w:pPr/w:pStyle[@w:val = ('TRSubtitle1', 'TRSubtitle2')]]">
                  <stitle>
                    <xsl:apply-templates/>
                  </stitle>
                </xsl:when>
                <xsl:when test="self::w:p">
                  <!--
                    Running 1-based number across the whole body: count the
                    preceding sibling paragraphs that are not headings and add
                    one for the current paragraph.
                  -->
                  <para
                    num="{count(preceding-sibling::w:p[not(w:pPr/w:pStyle[@w:val = ('TRH1', 'TRSubtitle1', 'TRSubtitle2')])]) + 1}">
                    <xsl:apply-templates/>
                  </para>
                </xsl:when>
                <xsl:otherwise>
                  <!-- Any other element (e.g. a table) is copied with its content processed by apply-templates, so no data is dropped. -->
                  <xsl:copy copy-namespaces="no">
                    <xsl:apply-templates/>
                  </xsl:copy>
                </xsl:otherwise>
              </xsl:choose>
            </xsl:for-each>
          </section>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each-group>
  </chapter-poc>
</xsl:template>
</xsl:stylesheet>
Current output:
<?xml version="1.0" encoding="UTF-8"?>
<chapter-poc>
<ctitle>XXX XXX</ctitle>
<sub-chapter>
<sctitle>Heading 1 Title</sctitle>
</sub-chapter>
<section>
<stitle>section heading 1</stitle>
<para num="1">section 1 First para</para>
<para num="2">section 1 2 para</para>
<stitle>Section title 1.1</stitle>
<para num="3">First para of section 1.1 </para>
<w:table xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">table</w:table>
</section>
<section>
<stitle>section title 2</stitle>
<para num="4">section 2 First para</para>
</section>
</chapter-poc>
Desired output:
<?xml version="1.0" encoding="UTF-8"?>
<chapter-poc>
<ctitle>XXX XXX</ctitle>
<sub-chapter>
<sctitle>Heading 1 Title</sctitle>
</sub-chapter>
<section>
<stitle>section heading 1</stitle>
<para num="1">section 1 First para</para>
<para num="2">section 1 2 para</para>
<section>
<stitle>Section title 1.1</stitle>
<para num="3">First para of section 1.1 </para>
<w:table xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">table</w:table>
</section>
</section>
<section>
<stitle>section title 2</stitle>
<para num="4">section 2 First para</para>
</section>
</chapter-poc>
CodePudding user response:
It is possible to use grouping in a recursive function:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:mf="http://example.com/mf"
exclude-result-prefixes="#all"
version="3.0">
<!--
  mf:group-sections: recursively wraps content in nested <section> elements.

  $elements  the sibling elements to group, in document order
  $index     the heading level handled by this call; a group starts at every
             paragraph whose style is 'TRSubtitle' followed by $index

  Returns the generated nodes. A group that begins with a heading of the
  current level becomes a <section> whose content is grouped again at level
  $index + 1. A group that does not begin with such a heading (content before
  the first heading of this level, or a call that finds no heading of this
  level at all) is handed to apply-templates, which also ends the recursion.
-->
<xsl:function name="mf:group-sections" as="node()*">
  <xsl:param name="elements" as="element(*)*"/>
  <xsl:param name="index" as="xs:integer"/>
  <!-- Style name that opens a section at this level, e.g. 'TRSubtitle1'. -->
  <xsl:variable name="heading-style" as="xs:string" select="'TRSubtitle' || $index"/>
  <xsl:for-each-group select="$elements"
                      group-starting-with="w:p[w:pPr/w:pStyle[@w:val = $heading-style]]">
    <xsl:choose>
      <xsl:when test="self::w:p[w:pPr/w:pStyle[@w:val = $heading-style]]">
        <section>
          <!-- Descend one heading level inside the section just opened. -->
          <xsl:sequence select="mf:group-sections(current-group(), $index + 1)"/>
        </section>
      </xsl:when>
      <xsl:otherwise>
        <xsl:apply-templates select="current-group()"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:for-each-group>
</xsl:function>
<xsl:output indent="yes" method="xml" encoding="UTF-8"/>
<xsl:strip-space elements="*"/>
<!-- Paragraph properties (w:pPr) are matched by an empty template so they produce no output; otherwise the catch-all element template would copy them into the result. -->
<xsl:template match="w:pPr"/>
<!--
  Entry point for the document body: emits the <chapter-poc> wrapper with its
  fixed title and lets mf:group-sections nest the non-empty child elements,
  starting at heading level 1.
-->
<xsl:template match="w:body">
  <xsl:variable name="non-empty-children" as="element()*" select="*[normalize-space()]"/>
  <chapter-poc>
    <ctitle>XXX XXX</ctitle>
    <xsl:sequence select="mf:group-sections($non-empty-children, 1)"/>
  </chapter-poc>
</xsl:template>
<!-- Section headings: a paragraph styled 'TRSubtitle1' through 'TRSubtitle6' becomes <stitle>. -->
<xsl:template match="w:p[w:pPr/w:pStyle/@w:val = (for $level in 1 to 6 return concat('TRSubtitle', $level))]">
  <stitle>
    <xsl:apply-templates select="node()"/>
  </stitle>
</xsl:template>
<!--
  Catch-all for elements without a more specific template (for example w:table):
  the element itself is copied without its unused namespace declarations and
  its children are processed in turn, so unknown content is kept.
-->
<xsl:template match="*">
  <xsl:copy copy-namespaces="no">
    <xsl:apply-templates select="node()"/>
  </xsl:copy>
</xsl:template>
<!--
  Ordinary paragraphs become <para> with a running number in @num.

  xsl:number counts the current paragraph together with its preceding sibling
  paragraphs that match the count pattern. Heading paragraphs are left out of
  the count: 'TRH1' and every 'TRSubtitle1' through 'TRSubtitle6' style, the
  same range the <stitle> heading template matches, so a deeper heading does
  not shift the numbers of the paragraphs that follow it.
-->
<xsl:template match="w:p">
  <xsl:variable name="p-num" as="xs:integer">
    <xsl:number count="w:p[not(w:pPr/w:pStyle[@w:val = ('TRH1', (1 to 6)!('TRSubtitle' || .))])]"/>
  </xsl:variable>
  <para num="{$p-num}">
    <xsl:apply-templates/>
  </para>
</xsl:template>
<!-- The chapter heading: a paragraph styled 'TRH1' becomes <sub-chapter> holding a single <sctitle>. -->
<xsl:template match="w:p[w:pPr/w:pStyle/@w:val = 'TRH1']">
  <sub-chapter>
    <sctitle>
      <xsl:apply-templates select="node()"/>
    </sctitle>
  </sub-chapter>
</xsl:template>
<!-- Runs are unwrapped: only their content is processed, no element is written for w:r itself. -->
<xsl:template match="w:r">
  <xsl:apply-templates select="node()"/>
</xsl:template>
<!-- Text containers are unwrapped the same way, so the text reaches the output without a w:t wrapper. -->
<xsl:template match="w:t">
  <xsl:apply-templates select="node()"/>
</xsl:template>
</xsl:stylesheet>
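I have made use of some XPath 3 constructs such as the `||` concatenation operator and the `!` simple map operator; if you use an XSLT 2 processor, use the `concat` function and a `for ... return` expression instead.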