Home > Enterprise > Strip HTML tags from an XML file
Strip HTML tags from an XML file

Time:04-27

I am facing an issue while trying to strip the HTML tags from an XML file using XSLT.

Input XML

<ns1:productSpecificationFullDTO xmlns:ns1="http://www.micros.com/creations/core/domain/dto/v1p0/full" xmlns:ns2="http://www.micros.com/creations/core/domain/dto/v1p0/simple">
<ns1:specification>
    <ns1:CopySection>
        <ns1:otherRangeName>
            <![CDATA[CNF - BOP 1904 
<b>BOP 1904</b>]]>
        </ns1:otherRangeName>
        <ns1:additionalPrintOfPackCopy>
            <![CDATA[CNF FOP 1904 <b>FOP 1904</b>]]>
        </ns1:additionalPrintOfPackCopy>
    </ns1:CopySection>
</ns1:specification></ns1:productSpecificationFullDTO>

XSLT Definition: Although the named template that strips HTML tags works for a few elements, it does not work for others.

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:ns1="http://www.micros.com/creations/core/domain/dto/v1p0/full" xmlns:ns2="http://www.micros.com/creations/core/domain/dto/v1p0/simple" exclude-result-prefixes="ns1 ns1">
<!--
  Stylesheet from the question: builds ItemDetails/Items/LongDescription from the
  CopySection of a productSpecificationFullDTO document.
  NOTE(review): exclude-result-prefixes lists ns1 twice and never ns2, so the
  xmlns:ns2 declaration is still copied onto the literal result elements.
-->
<!--
  replace-string: writes $text with every occurrence of $replace replaced by $with.
  Params: text (input string), replace (substring to search for), with (replacement).
  Recursive: emits the part before the first match, then $with, then calls itself
  on the remainder; when no match is left it emits $text as is.
-->
<xsl:template name="replace-string">
    <xsl:param name="text"/>
    <xsl:param name="replace"/>
    <xsl:param name="with"/>
    <xsl:choose>
    <xsl:when test="contains($text,$replace)">
        <xsl:value-of select="substring-before($text,$replace)"/>
        <xsl:value-of select="$with"/>
        <xsl:call-template name="replace-string">
            <xsl:with-param name="text" select="substring-after($text,$replace)"/>
            <xsl:with-param name="replace" select="$replace"/>
            <xsl:with-param name="with" select="$with"/>
        </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
        <xsl:value-of select="$text"/>
    </xsl:otherwise>
    </xsl:choose>
</xsl:template>
<!--
  strip-html-tags: intended to write $text without anything between '<' and '>'.
  Param: text (string that may contain markup).
  NOTE(review): this version produces the duplicated text shown under
  "Actual Output":
    1. xsl:choose has no xsl:otherwise, so text that contains no '<' is never
       written by the choose itself.
    2. The replace-string call after the choose runs on every recursion level
       with the unstripped $text of that level, so each level appends its own
       input (tags included) after the already stripped text.
    3. The recursion continues after the first '>' anywhere in $text, not the
       first '>' that follows the matched '<'.
-->
<xsl:template name="strip-html-tags">
<xsl:param name="text"/>
<xsl:choose>
    <xsl:when test="contains($text, '&lt;')">
        <!-- Emit the text in front of the tag, then recurse on what follows the first '>'. -->
        <xsl:value-of select="substring-before($text, '&lt;')"/>
        <xsl:call-template name="strip-html-tags">
                <xsl:with-param name="text" select="substring-after($text, '&gt;')"/>
        </xsl:call-template>
    </xsl:when>
</xsl:choose>
<!-- Runs unconditionally, after the recursion above has already written its output. -->
<xsl:call-template name="replace-string">
            <xsl:with-param name="text" select="$text"/>
            <xsl:with-param name="replace" select="'&amp;nbsp;'" />
            <xsl:with-param name="with" select="' '"/>
        </xsl:call-template>    
</xsl:template>
<!--
  Root template: writes ItemDetails/Items/LongDescription. When otherRangeName is
  non-empty its normalized text goes through strip-html-tags; otherwise the
  normalized additionalPrintOfPackCopy text is written directly, without any
  tag stripping.
-->
<xsl:template match="/">
    <ItemDetails>
        <Items>
            <!-- Food section start here -->
            <LongDescription>
                    <!-- Node-set compared with '': true when an otherRangeName node has a non-empty string value (whitespace counts, the value is not normalized here). -->
                    <xsl:variable name="productCheck" select="ns1:productSpecificationFullDTO/ns1:specification/ns1:CopySection/ns1:otherRangeName"/>
                    <xsl:choose>
                        <xsl:when test="$productCheck != ''">
                            <xsl:call-template name="strip-html-tags">
                                <xsl:with-param name="text" select="normalize-space(ns1:productSpecificationFullDTO/ns1:specification/ns1:CopySection/ns1:otherRangeName)"/>
                            </xsl:call-template>
                        </xsl:when>
                        <xsl:otherwise>
                            <xsl:value-of select="normalize-space(ns1:productSpecificationFullDTO/ns1:specification/ns1:CopySection/ns1:additionalPrintOfPackCopy)"/>
                        </xsl:otherwise>
                    </xsl:choose>
                </LongDescription>
        </Items>
    </ItemDetails>
</xsl:template></xsl:stylesheet>

Desired Output: The output should be as shown below, with all HTML tags removed.

<ItemDetails xmlns:ns2="http://www.micros.com/creations/core/domain/dto/v1p0/simple">
   <Items>
      <Item>
         <LongDescription>CNF - BOP 1904 BOP 1904</LongDescription>
      </Item>
    </Items>
</ItemDetails>

Actual Output: The actual output that I'm getting still contains HTML tags, which are unwanted and should be removed.

    <ItemDetails xmlns:ns2="http://www.micros.com/creations/core/domain/dto/v1p0/simple">
   <Items>
      <Item>
         <LongDescription>CNF - BOP 1904 BOP 1904BOP 1904&lt;/b&gt;CNF - BOP 1904 &lt;b&gt;BOP 1904&lt;/b&gt;</LongDescription>
      </Item>
    </Items>
</ItemDetails>

Please assist. Thanks in Advance

CodePudding user response:

Try this:

<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:ns1="http://www.micros.com/creations/core/domain/dto/v1p0/full"
                xmlns:ns2="http://www.micros.com/creations/core/domain/dto/v1p0/simple"
                exclude-result-prefixes="ns1">
  <!--
    Builds ItemDetails/Items/LongDescription from the CopySection of a
    productSpecificationFullDTO document, with HTML tags removed and the
    literal text "&nbsp;" turned into a no-break space.

    Only ns1 is excluded from the result, so the xmlns:ns2 declaration is
    still written on the result root, as in the expected output shown in the
    question. Add ns2 to exclude-result-prefixes to suppress it.
  -->

  <!--
    Root template. Works in three steps:
      1. pick the source text (otherRangeName, or additionalPrintOfPackCopy
         when otherRangeName is empty after whitespace normalization),
      2. strip the tags from it,
      3. replace "&nbsp;" in the stripped text.
  -->
  <xsl:template match="/">
    <!-- Shared parent of both candidate source elements. -->
    <xsl:variable name="copySection"
                  select="ns1:productSpecificationFullDTO/ns1:specification/ns1:CopySection"/>
    <ItemDetails>
      <Items>
        <!-- Food section start here -->
        <LongDescription>

          <!-- First determine which content to be stripped -->
          <xsl:variable name="stripFromHtml">
            <xsl:variable name="otherRangeName"
                          select="normalize-space($copySection/ns1:otherRangeName)"/>
            <xsl:choose>
              <!-- A non-empty string is true in a boolean context. -->
              <xsl:when test="$otherRangeName">
                <xsl:value-of select="$otherRangeName"/>
              </xsl:when>
              <xsl:otherwise>
                <xsl:value-of select="normalize-space($copySection/ns1:additionalPrintOfPackCopy)"/>
              </xsl:otherwise>
            </xsl:choose>
          </xsl:variable>

          <!-- Second strip tags -->
          <xsl:variable name="tags-stripped">
            <xsl:call-template name="strip-html-tags">
              <xsl:with-param name="text" select="$stripFromHtml"/>
            </xsl:call-template>
          </xsl:variable>

          <!-- Third translate nbsp's -->
          <xsl:call-template name="replace-string">
            <xsl:with-param name="text" select="$tags-stripped"/>
            <xsl:with-param name="replace" select="'&amp;nbsp;'"/>
            <xsl:with-param name="with" select="'&#160;'"/>
          </xsl:call-template>
        </LongDescription>
      </Items>
    </ItemDetails>
  </xsl:template>

  <!--
    replace-string: writes $text with every occurrence of $replace replaced
    by $with.
      text    : string to search in
      replace : substring to look for
      with    : replacement string
    An empty $replace writes $text unchanged. Without that guard the template
    would never terminate, because contains($text, '') is always true and
    substring-after($text, '') returns $text itself.
  -->
  <xsl:template name="replace-string">
    <xsl:param name="text"/>
    <xsl:param name="replace"/>
    <xsl:param name="with"/>
    <xsl:choose>
      <xsl:when test="string-length($replace) &gt; 0 and contains($text, $replace)">
        <xsl:value-of select="substring-before($text, $replace)"/>
        <xsl:value-of select="$with"/>
        <xsl:call-template name="replace-string">
          <xsl:with-param name="text" select="substring-after($text, $replace)"/>
          <xsl:with-param name="replace" select="$replace"/>
          <xsl:with-param name="with" select="$with"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$text"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!--
    strip-html-tags: writes $text with every "<...>" span removed.
      text : string that may contain markup
    The closing '>' is searched only in the text that follows the matched
    '<', so a stray '>' earlier in the string no longer causes text to be
    written twice. A '<' that is never closed is not treated as a tag: it and
    the rest of the string are written literally instead of being dropped.
    This is plain substring matching, not an HTML parser; a '>' inside an
    attribute value still ends the tag.
  -->
  <xsl:template name="strip-html-tags">
    <xsl:param name="text"/>
    <xsl:choose>
      <xsl:when test="contains($text, '&lt;')">
        <xsl:value-of select="substring-before($text, '&lt;')"/>
        <xsl:variable name="remainder" select="substring-after($text, '&lt;')"/>
        <xsl:choose>
          <xsl:when test="contains($remainder, '&gt;')">
            <!-- Skip the tag body and continue with whatever follows it. -->
            <xsl:call-template name="strip-html-tags">
              <xsl:with-param name="text" select="substring-after($remainder, '&gt;')"/>
            </xsl:call-template>
          </xsl:when>
          <xsl:otherwise>
            <!-- Unclosed '<': keep it and the rest of the text as is. -->
            <xsl:value-of select="concat('&lt;', $remainder)"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$text"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>
  • Related