Home > front end >  Python 2.7 lxml: How to replace a tag with a comment
Python 2.7 lxml: How to replace a tag with a comment

Time:05-30

I have the following XML:

<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
  <B name="NAME" version="VERSION">
    <AA>SOME NAME</AA>
    <CC>SOME OTHER NAME</CC>
  </B>
  <C>
    <SOME1>
      <TAG_3 name="NAME_1" path="path_1"/>
      <TAG_3 name="NAME_2" path="path_2"/>
      <TAG_3 name="NAME_3" path="path_3"/>
    </SOME1>
    <SOME2>
      <TAG_3 name="NAME_4" path="path_1"/>
      <TAG_3 name="NAME_5" path="path_2"/>
      <TAG_3 name="NAME_6" path="path_3"/>
    </SOME2>
  </C>
  <D>
    <TAG_3 type="type" name="NAME_1" version="version_1"/>
    <TAG_3 type="type" name="NAME_2" version="version_2"/>
    <TAG_3 type="type" name="NAME_3" version="version_3"/>
  </D>
</A>

And I have to change it to be something like this:

<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
    <B name="NAME" version="VERSION">
        <AA>SOME NAME</AA>
        <CC>SOME OTHER NAME</CC>
    </B>
    <C>
        <!-- SOME1 -->
        <TAG_3 name="NAME_1" path="path_1"/>
        <TAG_3 name="NAME_3" path="path_3"/>
        <TAG_3 name="NAME_2" path="path_2"/>
        <!-- SOME2 -->
        <TAG_3 name="NAME_5" path="path_2"/>
        <TAG_3 name="NAME_4" path="path_1"/>
        <TAG_3 name="NAME_6" path="path_3"/>
    </C>
    <D>
        <TAG_3 type="type" name="NAME_1" version="version_1"/>
        <TAG_3 type="type" name="NAME_2" version="version_2"/>
        <TAG_3 type="type" name="NAME_3" version="version_3"/>
    </D>
</A>

Does anyone know how I can achieve something like this? I've tried iterating over all the children of SOME1 and SOME2 and removing that tag, but I don't know how to:

  1. Remove the TAG itself, such as SOME1 and SOME2
  2. Add the first line of the comment in place of that TAG

Could anyone give me a hint on how I can do that?

CodePudding user response:

Moving elements to SOME1 parent element

from lxml import etree

# Load the document; `tree` and `c` are reused by the follow-up snippet.
tree = etree.parse('tmp.xml')
root = tree.getroot()

# XPath returns a list, so the <C> element holding SOME1/SOME2 is c[0].
c = tree.xpath('/A/C[SOME1 | SOME2]')
print(c)

# Hoist every child of SOME1 and SOME2 up into <C>. append() moves an
# element that is already part of the tree, so nothing gets duplicated.
wrapped_children = tree.xpath('//C[SOME1 | SOME2]/SOME1/* | //C[SOME1 | SOME2]/SOME2/*')
for child in wrapped_children:
    print(child)
    c[0].append(child)

# The wrappers are empty at this point; detach them from <C>.
wrappers = tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2')
for wrapper in wrappers:
    c[0].remove(wrapper)

serialized = etree.tostring(tree, encoding="utf-8", method="xml")
print(serialized.decode("utf-8"))

Result

<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
  <B name="NAME" version="VERSION">
    <AA>SOME NAME</AA>
    <CC>SOME OTHER NAME</CC>
  </B>
  <C>
    <TAG_3 name="NAME_1" path="path_1"/>
      <TAG_3 name="NAME_2" path="path_2"/>
      <TAG_3 name="NAME_3" path="path_3"/>
      <TAG_3 name="NAME_4" path="path_1"/>
      <TAG_3 name="NAME_5" path="path_2"/>
      <TAG_3 name="NAME_6" path="path_3"/>
    </C>
  <D>
    <TAG_3 type="type" name="NAME_1" version="version_1"/>
    <TAG_3 type="type" name="NAME_2" version="version_2"/>
    <TAG_3 type="type" name="NAME_3" version="version_3"/>
  </D>
</A>

To replace removed elements with a comment

# Step 1: add a comment named after each wrapper element to <C>.
# Starting from the children [SOME1, SOME2] this produces
# [<!--SOME1-->, SOME1, <!--SOME2-->, SOME2].
slist = tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2')
for d in slist:
    pos = slist.index(d)
    if d.tag == 'SOME1':
        c[0].insert(pos, etree.Comment(d.tag))
        c[0][pos].tail = "\n"
    elif d.tag == 'SOME2':
        # + 1 steps over the comment that was just inserted for SOME1.
        c[0].insert(pos + 1, etree.Comment(d.tag))
        c[0][pos + 1].tail = "\n"

# Step 2: move the SOME1 children so they sit right after the SOME1 comment.
slist = tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2')
for d in slist:
    idx = slist.index(d)
    if d.tag == 'SOME1':
        for s in tree.xpath('//C[SOME1 | SOME2]/SOME1/*'):
            # Advance the slot for every child so document order is kept;
            # inserting at a fixed index would reverse the children.
            idx += 1
            c[0].insert(idx, s)

# Step 3: the SOME2 children belong at the end of <C>, after the SOME2 comment.
for s in tree.xpath('//C[SOME1 | SOME2]/SOME2/*'):
    print(s)
    c[0].append(s)

# Step 4: drop the now-empty wrapper elements.
for d in tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2'):
    c[0].remove(d)

Result

<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
  <B name="NAME" version="VERSION">
    <AA>SOME NAME</AA>
    <CC>SOME OTHER NAME</CC>
  </B>
  <C>
    <!--SOME1-->
<TAG_3 name="NAME_1" path="path_1"/>
      <TAG_3 name="NAME_2" path="path_2"/>
      <TAG_3 name="NAME_3" path="path_3"/>
    <!--SOME2-->
<TAG_3 name="NAME_4" path="path_1"/>
      <TAG_3 name="NAME_5" path="path_2"/>
      <TAG_3 name="NAME_6" path="path_3"/>
    </C>
  <D>
    <TAG_3 type="type" name="NAME_1" version="version_1"/>
    <TAG_3 type="type" name="NAME_2" version="version_2"/>
    <TAG_3 type="type" name="NAME_3" version="version_3"/>
  </D>
</A>
  • Related