I need to get the text out of an li tag using Python and bs4. I'm trying the code below:
from bs4 import BeautifulSoup
from lxml import etree
# Sample page: the text of interest sits in the third <li> of the only <ul>.
html_doc = """
<html>
<head>
</head>
<body>
<div >
<section id="page">
<div >
<div >
<ul>
<li>Name: Peter</li>
<li>Age: 21</li>
<li>Status: Active</li>
</ul>
</div>
</div>
</section>
</div>
</body>
</html>
"""
# Parse with BeautifulSoup, then hand the serialized markup to lxml so that
# XPath queries can be run against it.
soup = BeautifulSoup(html_doc, 'lxml')
dom = etree.HTML(str(soup))
# The path stops at the <li> element itself, so the query result is a list of
# Element objects rather than their text.
third_item_path = '/html/body/div/section/div[1]/div[1]/ul/li[3]'
matches = dom.xpath(third_item_path)
print(matches)
That prints: [<Element li at 0x7fc640e896c0>]
but the desired result is the text of the li tag, like this: Status: Active
How can I do that? Thanks
CodePudding user response:
In XPath you just have to append the text() node test to the expression:
from bs4 import BeautifulSoup
from lxml import etree
# Sample page: the text of interest sits in the third <li> of the only <ul>.
html_doc = """
<html>
<head>
</head>
<body>
<div >
<section id="page">
<div >
<div >
<ul>
<li>Name: Peter</li>
<li>Age: 21</li>
<li>Status: Active</li>
</ul>
</div>
</div>
</section>
</div>
</body>
</html>
"""
# Parse with BeautifulSoup, then re-parse the serialized markup with lxml so
# that XPath queries can be run against it.
soup = BeautifulSoup(html_doc, 'lxml')
dom = etree.HTML(str(soup))
# The trailing text() step selects the text nodes of the <li> instead of the
# element, so xpath() returns a list of strings.
# (The closing quote of the XPath string was missing, which is a SyntaxError.)
print(dom.xpath('/html/body/div/section/div[1]/div[1]/ul/li[3]/text()'))
Output:
['Status: Active']
#OR
# Each text() result is a plain string such as 'Status: Active'; splitting on
# whitespace and taking index 1 keeps only the value after the label.
# (The loop body must be indented, otherwise Python raises IndentationError.)
for li in dom.xpath('/html/body/div/section/div[1]/div[1]/ul/li[3]/text()'):
    txt = li.split()[1]
    print(txt)
Output:
Active
#OR
# Join the matched text nodes with a space so a plain string is printed
# instead of a list.
li_texts = dom.xpath('/html/body/div/section/div[1]/div[1]/ul/li[3]/text()')
print(' '.join(li_texts))
Output:
Status: Active
#OR
# Shorter, relative XPath. The original attribute predicate was empty ("[@]"),
# which is not a valid XPath expression; anchor on the section's id attribute,
# which is present in the sample document, instead.
print(''.join(dom.xpath('//section[@id="page"]//ul/li[3]/text()')))
Output:
Status: Active
CodePudding user response:
Try the following (no external library required):
import xml.etree.ElementTree as ET
# Same sample markup as in the question, fed to the standard-library XML
# parser.
xml = """
<html>
<head>
</head>
<body>
<div >
<section id="page">
<div >
<div >
<ul>
<li>Name: Peter</li>
<li>Age: 21</li>
<li>Status: Active</li>
</ul>
</div>
</div>
</section>
</div>
</body>
</html>
"""
# Parse the document and locate the first <ul> anywhere below the root.
root = ET.fromstring(xml)
ul = root.find('.//ul')
# Indexing an Element yields its children; -1 picks the last <li>.
last_li = ul[-1]
print(last_li.text)
output
Status: Active