I have an HTML tree where the price I need is inside a div tag, after a span tag. My code prints an empty value. How do I get the needed price?
<div class='card-product__price'>
<span class='card-product__price-old'>Old price is here</span>
'Needed price is here'
</div>
class Parser(HTMLParser):
    """Record a product price from the first text node after a marker ``<div>``.

    The handlers read and write ``price_is_found``, ``is_price_field``,
    ``_product_info`` and ``lst``. None of these is initialised in this
    excerpt, so they must be set up elsewhere before the parser is fed.
    """

    def handle_starttag(self, tag, attrs):
        """Flag the price field when the marker ``<div>`` opens.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if (not self.price_is_found and
                'class' not in self._product_info and
                tag == 'div'):
            attrs = dict(attrs)
            # NOTE(review): the sample markup shows the price inside a div
            # with class 'card-product__price'; this check looks for
            # 'card-product__details' -- presumably an enclosing element that
            # is not shown. Confirm against the real page.
            if attrs.get('class') == 'card-product__details':
                self.is_price_field = True

    def handle_data(self, data):
        """Store the first text node seen once the price field is flagged.

        ``HTMLParser`` also reports whitespace between tags as data, so the
        first text node after the opening tag can be just a newline. Stripping
        everything except digits and spaces from that yields no digits, which
        is presumably the empty value described in the question.

        Args:
            data: Raw text content supplied by ``HTMLParser``.
        """
        if (not self.price_is_found and
                self.is_price_field and
                'class' not in self._product_info):
            self.lst.append(data)
            # Raw string: '\d' in a plain literal is an invalid escape sequence.
            self._product_info['price'] = re.sub(r'[^\d ]', '', data)
            # Only the first text node is ever recorded.
            self.price_is_found = True
CodePudding user response:
Updated version :
from bs4 import BeautifulSoup

# The class attributes are required: the lookup below matches on them.
html = (
    '<div class="card-product__price"> '
    '<span class="card-product__price-old">Old price is here</span> '
    '"Needed price is here" </div>'
)
soup = BeautifulSoup(html, 'html.parser')
price_div = soup.find('div', {'class': 'card-product__price'})
# .text would also include the nested <span>'s old price, so join only the
# text nodes that are direct children of the <div>.
price = ''.join(price_div.find_all(string=True, recursive=False)).strip()
print(price)
This will print out the string "Needed price is here". You can also use a CSS selector to get the desired value more concisely.
# .text would also include the nested <span>'s old price; join only the
# text nodes that are direct children of the selected <div>.
price_div = soup.select_one('.card-product__price')
price = ''.join(price_div.find_all(string=True, recursive=False)).strip()
You can also use the PyQuery library, which uses jQuery-like syntax, to get the desired value.
from pyquery import PyQuery as pq

doc = pq(html)
# .text() would also return the nested <span>'s text; keep only the text
# nodes that are direct children of the price <div>.
price = ''.join(
    node for node in doc('.card-product__price').contents()
    if isinstance(node, str)
).strip()
If you want to use html.parser instead, try this:
from html.parser import HTMLParser
class PriceParser(HTMLParser):
    """Extract the price text that sits directly inside the price ``<div>``.

    Text inside nested elements (such as the old-price ``<span>``) and
    whitespace-only text nodes are ignored. After feeding, ``price`` holds the
    last non-blank text node found directly inside a
    ``card-product__price`` div, or ``""`` when none was seen.
    """

    # Elements that never have a closing tag; they must not affect nesting.
    _VOID_TAGS = frozenset({
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'source', 'track', 'wbr',
    })

    def __init__(self):
        super().__init__()
        self.in_price_tag = False
        self.price = ""
        # Number of elements currently open inside the price <div>.
        self._depth = 0

    def handle_starttag(self, tag, attrs):
        """Enter the price div, or track nesting while already inside it."""
        if self.in_price_tag:
            if tag not in self._VOID_TAGS:
                self._depth += 1
            return
        # A class attribute may hold several tokens, or no value at all.
        classes = (dict(attrs).get('class') or '').split()
        if tag == 'div' and 'card-product__price' in classes:
            self.in_price_tag = True
            self._depth = 0

    def handle_endtag(self, tag):
        """Leave a nested element, or the price div itself at depth zero."""
        if not self.in_price_tag or tag in self._VOID_TAGS:
            return
        if self._depth == 0:
            self.in_price_tag = False
        else:
            self._depth -= 1

    def handle_data(self, data):
        """Keep non-blank text that is a direct child of the price div."""
        text = data.strip()
        # Whitespace between tags is reported as data too; skipping it is
        # what stops the price from being a blank string.
        if self.in_price_tag and self._depth == 0 and text:
            self.price = text
# The class attributes are required: PriceParser matches on them.
html = (
    '<div class="card-product__price"> '
    '<span class="card-product__price-old">Old price is here</span> '
    '"Needed price is here" </div>'
)
parser = PriceParser()
parser.feed(html)
parser.close()  # flush any text still buffered by the parser
print(parser.price)
CodePudding user response:
Try:
from html.parser import HTMLParser
class PriceParser(HTMLParser):
    """Collect every non-blank text node found inside the price ``<div>``.

    Text from nested elements is collected as well, in document order, so for
    markup where the needed price follows the old-price ``<span>`` the needed
    price is the last entry of ``prices``.
    """

    def __init__(self):
        super().__init__()
        self.in_price_tag = False
        self.prices = []
        # Count of <div> elements open since (and including) the price div.
        self._div_depth = 0

    def handle_starttag(self, tag, attrs):
        """Start collecting at the price div; count divs nested inside it."""
        if tag != 'div':
            return
        if self.in_price_tag:
            self._div_depth += 1
            return
        # A class attribute may hold several tokens, or no value at all.
        classes = (dict(attrs).get('class') or '').split()
        if 'card-product__price' in classes:
            self.in_price_tag = True
            self._div_depth = 1

    def handle_endtag(self, tag):
        """Stop collecting only when the price div itself is closed."""
        if tag == 'div' and self.in_price_tag:
            self._div_depth -= 1
            if self._div_depth == 0:
                self.in_price_tag = False

    def handle_data(self, data):
        """Append stripped text, skipping whitespace-only nodes."""
        data = data.strip()
        # Blank nodes would otherwise be stored as '' and could end up as
        # the last element.
        if self.in_price_tag and data:
            self.prices.append(data)
# The class attributes are required: PriceParser matches on them.
html = (
    '<div class="card-product__price"> '
    '<span class="card-product__price-old">Old price is here</span> '
    '"Needed price is here" </div>'
)
parser = PriceParser()
parser.feed(html)
parser.close()  # flush any text still buffered by the parser
# The needed price is the last collected element; guard against no match.
print(parser.prices[-1] if parser.prices else '')
Prints:
"Needed price is here"