I'm a bit confused about how to filter the data I get from scraping eBay. Here is the code:
from bs4 import BeautifulSoup
import requests
# Search-results page to scrape; the search text travels in the _nkw parameter.
url = (
    'https://www.ebay.fr/sch/267/i.html'
    '?_from=R40&_nkw=star wars&_sop=10&_ipg=200'
)
def get_data(url, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        built-in ``html.parser``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    r = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup
def parse(soup):
    """Print each listing found in *soup* as a one-element list.

    Every result block is turned into a dict with 'Title', 'Price' and
    'Link' keys, wrapped in a fresh list and printed on its own line.
    Blocks for which any of the three lookups fails are skipped. Nothing is
    accumulated across iterations and the function returns None.
    """
    for listing in soup.find_all('div', {'class': 's-item__info clearfix'}):
        try:
            heading = listing.find('h3', {'class': 's-item__title'})
            amount = listing.find('span', {'class': 's-item__price'})
            anchor = listing.find('a', {'class': 's-item__link'})
            # A new list per listing: only the current item is ever printed.
            data = [{
                'Title': heading.text.replace('Nouvelle annonce', ''),
                'Price': amount.text,
                'Link': anchor['href'],
            }]
            print(data)
        except:
            continue
    return None
# Fetch the search-results page, then print every listing found on it.
soup = get_data(url)
parse(soup)
With that code I get all the books from eBay's page, but I only want one specific book, selected with a keyword, from the list I get when I print(data).
For example, the output looks like this:
[{'Title': 'Star Wars - Rebels T05', 'Price': '8,53 EUR', 'Link': 'https://www.ebay.fr/itm/265401372083?hash=item3dcb278db3:g:g00AAOSwTmBhjXjq'}]
[{'Title': 'Official Lego® Star Wars Annual 2016 (Lego Annuals), , Used; Good Book', 'Price': '8,42 EUR', 'Link': 'https://www.ebay.fr/itm/165178509530?hash=item26756808da:g:NU4AAOSwsldhjXi2'}]
[{'Title': 'Thrawn (Star Wars) de Zahn, Timothy | Livre | état très bon', 'Price': '10,95 EUR', 'Link': 'https://www.ebay.fr/itm/124998742900?hash=item1d1a817374:g:zBQAAOSwSGFhjXPt'}]
[{'Title': 'STARFIX 007 1983 STAR WARS La guerre des étoiles III Les PREDATEURS GWENDOLINE', 'Price': '12,90 EUR', 'Link': 'https://www.ebay.fr/itm/294540446774?hash=item4493fa8c36:g:EMUAAOSwWjxhjXNe'}]
[{'Title': 'Star Wars, Der Kristallstern de McIntyre, Vonda N.,... | Livre | état acceptable', 'Price': '3,53 EUR', 'Link': 'https://www.ebay.fr/itm/124998670341?hash=item1d1a805805:g:6xIAAOSwKmZhjWPn'}]
I would like to use the keyword "Thrawn" so that I get only the third line:
[{'Title': 'Thrawn (Star Wars) de Zahn, Timothy | Livre | état très bon', 'Price': '10,95 EUR', 'Link': 'https://www.ebay.fr/itm/124998742900?hash=item1d1a817374:g:zBQAAOSwSGFhjXPt'}]
At this point I'm stuck. I have made many attempts with `if`, `string` and `attrs`, but so far I get no result. How can I implement a "keyword" filter? :)
Thank you.
CodePudding user response:
There are several ways to find the book titles that contain the keyword 'Thrawn'.
First, each data element is a dictionary, so it has to be converted to a plain string with str(dict) before a substring test can be applied to it.
# Keep every listing whose string form mentions the keyword. str() renders
# the whole dictionary, so Price and Link text is searched along with Title.
book_titles = parse(soup)
book = list(filter(lambda entry: 'Thrawn' in str(entry), book_titles))
print(book)
# output
[{'Title': 'Thrawn (Star Wars) de Zahn, Timothy | Livre | état très bon', 'Price': '10,95 EUR', 'Link': 'https://www.ebay.fr/itm/124998742900?hash=item1d1a817374:g:zBQAAOSwSGFhjXPt'}, {'Title': 'Star Wars™ Thrawn de Zahn, Timothy | Livre | état très bon', 'Price': '10,77 EUR', 'Link': 'https://www.ebay.fr/itm/124997651763?hash=item1d1a70cd33:g:FhoAAOSwPF9hjIs-'}]
# Here is another way to do this with a regex.
import re

book_titles = parse(soup)
# Compile once, then search the string form of each listing dictionary.
pattern = re.compile('Thrawn')
book = [title for title in book_titles if pattern.search(str(title))]
print(book)
# output
[{'Title': 'Thrawn (Star Wars) de Zahn, Timothy | Livre | état très bon', 'Price': '10,95 EUR', 'Link': 'https://www.ebay.fr/itm/124998742900?hash=item1d1a817374:g:zBQAAOSwSGFhjXPt'}, {'Title': 'Star Wars™ Thrawn de Zahn, Timothy | Livre | état très bon', 'Price': '10,77 EUR', 'Link': 'https://www.ebay.fr/itm/124997651763?hash=item1d1a70cd33:g:FhoAAOSwPF9hjIs-'}]
and here is another way:
# Test the 'Title' value directly instead of scanning every key/value pair.
# A listing without a 'Title' entry is skipped, exactly as the scan did.
book_titles = parse(soup)
for title in book_titles:
    if 'Thrawn' in title.get('Title', ''):
        print(title)
# output
{'Title': 'Thrawn (Star Wars) de Zahn, Timothy | Livre | état très bon', 'Price': '10,95 EUR',
'Link': 'https://www.ebay.fr/itm/124998742900?hash=item1d1a817374:g:zBQAAOSwSGFhjXPt'}
{'Title': 'Star Wars™ Thrawn de Zahn, Timothy | Livre | état très bon', 'Price': '10,77 EUR',
'Link': 'https://www.ebay.fr/itm/124997651763?hash=item1d1a70cd33:g:FhoAAOSwPF9hjIs-'}
The function parse also needs to return the data, so define it like this:
def parse(soup):
    """Collect the listings found in a parsed search-results page.

    Args:
        soup: Parsed page exposing ``find_all``; each result block is a
            ``div`` with class ``s-item__info clearfix``.

    Returns:
        A list of dicts with 'Title', 'Price' and 'Link' keys, one per
        result block that provides all three. Blocks missing any of them
        are skipped. The list is empty when no block matches.
    """
    data = []
    for item in soup.find_all('div', {'class': 's-item__info clearfix'}):
        try:
            title = item.find('h3', {'class': 's-item__title'}).text.replace('Nouvelle annonce', '')
            price = item.find('span', {'class': 's-item__price'}).text
            link = item.find('a', {'class': 's-item__link'})['href']
        except (AttributeError, TypeError, KeyError):
            # A lookup came back as None (missing tag) or the anchor has no
            # href: skip this block, but let unrelated errors surface.
            continue
        data.append({'Title': title, 'Price': price, 'Link': link})
    return data