I'm following a Selenium tutorial for an Amazon price tracker (Clever Programming on Youtube) and I got stuck at getting the links from amazon using their techniques.
tutorial link: https://www.youtube.com/watch?v=WbJeL_Av2-Q&t=4315s
I realized the problem laid on the fact that I'm only getting one link out of the 17 available after doing the product search. I need to get all the links for every product after doing a search and them use then to get into each product and get their title, seller and price.
funtion get_products_links() should get all links and stores them into a list to be used by the function get_product_info()
def get_products_links(self):
self.driver.get(self.base_url) # Go to amazon.com using BASE_URL
element = self.driver.find_element_by_id('twotabsearchtextbox')
element.send_keys(self.search_term)
element.send_keys(Keys.ENTER)
time.sleep(2) # Wait to load page
self.driver.get(f'{self.driver.current_url}{self.price_filter}')
time.sleep(2) # Wait to load page
result_list = self.driver.find_elements_by_class_name('s-result-list')
links = []
try:
### Tying to get a list for Xpath links attributes ###
### Only numbers from 3 to 17 work after doing product search where 'i' is placed in the XPATH ###
i = 3
results = result_list[0].find_elements_by_xpath(
f'//*[@id="search"]/div[1]/div[1]/div/span[3]/div[2]/div[{i}]/div/div/div/div/div/div[1]/div/div[2]/div/span/a')
links = [link.get_attribute('href') for link in results]
return links
except Exception as e:
print("Didn't get any products...")
print(e)
return links
At this point get_products_links() only returns one link since I just made 'i' a fixed value of 3 to make it work for now.
I was thinking to iterate 'i' in some sort so I can save every different PATHs but I don't know how to implement this.
I've tried performing a for loop and append the result into a new list but them the app stops working
Here is the complete code:
from amazon_config import(
get_web_driver_options,
get_chrome_web_driver,
set_browser_as_incognito,
set_ignore_certificate_error,
NAME,
CURRENCY,
FILTERS,
BASE_URL,
DIRECTORY
)
import time
from selenium.webdriver.common.keys import Keys
class GenerateReport:
def __init__(self):
pass
class AmazonAPI:
def __init__(self, search_term, filters, base_url, currency):
self.base_url = base_url
self.search_term = search_term
options = get_web_driver_options()
set_ignore_certificate_error(options)
set_browser_as_incognito(options)
self.driver = get_chrome_web_driver(options)
self.currency = currency
self.price_filter = f"&rh=p_36:{filters['min']}00-{filters['max']}00"
def run(self):
print("Starting script...")
print(f"Looking for {self.search_term} products...")
links = self.get_products_links()
time.sleep(1)
if not links:
print("Stopped script.")
return
print(f"Got {len(links)} links to products...")
print("Getting info about products...")
products = self.get_products_info(links)
# self.driver.quit()
def get_products_info(self, links):
asins = self.get_asins(links)
product = []
for asin in asins:
product = self.get_single_product_info(asin)
def get_single_product_info(self, asin):
print(f"Product ID: {asin} - getting data...")
product_short_url = self.shorten_url(asin)
self.driver.get(f'{product_short_url}?language=en_GB')
time.sleep(2)
title = self.get_title()
seller = self.get_seller()
price = self.get_price()
def get_title(self):
try:
return self.driver.find_element_by_id('productTitle')
except Exception as e:
print(e)
print(f"Can't get title of a product - {self.driver.current_url}")
return None
def get_seller(self):
try:
return self.driver.find_element_by_id('bylineInfo')
except Exception as e:
print(e)
print(f"Can't get title of a product - {self.driver.current_url}")
return None
def get_price(self):
return '$99'
def shorten_url(self, asin):
return self.base_url 'dp/' asin
def get_asins(self, links):
return [self.get_asin(link) for link in links]
def get_asin(self, product_link):
return product_link[product_link.find('/dp/') 4:product_link.find('/ref')]
def get_products_links(self):
self.driver.get(self.base_url) # Go to amazon.com using BASE_URL
element = self.driver.find_element_by_id('twotabsearchtextbox')
element.send_keys(self.search_term)
element.send_keys(Keys.ENTER)
time.sleep(2) # Wait to load page
self.driver.get(f'{self.driver.current_url}{self.price_filter}')
time.sleep(2) # Wait to load page
result_list = self.driver.find_elements_by_class_name('s-result-list')
links = []
try:
### Tying to get a list for Xpath links attributes ###
### Only numbers from 3 to 17 work after doing product search where 'i' is placed ###
i = 3
results = result_list[0].find_elements_by_xpath(
f'//*[@id="search"]/div[1]/div[1]/div/span[3]/div[2]/div[{i}]/div/div/div/div/div/div[1]/div/div[2]/div/span/a')
links = [link.get_attribute('href') for link in results]
return links
except Exception as e:
print("Didn't get any products...")
print(e)
return links
if __name__ == '__main__':
print("HEY!!!