Home > Net >  How to iterate a variable in XPATH, extract a link and store it into a list for further iteration
How to iterate a variable in XPATH, extract a link and store it into a list for further iteration

Time:02-18

I'm following a Selenium tutorial for an Amazon price tracker (Clever Programming on YouTube) and I got stuck getting the product links from Amazon using their technique.

tutorial link: https://www.youtube.com/watch?v=WbJeL_Av2-Q&t=4315s

I realized the problem lies in the fact that I'm only getting one link out of the 17 available after doing the product search. I need to get the links for every product after doing a search and then use them to open each product page and get its title, seller and price.

The function get_products_links() should get all the links and store them in a list to be used by the function get_products_info().

    def get_products_links(self):
        """Search Amazon for ``self.search_term`` and collect product links.

        Loads ``self.base_url``, types the search term into the element with
        id ``twotabsearchtextbox``, submits it, reloads the results URL with
        ``self.price_filter`` appended, and reads the ``href`` attribute of
        every anchor matched by a positional XPath.

        Returns:
            A list of ``href`` values. The list is empty when the lookup
            raises (the exception is printed, not propagated).

        NOTE: ``i`` is pinned to 3 below, so the XPath addresses a single
        result container and only the anchors under that one container are
        returned.
        """
        self.driver.get(self.base_url)  # Go to amazon.com using BASE_URL
        element = self.driver.find_element_by_id('twotabsearchtextbox')
        element.send_keys(self.search_term)
        element.send_keys(Keys.ENTER)
        time.sleep(2)  # Wait to load page
        self.driver.get(f'{self.driver.current_url}{self.price_filter}')
        time.sleep(2)  # Wait to load page
        result_list = self.driver.find_elements_by_class_name('s-result-list')

        links = []
        try:
            # Trying to get a list of link attributes via XPath.
            # Only numbers from 3 to 17 work after doing a product search
            # where 'i' is placed in the XPath.
            i = 3
            results = result_list[0].find_elements_by_xpath(
                f'//*[@id="search"]/div[1]/div[1]/div/span[3]/div[2]/div[{i}]/div/div/div/div/div/div[1]/div/div[2]/div/span/a')
            links = [link.get_attribute('href') for link in results]
            return links
        except Exception as e:
            # Best effort: report the failure and hand back whatever we have
            # (an empty list), so the caller can stop cleanly.
            print("Didn't get any products...")
            print(e)
            return links

At this point get_products_links() only returns one link since I just made 'i' a fixed value of 3 to make it work for now.

I was thinking of iterating over 'i' in some way so I can collect the link from every matching XPath, but I don't know how to implement this.

I've tried using a for loop and appending the results to a new list, but then the app stops working.

Here is the complete code:

from amazon_config import(
get_web_driver_options,
get_chrome_web_driver,
set_browser_as_incognito,
set_ignore_certificate_error,
NAME,
CURRENCY,
FILTERS,
BASE_URL,
DIRECTORY
)
import time
from selenium.webdriver.common.keys import Keys

class GenerateReport:
    """Placeholder for report generation; it currently holds no state."""

    def __init__(self):
        """Create an empty report generator (nothing is initialised yet)."""
        pass
class AmazonAPI:
    """Drive a Selenium browser to search Amazon and visit product pages.

    The instance owns a Chrome web driver created through the helpers
    imported from ``amazon_config``. ``run()`` is the entry point: it performs
    the search, collects product links and then opens each product page.
    """

    # XPath for the product anchors on the search-results page. The result
    # container step (``div[2]/div``) carries no positional predicate, so it
    # matches every result container instead of a single hard-coded index.
    _PRODUCT_LINK_XPATH = (
        '//*[@id="search"]/div[1]/div[1]/div/span[3]/div[2]/div'
        '/div/div/div/div/div/div[1]/div/div[2]/div/span/a'
    )

    def __init__(self, search_term, filters, base_url, currency):
        """Store the search settings and start the browser.

        Args:
            search_term: Text typed into the Amazon search box.
            filters: Mapping with ``'min'`` and ``'max'`` keys used to build
                the price filter query fragment.
            base_url: URL loaded first; also the prefix for short product
                URLs, so it is expected to end with ``/``.
            currency: Stored on the instance; not used by the visible code.
        """
        self.base_url = base_url
        self.search_term = search_term
        options = get_web_driver_options()
        set_ignore_certificate_error(options)
        set_browser_as_incognito(options)
        self.driver = get_chrome_web_driver(options)
        self.currency = currency
        # "00" is appended to each bound; presumably the filter expects the
        # price in cents -- TODO confirm.
        self.price_filter = f"&rh=p_36:{filters['min']}00-{filters['max']}00"

    def run(self):
        """Run the search and fetch info for every product link found.

        Returns ``None``; stops early (after printing a message) when no
        links were collected.
        """
        print("Starting script...")
        print(f"Looking for {self.search_term} products...")
        links = self.get_products_links()
        time.sleep(1)
        if not links:
            print("Stopped script.")
            return
        print(f"Got {len(links)} links to products...")
        print("Getting info about products...")
        products = self.get_products_info(links)

        # The driver is not quit here, so the browser stays open after run().
        # self.driver.quit()

    def get_products_info(self, links):
        """Return the info of every product referenced by ``links``.

        Args:
            links: Product URLs as returned by ``get_products_links()``.

        Returns:
            A list with one ``get_single_product_info()`` result per link,
            in the same order.
        """
        asins = self.get_asins(links)
        return [self.get_single_product_info(asin) for asin in asins]

    def get_single_product_info(self, asin):
        """Open the product page for ``asin`` and collect its details.

        Returns:
            A dict with the keys ``'asin'``, ``'url'``, ``'title'``,
            ``'seller'`` and ``'price'``. ``'title'`` and ``'seller'`` hold
            whatever ``get_title()`` / ``get_seller()`` return (``None`` when
            the element is missing).
        """
        print(f"Product ID: {asin} - getting data...")
        product_short_url = self.shorten_url(asin)
        self.driver.get(f'{product_short_url}?language=en_GB')
        time.sleep(2)  # Wait to load page
        return {
            'asin': asin,
            'url': product_short_url,
            'title': self.get_title(),
            'seller': self.get_seller(),
            'price': self.get_price(),
        }

    def get_title(self):
        """Return the element with id ``productTitle``, or ``None`` on error."""
        try:
            return self.driver.find_element_by_id('productTitle')
        except Exception as e:
            print(e)
            print(f"Can't get title of a product - {self.driver.current_url}")
            return None

    def get_seller(self):
        """Return the element with id ``bylineInfo``, or ``None`` on error."""
        try:
            return self.driver.find_element_by_id('bylineInfo')
        except Exception as e:
            print(e)
            print(f"Can't get seller of a product - {self.driver.current_url}")
            return None

    def get_price(self):
        """Return the product price (currently a fixed placeholder)."""
        return '$99'

    def shorten_url(self, asin):
        """Return the short product URL: ``base_url`` + ``'dp/'`` + ``asin``."""
        return self.base_url + 'dp/' + asin

    def get_asins(self, links):
        """Return the ASIN extracted from each link, preserving order."""
        return [self.get_asin(link) for link in links]

    def get_asin(self, product_link):
        """Return the text between ``'/dp/'`` and ``'/ref'`` in ``product_link``."""
        # +4 skips past the '/dp/' marker itself.
        return product_link[product_link.find('/dp/') + 4:product_link.find('/ref')]

    def get_products_links(self):
        """Search for ``self.search_term`` and return all product links.

        Loads ``self.base_url``, submits the search term through the element
        with id ``twotabsearchtextbox``, reloads the results URL with
        ``self.price_filter`` appended and reads the ``href`` of every anchor
        matched by ``_PRODUCT_LINK_XPATH``.

        Returns:
            A list of ``href`` values. The list is empty when the lookup
            raises (the exception is printed, not propagated).
        """
        self.driver.get(self.base_url)  # Go to amazon.com using BASE_URL
        element = self.driver.find_element_by_id('twotabsearchtextbox')
        element.send_keys(self.search_term)
        element.send_keys(Keys.ENTER)
        time.sleep(2)  # Wait to load page
        self.driver.get(f'{self.driver.current_url}{self.price_filter}')
        time.sleep(2)  # Wait to load page
        result_list = self.driver.find_elements_by_class_name('s-result-list')

        links = []
        try:
            # One query returns the anchors of every result container, so no
            # per-index loop is needed.
            results = result_list[0].find_elements_by_xpath(
                self._PRODUCT_LINK_XPATH)
            links = [link.get_attribute('href') for link in results]
        except Exception as e:
            # Best effort: report the failure and fall through with an empty
            # list so run() can stop cleanly.
            print("Didn't get any products...")
            print(e)
        return links


  if __name__ == '__main__':
      # Script entry point: only runs when the file is executed directly.
      print("HEY!!!")
  • Related