Home > Enterprise >  How to get rid of this index out of range error? I know the reason but don't know the solution
How to get rid of this index out of range error? I know the reason but don't know the solution

Time:06-28

I am using Selenium with Python to scrape web data. I want to scrape the data from a page and, as the last step, enter a randomly generated email address in the input box to get a quote, which takes me to another page. When I go back to the previous page after collecting the price list, everything stops working because of Selenium's stale element error. To get rid of that error I call find_elements again inside the for loop, but this gives me a different kind of error.

Here is my code

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager

import time

import random
import string


# For generating fake email

def random_char(y):
    """Return a string of ``y`` randomly chosen ASCII letters.

    The result serves as the local part of a throwaway e-mail address.
    """
    letters = []
    for _ in range(y):
        # One independent draw per character, upper- or lower-case.
        letters.append(random.choice(string.ascii_letters))
    return ''.join(letters)

# Walk every option combination on the page, request a quote for each one and
# write the collected rows to a CSV file.
# NOTE(review): '//div[@]', '//button[@]' and '//div[@]/ul[@]' are not valid
# XPath expressions -- the attribute predicates appear to have been lost.
# Restore the real selectors before running.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

search_url = "https://northladder.com/en/ae/electronics/laptop/lenovo/thinkpad/v1/5e2c888401c848ac4f695bb8"
driver.get(search_url)
time.sleep(3)  # fixed wait after navigation

ALL_ITEMS = []
# Slots 0-5 hold the text of the chosen options, slots 6-10 the five prices.
item = ["", "", "", "", "", "", "", "", "", "", ""]

sections = driver.find_elements(By.XPATH, '//div[@]')
processor = sections[0].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")

# Each loop iterates by index and looks its elements up again on every pass,
# so that no element reference obtained before a page change is reused.
for i in range(len(processor)):
    print(i)
    sections = driver.find_elements(By.XPATH, '//div[@]')
    processor = sections[0].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
    item[0] = processor[i].text

    # The click is issued through JavaScript instead of WebElement.click().
    driver.execute_script("arguments[0].click()", processor[i])

    time.sleep(2)

    sections = driver.find_elements(By.XPATH, '//div[@]')
    generation = sections[1].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")

    for g in range(len(generation)):
        sections = driver.find_elements(By.XPATH, '//div[@]')
        generation = sections[1].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
        item[1] = generation[g].text

        driver.execute_script("arguments[0].click()", generation[g])
        time.sleep(2)

        sections = driver.find_elements(By.XPATH, '//div[@]')
        ram = sections[2].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")

        for r in range(len(ram)):
            sections = driver.find_elements(By.XPATH, '//div[@]')
            ram = sections[2].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
            item[2] = ram[r].text

            driver.execute_script("arguments[0].click()", ram[r])
            time.sleep(2)

            sections = driver.find_elements(By.XPATH, '//div[@]')
            model = sections[3].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")

            for m in range(len(model)):
                sections = driver.find_elements(By.XPATH, '//div[@]')
                model = sections[3].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
                item[3] = model[m].text

                driver.execute_script("arguments[0].click()", model[m])
                time.sleep(2)

                sections = driver.find_elements(By.XPATH, '//div[@]')
                screensize = sections[4].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")

                for s in range(len(screensize)):
                    sections = driver.find_elements(By.XPATH, '//div[@]')
                    screensize = sections[4].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
                    item[4] = screensize[s].text

                    driver.execute_script("arguments[0].click()", screensize[s])
                    time.sleep(2)

                    sections = driver.find_elements(By.XPATH, '//div[@]')
                    drive = sections[5].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")

                    for d in range(len(drive)):
                        sections = driver.find_elements(By.XPATH, '//div[@]')
                        drive = sections[5].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
                        # NOTE(review): the drive option's text is recorded
                        # but the option itself is never clicked here.
                        item[5] = drive[d].text

                        # Random local part + fixed domain gives a fake address.
                        email = random_char(7) + "@gmail.com"
                        sections = driver.find_elements(By.XPATH, '//div[@]')
                        email_input = sections[0].find_element(by=By.ID, value='checkout-user-detail_email')
                        email_input.send_keys(email)
                        button = driver.find_element(By.XPATH, '//button[@]')
                        driver.execute_script("arguments[0].click()", button)
                        time.sleep(5)

                        # Read the five prices shown on the quote page.
                        quote = driver.find_elements(By.XPATH, '//div[@]/ul[@]')
                        price_quote = quote[0].find_elements(By.XPATH, ".//li[@class='fgp-item']/h1[@class='price']")
                        item[6] = price_quote[0].text
                        item[7] = price_quote[1].text
                        item[8] = price_quote[2].text
                        item[9] = price_quote[3].text
                        item[10] = price_quote[4].text
                        print(item)
                        ALL_ITEMS.append(item.copy())  # duplicate `item` because I will use the same list to get new results

                        # NOTE(review): nothing re-selects the options after
                        # going back; if the page resets its selections, the
                        # later sections[...] lookups can raise IndexError.
                        driver.execute_script("window.history.go(-1)")


for item in ALL_ITEMS:
    print(item)
my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('lenovo_thinkpad.csv', index=False, header=False)

Here is my error

['2018 Series', 'Core i7', '6th Gen', '8 GB', '2018', '256 GB', 'AED 595', 'AED 465', 'AED 405', 'AED 250', 'AED 115']
1
['2017 Series', 'Core i7', '6th Gen', '8 GB', '2017', '256 GB', 'AED 850', 'AED 675', 'AED 585', 'AED 365', 'AED 185']
Traceback (most recent call last):
  File "/home/noor/Documents/testing_python/scrapper.py", line 46, in <module>
    generation = sections[1].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
IndexError: list index out of range

CodePudding user response:

The whole problem is that you open another page (Get an Offer), and after going back all selections are cleared: the browser expects the user to start from the beginning, but the code expects some sections to still be selected.

The code needs to remember all selections and click them again later.

Here I remember the selections in item_index:

# Illustrative outline only: `# ... code ...` stands for the existing loop body.
# item_index has one slot per option section; every loop level stores the
# index it is currently processing in its own slot.
item_index = [0, 0, 0, 0, 0, 0]  # remember all selections

for i in range(len(processor)):
    item_index[0] = i  # remember selection
    # ... code ...

    for g in range(len(generation)):
        item_index[1] = g  # remember selection
        # ... code ...

        for r in range(len(ram)):
            item_index[2] = r  # remember selection
            # ... code ...

# ... etc. ...

And later select them again

    # Return to the previous page in the browser history.
    driver.execute_script("window.history.go(-1)")

    # Click the remembered option of every section again, in section order.
    for number, index in enumerate(item_index):
        print(f'[reselect] section {number} -> index {index}')
        # Look the elements up afresh on each pass instead of reusing
        # references obtained before the navigation.
        sections = driver.find_elements(By.XPATH, '//div[@]')
        items = sections[number].find_elements(By.XPATH, ".//label/div[@class='select-item m-1']")
        driver.execute_script("arguments[0].click()", items[index])
        time.sleep(3)

Full working code:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

import pandas as pd

import time
import random
import string

# --- functions ---

def random_char(number):
    """Return ``number`` random ASCII letters joined into one string.

    Letters are drawn with replacement; the result is used as the local
    part of a fake e-mail address.
    """
    picked = random.choices(string.ascii_letters, k=number)
    return ''.join(picked)


def reselect(item_index, sleep_time=3):
    """Click the remembered option of every section again.

    ``item_index`` holds one option index per section, in section order.
    After each click the function sleeps for ``sleep_time`` seconds.
    Relies on the module-level ``driver``.
    """
    for number, index in enumerate(item_index):
        print(f'[reselect] section {number} -> index {index}')
        # Fresh lookups on every pass: no element reference is reused
        # across clicks.
        section = driver.find_elements(By.XPATH, '//div[@]')[number]
        options = section.find_elements(
            By.XPATH, ".//label/div[@class='select-item m-1']"
        )
        driver.execute_script("arguments[0].click()", options[index])
        time.sleep(sleep_time)

# --- main ---

# Walk every option combination on the page, request a quote for each one,
# go back, re-select the remembered options and finally write all rows to CSV.
# NOTE(review): '//div[@]', '//button[@]' and '//div[@]/ul[@]' are not valid
# XPath expressions -- the attribute predicates appear to have been lost.
# Restore the real selectors before running.
SECTIONS_XPATH = '//div[@]'
OPTIONS_XPATH = ".//label/div[@class='select-item m-1']"

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

search_url = "https://northladder.com/en/ae/electronics/laptop/lenovo/thinkpad/v1/5e2c888401c848ac4f695bb8"
driver.get(search_url)
time.sleep(3)  # fixed wait after navigation

ALL_ITEMS = []
# Slots 0-5 hold the text of the chosen options, slots 6-10 the five prices.
item = [""] * 11
item_index = [0] * 6  # remember all selections

sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
processor = sections[0].find_elements(By.XPATH, OPTIONS_XPATH)

# Each loop iterates by index and looks its elements up again on every pass,
# so that no element reference obtained before a page change is reused.
for i in range(len(processor)):
    print('sections[0]:', i)

    sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
    processor = sections[0].find_elements(By.XPATH, OPTIONS_XPATH)
    item[0] = processor[i].text
    item_index[0] = i  # remember selection

    # The click is issued through JavaScript instead of WebElement.click().
    driver.execute_script("arguments[0].click()", processor[i])
    time.sleep(2)

    sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
    generation = sections[1].find_elements(By.XPATH, OPTIONS_XPATH)

    for g in range(len(generation)):
        print('sections[1]:', g)

        sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
        generation = sections[1].find_elements(By.XPATH, OPTIONS_XPATH)
        item[1] = generation[g].text
        item_index[1] = g  # remember selection

        driver.execute_script("arguments[0].click()", generation[g])
        time.sleep(2)

        sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
        ram = sections[2].find_elements(By.XPATH, OPTIONS_XPATH)

        for r in range(len(ram)):
            print('sections[2]:', r)

            sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
            ram = sections[2].find_elements(By.XPATH, OPTIONS_XPATH)
            item[2] = ram[r].text
            item_index[2] = r  # remember selection

            driver.execute_script("arguments[0].click()", ram[r])
            time.sleep(2)

            sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
            model = sections[3].find_elements(By.XPATH, OPTIONS_XPATH)

            for m in range(len(model)):
                # Report this loop's own index (was printing `r`).
                print('sections[3]:', m)

                sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
                model = sections[3].find_elements(By.XPATH, OPTIONS_XPATH)
                item[3] = model[m].text
                item_index[3] = m  # remember selection

                driver.execute_script("arguments[0].click()", model[m])
                time.sleep(2)

                sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
                screensize = sections[4].find_elements(By.XPATH, OPTIONS_XPATH)

                for s in range(len(screensize)):
                    print('sections[4]:', s)

                    sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
                    screensize = sections[4].find_elements(By.XPATH, OPTIONS_XPATH)
                    item[4] = screensize[s].text
                    item_index[4] = s  # remember selection

                    driver.execute_script("arguments[0].click()", screensize[s])
                    time.sleep(2)

                    sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
                    drive = sections[5].find_elements(By.XPATH, OPTIONS_XPATH)

                    for d in range(len(drive)):
                        print('sections[5]:', d)

                        sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
                        drive = sections[5].find_elements(By.XPATH, OPTIONS_XPATH)
                        item[5] = drive[d].text
                        item_index[5] = d  # remember selection

                        # The last option has to be clicked as well (this
                        # click was missing in the question's code).
                        sections = driver.find_elements(By.XPATH, SECTIONS_XPATH)
                        driver.execute_script("arguments[0].click()", drive[d])

                        print('mail')
                        # Random local part + fixed domain gives a fake address.
                        email = random_char(7) + "@gmail.com"
                        email_input = sections[0].find_element(by=By.ID, value='checkout-user-detail_email')
                        email_input.send_keys(email)
                        time.sleep(1)

                        print('click button')
                        button = driver.find_element(By.XPATH, '//button[@]')
                        driver.execute_script("arguments[0].click()", button)
                        time.sleep(5)

                        print('prices')
                        quote = driver.find_element(By.XPATH, '//div[@]/ul[@]')
                        price_quote = quote.find_elements(By.XPATH, ".//li[@class='fgp-item']/h1[@class='price']")
                        # The five prices go into slots 6-10 of `item`.
                        for number in range(5):
                            item[6 + number] = price_quote[number].text
                            print(f'{number} -> {price_quote[number].text}')

                        print(item)
                        ALL_ITEMS.append(item.copy())  # duplicate `item` because I will use the same list to get new results

                        print('go back')
                        driver.execute_script("window.history.go(-1)")
                        time.sleep(3)

                        # Going back clears the selections, so click the
                        # remembered options again before continuing.
                        reselect(item_index)

for item in ALL_ITEMS:
    print(item)

my_df = pd.DataFrame(ALL_ITEMS)
my_df.to_csv('lenovo_thinkpad.csv', index=False, header=False)
  • Related