Variables
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

chrome_path = 'chromedriver'
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--disable-popup-blocking")
driver = webdriver.Chrome(chrome_path, options=chrome_options)

driver.get("https://gibiru.com/")
driver.find_element(By.CSS_SELECTOR, '.form-control.has-feedback.has-clear').click()
driver.find_element(By.CSS_SELECTOR, '.form-control.has-feedback.has-clear').send_keys("lfc")
driver.find_element(By.CSS_SELECTOR, '.form-control.has-feedback.has-clear').send_keys(Keys.RETURN)
driver.find_element(By.XPATH, "/html/body/div[1]/main/div[1]/div/div/div/div[2]").click()
time.sleep(2)
I have this try statement, which works perfectly, but it needs to be looped according to the value of page_length, which equals 10 in this situation.
try:
    # 1st page
    page_length = len(driver.find_elements(By.CSS_SELECTOR, "div.gsc-resultsRoot.gsc-tabData.gsc-tabdActive div.gsc-cursor-box.gs-bidi-start-align div.gsc-cursor div.gsc-cursor-page"))
    index_count = 0
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))  # Prints 10
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    index_count += 1
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index_count) + ']').click()
    time.sleep(2)

    # 2nd page
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))  # Prints 9
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    index_count += 1
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index_count) + ']').click()
    time.sleep(2)
except Exception as e:
    print(e)
    driver.quit()
But I would like help creating a for loop that does what the try statement does, in fewer lines of code. This is what I'm thinking of:
for x in page_array_number:
    index_count = 0
    current_page = int(page_length) - int(index_count)
    print("Number of available pages : " + str(current_page))
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    print("Counter is before : " + str(index_count))
    index_count += 1
    print("Counter is after : " + str(index_count))
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    time.sleep(2)
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index_count) + ']').click()
    time.sleep(2)
    if index_count == page_length:
        print("Done scraping urls from " + str(page_length) + " pages")
        break
The output I am getting suggests the counter is the problem: it doesn't add 1 on every loop.
CodePudding user response:
len() returns an integer, which is not an iterable object. I would use enumerate(), which returns the index and value of the next item in the iterable; enumerate() is also faster than manual index bookkeeping in many cases.
pages = driver.find_elements(By.CSS_SELECTOR, "div.gsc-cursor-page")  # pagination selector from the question
page_length = len(pages)
for index, value in enumerate(pages):
    current_page = page_length - index
    ...
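As a quick standalone illustration of what enumerate() yields (a toy list stands in for the pagination elements):
pages = ["page1", "page2", "page3"]
for index, value in enumerate(pages):
    print(index, value)
# 0 page1
# 1 page2
# 2 page3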
Also, the last two lines of code are redundant: if index_count == page_length, that is the last iteration of the loop and it will exit anyway.
Some other notes: if you are looping and don't need the loop variable, replace it with an underscore. In the code above, since we don't need the variable value:
for index, _ in enumerate(pages):
    # This makes clear that we don't use the values contained in pages
    current_page = page_length - index
    ...
Lastly, you can often get errors like NoSuchAttributeException and ElementNotInteractableException due to variations in page load and JS execution times. I would suggest encapsulating Selenium code that interacts with the web page in try/except statements.
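For example, a minimal sketch of that idea (the selector is just the pagination one from the question; adapt it to whatever element you are clicking):
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
)

try:
    driver.find_element(By.CSS_SELECTOR, "div.gsc-cursor-page").click()
except (ElementNotInteractableException, NoSuchElementException):
    # The page or its JS probably hasn't finished loading; wait and retry once.
    time.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "div.gsc-cursor-page").click()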
CodePudding user response:
I got it working with this for loop:
for index, item in enumerate(page_array_number):
    print(index)
    current_page = int(page_length) - int(index)
    print("Number of available pages : " + str(current_page))
    index = index + 1
    print("Counter is after : " + str(index))
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
            print(my_href.get_attribute("src"))
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    time.sleep(2)
    if index == page_length:
        # Check this first: if it came after index > 1, that branch would
        # swallow it and the loop would never report completion.
        print("Done scraping urls from " + str(page_length) + " pages")
        break
    elif index == 1:
        driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index + 1) + ']').click()
        time.sleep(2)
    else:
        driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(index) + ']').click()
        time.sleep(2)
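A slightly tighter variant is possible (just a sketch, reusing the question's variables; page_array_number is assumed to hold the pagination elements found earlier): let enumerate() start counting at 1, so the manual index = index + 1 disappears entirely.
for index, _ in enumerate(page_array_number, start=1):
    find_href = driver.find_elements(By.CSS_SELECTOR, 'img.gs-image.gs-image-scalable')
    with open("txt.txt", "a") as textFile:
        for my_href in find_href:
            textFile.write(str(my_href.get_attribute("src")) + "\n")
    driver.execute_script("window.scrollTo(100,document.body.scrollHeight);")
    time.sleep(2)
    if index == page_length:
        print("Done scraping urls from " + str(page_length) + " pages")
        break
    # div[1] is the current page on the first screen, so the first click
    # needs div[2]; after that the target index equals the loop index.
    target = index + 1 if index == 1 else index
    driver.find_element(By.XPATH, '/html/body/div[1]/main/div[2]/div[2]/div/div[1]/div/div/div/div/div[5]/div[2]/div[2]/div/div[2]/div/div[' + str(target) + ']').click()
    time.sleep(2)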