I'm trying to use a multi-threaded strategy with Selenium. In short, I'm trying to fill in an input field with ids.
This is my script:
from concurrent.futures import ThreadPoolExecutor
from selenium.webdriver.common.by import By
import numpy as np
import sys
from selenium import webdriver
def driver_setup():
    """Create and return a new Firefox WebDriver instance.

    Each call launches a separate browser through the geckodriver binary
    named ``geckodriver.exe``. The caller owns the returned driver and is
    responsible for calling ``quit()`` on it.

    Returns:
        The newly created ``webdriver.Firefox`` instance.
    """
    path = "geckodriver.exe"
    options = webdriver.FirefoxOptions()
    # NOTE(review): --incognito is the Chrome spelling; Firefox's
    # private-browsing switch is -private -- confirm the intended flag.
    options.add_argument('--incognito')
    # options.add_argument('--headless')
    driver = webdriver.Firefox(options=options, executable_path=path)
    return driver
def fetcher(id, driver):
    """Open the RoboForm test form in ``driver`` and type ``id`` into it.

    This is the code as posted in the question; the lines marked NOTE fail
    at runtime and are left unchanged on purpose.

    Args:
        id: The value to type into the ``30_user_id`` input field.
        driver: The WebDriver instance used to load the page.
    """
    print(id) #this works
    # this doesnt work
    driver.get(
        "https://www.roboform.com/filling-test-all-fields")
    # NOTE: id is a numpy.int64 when it comes from np.array_split(); the
    # second answer reports that send_keys() raises on it.
    driver.find_element(By.XPATH, '//input[@name="30_user_id"]').send_keys(id)
    # NOTE: time is not imported in the script as posted (NameError).
    time.sleep(2)
    # NOTE: i is not defined anywhere in this function (NameError).
    print(i, " sent")
    #return data
def crawler(ids):
    """Call ``fetcher`` for every id in ``ids``, each with a new driver.

    This is the code as posted in the question; the lines marked NOTE are
    defects that are left unchanged on purpose.

    Args:
        ids: Iterable of ids to process.
    """
    for id in ids:
        # NOTE: i is not defined (the loop variable is id), so this raises
        # NameError on the first iteration and fetcher is never reached.
        print(i)
        # NOTE: driver_setup() launches a new browser for every id, and the
        # returned driver is never quit.
        results = fetcher(id, driver_setup())
# NOTE: these four drivers are never passed to crawler or fetcher; they are
# only quit again at the end of the script.
drivers = [driver_setup() for _ in range(4)]
ids = list(range(0,50)) # generates ids
print(ids)
chunks = np.array_split(np.array(ids),4) #splits the id list into 4 chunks
with ThreadPoolExecutor(max_workers=4) as executor:
    # executor.map() returns a lazy iterator over the results. An exception
    # raised inside crawler is only re-raised when that iterator is consumed,
    # which never happens here, so the script ends without a traceback.
    bucket = executor.map(crawler, chunks)
#results = [item for block in bucket for item in block]
[driver.quit() for driver in drivers]
Everything seems to work except the send_keys method. Both print() calls work, so it seems the ids are passed to both functions. Weirdly, I don't get an error message (I get PyCharm's "Process finished with exit code 0" notice), so I don't know what I'm doing wrong.
Any idea what is missing?
I used this example : https://blog.devgenius.io/multi-threaded-web-scraping-with-selenium-dbcfb0635e83 if it helps
CodePudding user response:
Possibly you are trying to invoke send_keys() too early, even before the <input> field has rendered completely.
Solution
Ideally, to send a character sequence to the element you need to induce WebDriverWait for element_to_be_clickable(), and you can use any of the following locator strategies:
Using NAME:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.NAME, "30_user_id"))).send_keys(id)
Using CSS_SELECTOR:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='30_user_id']"))).send_keys(id)
Using XPATH:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@name='30_user_id']"))).send_keys(id)
Note: You have to add the following imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
CodePudding user response:
When using threading, watch out for exceptions: an exception raised in a worker thread is stored with that task's result and is only re-raised when you retrieve the result. For example, change your code to include the tweaked code below (don't change any other line yet):
with ThreadPoolExecutor(max_workers=4) as executor:
    bucket = executor.map(crawler, chunks)
    # bucket is a lazy iterator over crawler's return values (not a list of
    # futures); consuming it re-raises any exception raised in a worker.
    for e_buck in bucket:  # added for the demo
        print(e_buck)
You will see that you get exceptions like:
- i is not defined: look at the statements print(i, " sent") in fetcher and print(i) in crawler.
- Once you fix the above error, the next error will be in the argument to send_keys: in send_keys(id), id is of type numpy.int64. Convert it to str with a typecast, str(): send_keys(str(id)).
So your code, after the fixes, will look like:
from concurrent.futures import ThreadPoolExecutor
from selenium.webdriver.common.by import By
import numpy as np
import sys
from selenium import webdriver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains as AC
from selenium.webdriver.common.keys import Keys
import time
def driver_setup():
    """Create and return a new Firefox WebDriver in private-browsing mode.

    Each call launches a separate browser through the geckodriver binary
    named ``geckodriver.exe``. The caller owns the returned driver and must
    call ``quit()`` on it when finished.

    Returns:
        The newly created ``webdriver.Firefox`` instance.
    """
    path = "geckodriver.exe"
    options = webdriver.FirefoxOptions()
    # Firefox's private-browsing switch is -private; --incognito is the
    # Chrome spelling.
    options.add_argument('-private')
    # options.add_argument('--headless')  # uncomment to run without a window
    # NOTE(review): executable_path is deprecated in Selenium 4 in favour of
    # a Service object -- confirm the installed Selenium version before
    # switching.
    driver = webdriver.Firefox(options=options, executable_path=path)
    return driver
def fetcher(id, driver):
    """Open the RoboForm test form in ``driver`` and type ``id`` into it.

    Args:
        id: The value to type into the ``30_user_id`` input field. It is
            converted with ``str()`` first, so integers (including NumPy
            integers) are accepted.
        driver: The WebDriver instance used to load the page.

    Raises:
        selenium.common.exceptions.TimeoutException: If the input field has
            not become clickable within 20 seconds.
    """
    # Imported locally so the function does not rely on extra module-level
    # imports.
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    print(id)
    driver.get("https://www.roboform.com/filling-test-all-fields")
    # Wait for the field instead of looking it up immediately, so a field
    # that is not ready yet does not make the lookup fail.
    field = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//input[@name="30_user_id"]'))
    )
    # send_keys() needs text; ids taken from a NumPy array are numpy.int64.
    field.send_keys(str(id))
    # Pause kept from the original; presumably it makes the typed value
    # visible before the next page load.
    time.sleep(2)
    print(id, " sent")
def crawler(ids):
    """Fill the form once for every id in ``ids`` using a single browser.

    One driver is created for the whole chunk and is always quit at the
    end, even if ``fetcher`` raises. Creating a driver per id would launch
    one browser per id and leave all of them running.

    Args:
        ids: Sized iterable of ids to process (for example one chunk from
            ``np.array_split``).
    """
    # len() rather than truthiness: a NumPy array with more than one element
    # has no unambiguous truth value.
    if len(ids) == 0:
        return
    driver = driver_setup()
    try:
        for item_id in ids:
            print(item_id)
            fetcher(item_id, driver)
    finally:
        driver.quit()
# No driver pool is created here; crawler obtains its drivers through
# driver_setup().
ids = list(range(0, 50))  # generates ids
print(ids)
chunks = np.array_split(np.array(ids), 4)  # splits the id list into 4 chunks
with ThreadPoolExecutor(max_workers=4) as executor:
    bucket = executor.map(crawler, chunks)
    # bucket is a lazy iterator over crawler's return values (not a list of
    # futures). Consuming it re-raises any exception raised in a worker
    # thread; without this loop such errors would go unnoticed.
    for e_buck in bucket:
        print(e_buck)