I wrote a class to create a instance of Selenium webdriver then scrap some data. Everything works fine in single thread but when I try switching to multithread all operation only execute in one browser, the another one launched but nothing happened.
Here is my multithread code
def run_downloader(account, date_range, company_name, invoice_type):
downloader = InvoiceDownloaderEnchanted(account=account,
date_range=date_range,
company_name=company_name,
invoice_type=invoice_type)
downloader.start_webdriver()
downloader.process()
if __name__ == '__main__':
with ThreadPoolExecutor(max_workers=2) as executor:
executor.map(run_downloader,
repeat((cfg.USERNAME, cfg.PASSWORD)),
repeat(('01/08/2022', '31/08/2022')),
repeat('Company Name'),
[InvoiceType.INWARD, InvoiceType.OUTWARD])
And there is my webdriver Class init code:
class InvoiceDownloaderEnchanted:
def __init__(self,
account,
company_name: str,
invoice_type: InvoiceType,
date_range: tuple | None = None,
target_url: str | None = cfg.WEBSITE_URL
):
self.account = account
self.company_name = company_name
self.invoice_type = invoice_type
self.date_range = date_range
self.target_url = target_url
self.driver = None
def start_webdriver(self):
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--disable-gpu")
chrome_prefs = cfg.CHROME_PREFS
chrome_prefs['download.default_directory'] = self.download_folder
print(chrome_prefs['download.default_directory'])
chrome_options.add_experimental_option('prefs', chrome_prefs)
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
chrome_options.add_argument("--remote-debugging-port=9222")
user_agent = UserAgent().random
chrome_options.add_argument(f'user-agent={user_agent}')
self.driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()),
options=chrome_options)
print(self.driver)
self.driver.maximize_window()
def process(self):
# Do scrapping stuffs
After some debug, i notice that download.default_directory
of two instances are different (as i expected) when i print(chrome_prefs['download.default_directory'])
but is the same in chrome_options.experimental_options
.
I checked two webdriver instances and driver.capabilities['chrome']['userDataDir]
are also different:
<selenium.webdriver.chrome.webdriver.WebDriver (session="93cef277d1796c241cb75f729dec222f")>
<selenium.webdriver.chrome.webdriver.WebDriver (session="de08b1c6baad30587717082e41c294b5")>
Sometimes i run the code, it only open one browser. I think the problem is two webdriver instance is reference to one variable or something like that, but have no idea how to fix it.
CodePudding user response:
I fixed it by remove argument chrome_options.add_argument("--remote-debugging-port=9222")
from start_webdrive