Trying to read a list of URLs from .csv file and scrape product price. Any suggestions to loop through urls would be great. I can return price and title of first product and then I get a connection refused error.
ConnectionRefusedError: [Errno 61] Connection refused error
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import pandas as pd
import csv
import time
s=Service(executable_path="/Users/Downloads/chromedriver")
driver = webdriver.Chrome(service=s)
chrome_options = Options()
chrome_options.add_argument('--headless')
webdriver = webdriver.Chrome(service=s, options=chrome_options)
source = 'offsaks.csv'
with open(source, 'rt', encoding='utf-8-sig') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
next(readCSV)
data_list = []
urls = []
for row in readCSV:
url = row[1]
urls.append(url)
for url in urls:
with webdriver as driver:
wait = WebDriverWait(driver, 10)
time.sleep(10)
driver.get(url)
print(url)
xpath = driver.find_element(by=By.XPATH, value='//*[@id="maincontent"]/div[1]/div[2]/div[3]/div/div[1]/div/div/div/span/span[1]/span[2]/span/span').text
print(xpath)
title = driver.find_element(by=By.XPATH, value='//*[@id="maincontent"]/div[1]/div[2]/div[3]/div/h1/span[2]/span/span').text
print(title)
data = {
'Price': xpath,
'Title': title
}
data.update()
data_list.append(data)
driver.refresh()
time.sleep(5)
driver.quit()
df = pd.DataFrame(data_list)
df.to_csv('test_1.csv', index=False)
CodePudding user response:
I believe it's because of the driver.quit(), you're using it before the for is complete. Try moving it to another path within the code when finished. I also recommend using try and catch in your python application in selenium, I believe it will be easier to find errors.
Try like this:
try:
xxxxxxxxxx
for url in urls:
xxxxxxx
finally:
driver.quit()