I am trying to run the following python code
Technology: Python, Selenium scraper
Device: Windows device
I am getting the following error:
Traceback (most recent call last):
  File "scraper.py", line 35, in <module>
    for row in cp_url:
ValueError: I/O operation on closed file.
#!/usr/bin/python3
# Description: For each URL listed in inputLinks1.csv, load the page in Chrome
# and, for each "<label>;<xpath>" entry in xpathtags.csv, append one line per
# matched element (a fixed set of its attributes) to output1.csv.
# NOTE(review): the indentation of this script was lost when it was pasted, so
# block nesting has to be inferred from the statements and the traceback.
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Update the driver path whenever Chrome is upgraded so that the ChromeDriver
# binary matches the installed Chrome version.
driver = webdriver.Chrome(
executable_path="D:\chromedriver.exe")
# NOTE(review): these options are created after the driver already exists and
# are not passed to it anywhere in the visible code.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# Accumulates every visited link, tag label and XPath; it is only appended to
# in the visible code.
contents = []
filePath = 'output1.csv'
# Remove the output file left by a previous run; check that it exists first so
# os.remove is only called on an existing path.
if os.path.exists(filePath):
os.remove(filePath)
else:
print("Can not delete the file as it doesn't exists")
# Write the CSV header line. This handle is not closed before `f` is rebound
# by the open() call further down.
f = open("output1.csv", "a")
f.write("website," "htmltag," "type," "id," "classname," "for," "href," "alt," "type," "src,"
"name," "width," "height," "data-src," 'inner-text,' 'action,' 'value,' "\n")
with open('inputLinks1.csv', 'rt') as cp_csv:
cp_url = csv.reader(cp_csv)
# NOTE(review): the reported traceback (ValueError: I/O operation on closed
# file, raised at this `for` statement) indicates that this loop runs after the
# `with` block above has exited and closed cp_csv.
for row in cp_url:
# The first column of each input row is the URL to load.
links = row[0]
contents.append(links)
driver.get(links)
with open('xpathtags.csv', 'rt') as cp2_csv:
cp_url2 = csv.reader(cp2_csv)
for row1 in cp_url2:
print(row[0])
# Each xpathtags.csv entry is split on ';' into a label and an XPath.
(xtype, xpathtext) = row1[0].split(';')
print(xtype, xpathtext)
contents.append(xtype)
contents.append(xpathtext)
elems = driver.find_elements_by_xpath(xpathtext)
for elem in elems:
# The output file is reopened in append mode for every matched element.
f = open('output1.csv', 'a', encoding='utf-8')
# NOTE(review): the `+` operators that presumably joined these operands appear
# to have been stripped from the paste; as written this call is not valid
# Python.
f.write( links ", " xtype ","
str(elem.get_attribute('type')) ', '
str(elem.get_attribute('id')) ', '
str(elem.get_attribute('class')) ', '
str(elem.get_attribute('for')) ', '
str(elem.get_attribute('href')) ', '
str(elem.get_attribute('alt')) ', '
str(elem.get_attribute('type')) ', '
str(elem.get_attribute('src')) ', '
str(elem.get_attribute('name')) ', '
str(elem.get_attribute('width')) ', '
str(elem.get_attribute('height')) ', '
str(elem.get_attribute('data-src')) ', '
str(elem.get_attribute('innerText').strip()) ', '
str(elem.get_attribute('action')) ', '
str(elem.get_attribute('value')) ', '
'\n')
f.close()
driver.close()
I am using the following CSV files
A) inputlinks1.csv
www.flipkart.com
www.ebay.com
B) xpathtags.csv
Link;//a[@href]
Button;//button
Image;//img
Heading1;//h1
Heading2;//h2
Heading3;//h3
Heading4;//h4
C) Output.csv
is a blank file
I am getting the following error
Traceback (most recent call last):
File "scraper.py", line 35, in <module>
for row in cp_url:
ValueError: I/O operation on closed file.
CodePudding user response:
I can't test it, but I think your problem is wrong indentation:
with open('inputLinks1.csv', 'rt') as cp_csv:
    cp_url = csv.reader(cp_csv)

# Problematic layout: this loop sits outside the `with` block, so cp_csv has
# already been closed by the time the reader tries to pull rows from it.
for row in cp_url:
    pass  # ...rest...
so you run the `for` loop outside `with ... as ...`, and `with ... as ...` automatically closes the file when its block ends.
You should run the `for` loop inside `with ... as ...`:
with open('inputLinks1.csv', 'rt') as cp_csv:
    cp_url = csv.reader(cp_csv)
    # The loop is inside the `with` block, so cp_csv is still open while the
    # reader iterates over it.
    for row in cp_url:
        pass  # ...rest...
Or you could use the standard `open()` and `close()` calls:
cp_csv = open('inputLinks1.csv', 'rt')
cp_url = csv.reader(cp_csv)
for row in cp_url:
    pass  # ...rest...
# Close the file only after the loop has finished reading from it.
cp_csv.close()
CodePudding user response:
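I had to make a few changes to your code to get it working.
After fixing the indentation, it threw another error caused by the inputLinks1.csv file: the URLs must include the scheme (https://) for `driver.get()` to load them.
I changed the file contents to: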
https://www.flipkart.com
https://www.ebay.com
Also, always try to use `with open(...)` when handling files, so that they are closed automatically.
Code snippet:
# For every URL in inputLinks1.csv, load the page with the Selenium `driver`
# created earlier in the script and, for every "<label>;<xpath>" entry in
# xpathtags.csv, write one CSV row per matched element to output1.csv.
contents = []
filePath = 'output1.csv'

# Output column names. 'type' appears twice on purpose: it keeps the column
# layout of the original output unchanged.
header = ['website', 'htmltag', 'type', 'id', 'classname', 'for', 'href',
          'alt', 'type', 'src', 'name', 'width', 'height', 'data-src',
          'inner-text', 'action', 'value']
# Element attribute feeding each column after the website/htmltag pair.
attributes = ['type', 'id', 'class', 'for', 'href', 'alt', 'type', 'src',
              'name', 'width', 'height', 'data-src', 'innerText', 'action',
              'value']

# Remove the output file left by a previous run; check that it exists first so
# os.remove is only called on an existing path.
if os.path.exists(filePath):
    os.remove(filePath)
else:
    print("Can not delete the file as it doesn't exists")

# Read the "<label>;<xpath>" pairs once instead of re-reading the file for
# every URL. Lines are split on the first ';' only, so XPaths that contain
# commas or further semicolons are kept intact.
xpath_tags = []
with open('xpathtags.csv', 'r') as cp2_csv:
    for line in cp2_csv:
        line = line.strip()
        if not line:
            continue
        xtype, xpathtext = line.split(';', 1)
        xpath_tags.append((xtype, xpathtext))

try:
    # Open the output once. csv.writer quotes values containing commas, quotes
    # or newlines (common in innerText), and newline='' stops the csv module
    # from emitting blank lines between rows on Windows.
    with open(filePath, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        with open('inputLinks1.csv', 'r') as cp_csv:
            # The loop stays inside the `with` block so cp_csv is still open
            # while the reader iterates over it.
            for row in csv.reader(cp_csv):
                if not row:
                    continue  # skip blank lines in the input file
                links = row[0]
                print(links)
                contents.append(links)
                driver.get(links)
                for xtype, xpathtext in xpath_tags:
                    print(links)
                    print(xtype, xpathtext)
                    contents.append(xtype)
                    contents.append(xpathtext)
                    # find_elements('xpath', ...) is accepted by both
                    # Selenium 3 and Selenium 4; find_elements_by_xpath was
                    # removed in later Selenium 4 releases.
                    elems = driver.find_elements('xpath', xpathtext)
                    for elem in elems:
                        values = []
                        for attribute in attributes:
                            value = elem.get_attribute(attribute)
                            if attribute == 'innerText':
                                # Guard against a missing innerText before
                                # stripping surrounding whitespace.
                                value = (value or '').strip()
                            # str() keeps missing attributes as "None", as in
                            # the original output.
                            values.append(str(value))
                        writer.writerow([links, xtype] + values)
finally:
    # Always release the browser window, even if a page or XPath fails.
    driver.close()