Home > Software engineering >  Python Selenium Scraper error - ValueError: I/O operation on closed file
Python Selenium Scraper error - ValueError: I/O operation on closed file

Time:09-25

I am trying to run the Python code shown below.

Technology: Python, Selenium scraper
Device: Windows device

I am getting the following error:

Traceback (most recent call last):
  File "scraper.py", line 35, in <module>
    for row in cp_url:
ValueError: I/O operation on closed file.
#!/usr/bin/python3
# Description: for each URL in inputLinks1.csv, load the page and append attributes of the elements matched by each XPath in xpathtags.csv to output1.csv.
import time
import csv
import os


from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Change the driver path to the chromedriver build matching the latest Chrome version every time Chrome is updated.
driver = webdriver.Chrome(
    executable_path="D:\chromedriver.exe")  # NOTE(review): "\c" is not a recognised escape, so the backslash stays literal; a raw string would make that explicit
# NOTE(review): these options are built after the driver was created and are not passed to it in the code shown.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])

contents = []

filePath = 'output1.csv'
# Start from a fresh output file: remove any output1.csv left by a previous
# run, checking that it exists first so os.remove() does not raise.
if os.path.exists(filePath):
    os.remove(filePath)
else:
    print("Can not delete the file as it doesn't exists")
# Write the CSV header row. NOTE(review): this handle is not closed before `f` is rebound inside the loop below.
f = open("output1.csv", "a")
f.write("website,"   "htmltag,"   "type,"   "id,"   "classname,"   "for,"   "href,"   "alt,"   "type,"   "src,"
  "name,"   "width,"   "height,"   "data-src,"  'inner-text,'   'action,'   'value,'   "\n")
# NOTE(review): the `for` loop below sits outside the `with` block, so cp_csv is already closed
# when cp_url is iterated; that is what raises "ValueError: I/O operation on closed file".
with open('inputLinks1.csv', 'rt') as cp_csv:
 cp_url = csv.reader(cp_csv)
for row in cp_url:
        links = row[0]  # first column of the input row is the URL to visit
        contents.append(links)
        driver.get(links)
        with open('xpathtags.csv', 'rt') as cp2_csv:  # re-read for every URL
            cp_url2 = csv.reader(cp2_csv)
            for row1 in cp_url2:
                print(row[0])
                (xtype, xpathtext) = row1[0].split(';') 
                print(xtype, xpathtext)
                contents.append(xtype)
                contents.append(xpathtext)
                elems = driver.find_elements_by_xpath(xpathtext)
                for elem in elems:
                    f = open('output1.csv', 'a', encoding='utf-8')  # reopened in append mode for every matched element
                    f.write( links   ", "  xtype   "," 
                          str(elem.get_attribute('type'))   ', '
                          str(elem.get_attribute('id'))   ', '
                          str(elem.get_attribute('class'))   ', '
                          str(elem.get_attribute('for'))   ', '
                          str(elem.get_attribute('href'))   ', '
                          str(elem.get_attribute('alt'))   ', '                        
                          str(elem.get_attribute('type'))   ', '
                          str(elem.get_attribute('src'))   ', '
                          str(elem.get_attribute('name'))   ', '
                          str(elem.get_attribute('width'))   ', '
                          str(elem.get_attribute('height'))   ', '
                          str(elem.get_attribute('data-src'))   ', '
                          str(elem.get_attribute('innerText').strip())   ', '
                          str(elem.get_attribute('action'))   ', '
                           str(elem.get_attribute('value'))   ', '
                          # NOTE(review): no operators join the terms of this call as shown; presumably the `+` signs were lost when the code was pasted -- TODO confirm
                          '\n')
                   
                    f.close()  


driver.close()

I am using the following CSV files

A) inputlinks1.csv

www.flipkart.com
www.ebay.com

B) xpathtags.csv

Link;//a[@href]
Button;//button
Image;//img
Heading1;//h1
Heading2;//h2
Heading3;//h3
Heading4;//h4

C) Output.csv is a blank file

I am getting the following error

Traceback (most recent call last):
  File "scraper.py", line 35, in <module>
    for row in cp_url:
  ValueError: I/O operation on closed file.

CodePudding user response:

I can't test it, but I think your problem is wrong indentation:

with open('inputLinks1.csv', 'rt') as cp_csv:
 cp_url = csv.reader(cp_csv)
# Wrong: this loop is dedented out of the `with` block, so cp_csv is already closed here.
for row in cp_url:
    # ...rest...

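So the for-loop runs outside the with...as... block, and with...as... automatically closes the file as soon as the block ends. By the time the loop iterates over the reader, the underlying file is already closed.

You should run the for-loop inside the with...as... block: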

with open('inputLinks1.csv', 'rt') as cp_csv:
    cp_url = csv.reader(cp_csv)
    # Correct: the loop runs inside the `with` block, while cp_csv is still open.
    for row in cp_url:
        # ...rest...

Or you could use standard open() and close()

# No context manager here: the file stays open until close() is called explicitly.
cp_csv = open('inputLinks1.csv', 'rt')

cp_url = csv.reader(cp_csv)
for row in cp_url:
    # ...rest...

# NOTE: this line is not reached if the loop above raises, leaving the file open.
cp_csv.close()

CodePudding user response:

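I had to make a few changes to your code to get it working.

After fixing the indentation, it threw another error caused by the inputlinks1.csv file: the URLs need a scheme (https://) before driver.get() can load them. I changed the file contents to: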

https://www.flipkart.com
https://www.ebay.com

Also, always try to use `with open(...)` when handling files, so that they are closed automatically.

Code snippet:-

# Working version of the scraper body. It relies on `csv` being imported and
# on a started Selenium `driver` being in scope, as in the question's script.
contents = []

filePath = 'output1.csv'

# (CSV column header, DOM attribute name) pairs in output order. "type" is
# listed twice because the original header and rows both repeat it.
COLUMNS = [
    ('type', 'type'),
    ('id', 'id'),
    ('classname', 'class'),
    ('for', 'for'),
    ('href', 'href'),
    ('alt', 'alt'),
    ('type', 'type'),
    ('src', 'src'),
    ('name', 'name'),
    ('width', 'width'),
    ('height', 'height'),
    ('data-src', 'data-src'),
    ('inner-text', 'innerText'),
    ('action', 'action'),
    ('value', 'value'),
]


def _read_links(path):
    """Return the first cell of every non-empty row of the CSV file at *path*."""
    with open(path, 'r', newline='') as handle:
        # csv.reader yields [] for blank lines; skip them instead of failing
        # on row[0].
        return [row[0] for row in csv.reader(handle) if row]


def _read_xpath_tags(path):
    """Return (label, xpath) pairs parsed from 'label;xpath' lines in *path*.

    Lines are split at the first ';' only and are not passed through
    csv.reader, so XPath expressions containing commas stay intact.

    Raises:
        ValueError: if a non-empty line contains no ';' separator.
    """
    pairs = []
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue
            label, separator, xpath = line.partition(';')
            if not separator:
                raise ValueError(
                    f"{path}:{line_number}: expected 'label;xpath', got {line!r}")
            pairs.append((label, xpath))
    return pairs


def _attribute_text(elem, name):
    """Return attribute *name* of *elem* as text (innerText is stripped)."""
    # str() first: a missing attribute becomes the text "None", as in the
    # original output, and stripping can no longer fail on None.
    text = str(elem.get_attribute(name))
    return text.strip() if name == 'innerText' else text


# The XPath list does not change between pages, so read it once up front.
xpath_tags = _read_xpath_tags('xpathtags.csv')

try:
    # Mode "w" truncates any output left by a previous run, so no separate
    # delete step is needed. The file is opened once, in UTF-8, for both the
    # header and the rows; newline='' is what the csv module expects.
    with open(filePath, 'w', newline='', encoding='utf-8') as f:
        # csv.writer quotes values containing commas, quotes or newlines
        # (hrefs, inner text), which hand-joined strings would corrupt.
        writer = csv.writer(f)
        writer.writerow(['website', 'htmltag'] + [header for header, _ in COLUMNS])

        for links in _read_links('inputLinks1.csv'):
            print(links)
            contents.append(links)
            driver.get(links)
            for xtype, xpathtext in xpath_tags:
                print(links)
                print(xtype, xpathtext)
                contents.append(xtype)
                contents.append(xpathtext)
                # find_elements("xpath", ...) works on Selenium 3 and 4;
                # find_elements_by_xpath was removed in Selenium 4.3.
                for elem in driver.find_elements('xpath', xpathtext):
                    writer.writerow(
                        [links, xtype]
                        + [_attribute_text(elem, attribute) for _, attribute in COLUMNS])
finally:
    # quit() ends the whole browser session and the chromedriver process,
    # even when scraping fails part-way; close() only closes the window.
    driver.quit()
  • Related