I am trying to write a scraper that will go on the eia.gov website and scrape electricity rates.
This is my scrape function:
from listOfElements import pieces
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import pyperclip
# Filesystem location of the chromedriver executable.
# NOTE(review): this is not a raw string; "\P" and "\c" are not recognised
# escape sequences, so the backslashes are kept literally, but a raw string
# (r"...") would make that explicit.
PATH = "C:\Program Files (x86)\chromedriver.exe"
# Module-level Chrome session used by scrape1(). The path is passed
# positionally, which is the call the run log reports a DeprecationWarning
# for ("executable_path has been deprecated, please pass in a Service object").
driver = webdriver.Chrome(PATH)
# We need a function that scrapes the eia.gov website for electricity rates.
def scrape1():
    """Copy each cell named in ``pieces`` via the clipboard into output.txt.

    For every XPath in ``pieces`` the page is loaded, the element is located,
    Ctrl+C is sent to it, and whatever ``pyperclip.paste()`` returns is
    written to ``output.txt``. Any exception raised for an XPath is caught
    and printed together with that XPath. Nothing is returned.
    """
    for piece in pieces:
        try:
            # The same URL is fetched again on every iteration.
            driver.get('https://www.eia.gov/electricity/monthly/epm_table_grapher.php?t=epmt_5_06_a')
            element = driver.find_element(By.XPATH, piece)
            #element.send_keys(Keys.CONTROL,'a')
            # NOTE(review): the XPaths in listOfElements all end in td[...],
            # and this is the only keyboard interaction in the try body, so it
            # is presumably the call that raises "element not interactable"
            # -- confirm.
            element.send_keys(Keys.CONTROL,'c')
            text = pyperclip.paste()
            # Mode 'w' truncates the file, so each iteration replaces what the
            # previous iteration wrote.
            with open('output.txt', 'w', encoding='utf-8') as f:
                f.write(text)
        except Exception as e:
            print(f'Exception while processing {piece} -> {e}')
This is my separate file (called listOfElements), which contains the list of element XPaths:
# XPath of the <tbody> that holds the rate rows; every entry in `pieces`
# is a cell underneath it.
_TBODY = '/html/body/div[1]/div[2]/div/div[3]/div/div/table/tbody'

# (row, column) position of each cell to scrape, in output order.
# NOTE(review): the Maine entries use columns 2/4/6 while the Massachusetts
# entries use 2/5/7/8 -- confirm the column indices against the live table.
_CELLS = (
    (3, 2),  # Maine Residential
    (3, 4),  # Maine Commercial
    (3, 6),  # Maine Industrial
    (4, 2),  # Massachusetts Residential
    (4, 5),  # Massachusetts Commercial
    (4, 7),  # Massachusetts Industrial
    (4, 8),  # Massachusetts Transportation
)

# Full XPath strings, one per cell, consumed by the scraper.
pieces = [f'{_TBODY}/tr[{row}]/td[{col}]' for row, col in _CELLS]
This is the error I am getting (partial stacktrace):
runfile('C:/Users/MYNAME/Desktop/Price Grabber/priceGrabber.py', wdir='C:/Users/MYNAME/Desktop/Price Grabber')
Reloaded modules: listOfElements
C:\Users\MYNAME\Desktop\Price Grabber\priceGrabber.py:21: DeprecationWarning: executable_path has been deprecated, please pass in a Service object
driver = webdriver.Chrome(PATH)
Exception while processing /html/body/div[1]/div[2]/div/div[3]/div/div/table/tbody/tr[3]/td[2] -> Message: element not interactable
(Session info: chrome=100.0.4896.127)
Stacktrace:
Backtrace:
Ordinal0 [0x00C97413 2389011]
Ordinal0 [0x00C29F61 1941345]
Ordinal0 [0x00B1C520 836896]
Ordinal0 [0x00B448E3 1001699]
Ordinal0 [0x00B43FBE 999358]
Ordinal0 [0x00B6414C 1130828]
Ordinal0 [0x00B3F974 981364]
Ordinal0 [0x00B64364 1131364]
Ordinal0 [0x00B74302 1196802]
Ordinal0 [0x00B63F66 1130342]
Ordinal0 [0x00B3E546 976198]
Ordinal0 [0x00B3F456 980054]
GetHandleVerifier [0x00E49632 1727522]
GetHandleVerifier [0x00EFBA4D 2457661]
GetHandleVerifier [0x00D2EB81 569713]
GetHandleVerifier [0x00D2DD76 566118]
Ordinal0 [0x00C30B2B 1968939]
Ordinal0 [0x00C35988 1989000]
Ordinal0 [0x00C35A75 1989237]
Ordinal0 [0x00C3ECB1 2026673]
BaseThreadInitThunk [0x776DFA29 25]
RtlGetAppContainerNamedObjectPath [0x77C37A7E 286]
RtlGetAppContainerNamedObjectPath [0x77C37A4E 238]
Exception while processing /html/body/div[1]/div[2]/div/div[3]/div/div/table/tbody/tr[3]/td[4] -> Message: element not interactable
(Session info: chrome=100.0.4896.127)
Stacktrace:
Backtrace:
Ordinal0 [0x00C97413 2389011]
Ordinal0 [0x00C29F61 1941345]
Ordinal0 [0x00B1C520 836896]
Ordinal0 [0x00B448E3 1001699]
Ordinal0 [0x00B43FBE 999358]
How do I fix this?
Thanks in advance.
CodePudding user response:
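The problem is in the way you get the data. It is not necessary to simulate Ctrl+C to copy the value; locating the element, as you already do, is enough. A table cell cannot receive keyboard input, which is why send_keys fails with "element not interactable". To get the data, read the text attribute of the element returned by find_element, and that's it!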
#!/usr/bin/env python
from listOfElements import pieces
from selenium import webdriver
#from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
#import pyperclip
from selenium.webdriver.chrome.service import Service

# Filesystem location of the chromedriver executable. A raw string keeps the
# Windows backslashes literal without relying on "\P" and "\c" happening to
# be unrecognised escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
# Module-level Chrome session used by scrape1(). The driver path is wrapped
# in a Service object because passing it positionally (executable_path) is
# deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(PATH))
# Scrape electricity rates from the eia.gov website.
def scrape1():
    """Write the text of every table cell listed in ``pieces`` to output.txt.

    The page is loaded once, each XPath in ``pieces`` is looked up, and the
    located element's ``text`` is collected. The collected values are then
    written to ``output.txt``, each followed by a single space.

    A lookup that raises is reported on stdout and skipped, so one bad XPath
    does not stop the remaining ones. If the page itself cannot be loaded,
    the error is reported and the function returns without touching
    ``output.txt``. Nothing is returned.
    """
    url = 'https://www.eia.gov/electricity/monthly/epm_table_grapher.php?t=epmt_5_06_a'
    try:
        # Every XPath targets the same page, so a single load is enough.
        driver.get(url)
    except Exception as e:
        print(f'Exception while loading {url} -> {e}')
        return

    data = []
    for piece in pieces:
        try:
            # Reading .text needs no keyboard interaction or clipboard, so it
            # also works on elements that cannot receive key presses.
            element = driver.find_element(By.XPATH, piece)
            data.append(element.text)
        except Exception as e:
            print(f'Exception while processing {piece} -> {e}')

    # The context manager closes the file even if a write fails; the explicit
    # encoding keeps the output independent of the platform default.
    with open('output.txt', 'w', encoding='utf-8') as output_file:
        output_file.writelines(f'{value} ' for value in data)
# Run the scraper only when this file is executed as a script, so importing
# the module does not start a scrape.
if __name__ == "__main__":
    scrape1()