I'm trying to run some code to scrape Twitter, but I keep getting an error that I can't manage to solve. I guess it is related to the Selenium webdriver, but I am not very familiar with it and I haven't found a solution so far.
This is the code:
import pandas
import numpy
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Client that holds a Selenium Chrome browser and the Twitter search base URL.
# NOTE(review): this listing is kept exactly as posted (indentation was lost in
# the paste) because the traceback further down quotes it line for line.
class SeleniumClient(object):
def __init__(self):
# Initialization method: builds the Chrome options, starts the browser and
# stores the search base URL.
# NOTE(review): the next line is the one reported in the traceback. Its
# right-hand side reads self.chrome_options before anything has been assigned
# to that attribute, which raises the AttributeError. A driver path and an
# `options=` keyword are arguments for webdriver.Chrome(), not ChromeOptions().
self.chrome_options = webdriver.ChromeOptions('D:/chromedriver_win32/chromedriver', options=self.chrome_options)
self.chrome_options.add_argument('--headless')
self.chrome_options.add_argument('--no-sandbox')
self.chrome_options.add_argument('--disable-setuid-sandbox')
# You need to provide the path of chromedriver on your system.
# NOTE(review): the options object configured above is not passed to this
# call, so the flags added above are never handed to the browser. The path is
# also a non-raw string containing backslashes; a raw string (r'...') would be
# safer.
self.browser = webdriver.Chrome('C:\Program Files\Google\chromedriver.exe')
self.base_url = 'https://twitter.com/search?q='
def get_tweets(self, query):
'''
Function to fetch tweets.
'''
try:
# NOTE(review): an operator (presumably `+`) appears to be missing between
# self.base_url and query; as written this line is a syntax error.
self.browser.get(self.base_url query)
# NOTE(review): `time` is not among the imports shown above this class.
time.sleep(2)
body = self.browser.find_element_by_tag_name('body')
# Scroll by sending PAGE_DOWN 3000 times, pausing 0.3 s after each key press
# (about 900 s of sleeping in total).
for _ in range(3000):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.3)
timeline = self.browser.find_element_by_id('timeline')
tweet_nodes = timeline.find_elements_by_css_selector('.tweet-text')
# NOTE(review): `pd` is not defined by the visible imports; pandas is imported
# without an alias.
return pd.DataFrame({'tweets': [tweet_node.text for tweet_node in tweet_nodes]})
# NOTE(review): a bare `except` catches everything, including the NameErrors
# that the undefined `time` / `pd` names would raise, and only prints a
# generic message, so the real cause is hidden from the caller.
except:
print("Selenium - An error occured while fetching tweets.")
# The next line is the one where the error is raised, but the cause must be
# located inside the class (the constructor runs as part of this call).
selenium_client = SeleniumClient()
tweets_df = selenium_client.get_tweets('AI and Deep learning')
This is what the error says:
AttributeError Traceback (most recent call last)
<ipython-input-5-3bd40446c1fd> in <module>
----> 1 selenium_client = SeleniumClient()
2 #tweets_df = selenium_client.get_tweets('AI and Deep learning')
<ipython-input-3-f0c81bf234aa> in __init__(self)
4 def __init__(self):
5 #Initialization method.
----> 6 self.chrome_options = webdriver.ChromeOptions('D:/chromedriver_win32/chromedriver', options=self.chrome_options)
7 self.chrome_options.add_argument('--headless')
8 self.chrome_options.add_argument('--no-sandbox')
AttributeError: 'SeleniumClient' object has no attribute 'chrome_options'
CodePudding user response:
You are possibly using Selenium 3. When calling Chrome(),
the default (first) argument is executable_path,
which takes the absolute path of the ChromeDriver executable, whereas you have passed the absolute path of the Chrome executable.
Moreover, the ChromeOptions()
object doesn't need any location as an argument.
Effectively your code block will be:
from selenium import webdriver
class SeleniumClient(object):
    """Hold a headless Chrome browser and the Twitter search base URL."""

    def __init__(self):
        """Build the Chrome options, start the browser and set the base URL."""
        self.chrome_options = webdriver.ChromeOptions()
        # Command-line switches are applied in the same order as listed here.
        for switch in ('--headless', '--no-sandbox', '--disable-setuid-sandbox'):
            self.chrome_options.add_argument(switch)

        # You need to provide the path of chromedriver on your system.
        driver_path = r'C:\Program Files\Google\chromedriver.exe'
        self.browser = webdriver.Chrome(
            executable_path=driver_path,
            options=self.chrome_options,
        )
        self.base_url = 'https://twitter.com/search?q='
CodePudding user response:
You have to import the Options class:
from selenium.webdriver.chrome.options import Options
Then call Options() and store the result in a variable:
options = Options()
# chromedriver.exe is expected in the same directory as the script, so the bare
# file name is used as the driver path.
# Options() only carries browser settings and takes no driver path. The browser
# itself is created with webdriver.Chrome(), which receives both the driver
# path and the options object created above.
self.driver = webdriver.Chrome(executable_path='chromedriver.exe', options=options)