I am trying to scrape data for a given keyword at the metro level in Google Trends. I have gotten as far as entering the keyword I want into Google Trends via Selenium, but I just can't find a way to click on a dropdown menu, select an option, and download the result as a CSV. I am especially stuck at the dropdown menu part (perhaps I am not able to find the right class/id).
I have a restriction that this needs to be done as a scraper and not via the pytrends API.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
# Question's first attempt: submit one keyword on Google Trends, then try to open a dropdown.
# Start Chrome through the local chromedriver binary and open the US Google Trends page.
driver = webdriver.Chrome('chromedriver.exe')
driver.get('https://trends.google.com/trends/?geo=US')
# Look up the search box by its id attribute.
# NOTE(review): "input-254" looks auto-generated and may not be stable across page loads - confirm.
element = driver.find_element(By.XPATH,'.//*[@id="input-254"]')
# Type the keyword and submit it with ENTER.
element.send_keys("glass")
element.send_keys(Keys.ENTER)
#till this point I am able to enter the word and arrive at the selection page
# The lookup below runs immediately (no explicit wait) and matches by class name only;
# dropdown1 receives whatever click() returns, not the element itself.
dropdown1 =driver.find_element(By.CLASS_NAME,'_md-select-value').click() #this is my attempt at clicking on the drop down. However this doesn't seem to work.
I wish to select METRO under the subregion drop down and download the csv using the button next to subregion and store it in a specific path.
Can someone help me with selecting and downloading the csv for a given keyword and region as metro?
Thank you.
Update:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
# Question's updated attempt: for each keyword, submit it, switch the breakdown to "metro" and click the download button.
# Start Chrome through the local chromedriver binary and open the US Google Trends page.
driver = webdriver.Chrome('chromedriver.exe')
driver.get('https://trends.google.com/trends/?geo=US')
# The search box is located once, here, before the loop; the same reference is reused for every keyword.
element = driver.find_element(By.XPATH,'.//*[@id="input-254"]')
driver.maximize_window()
keywords = ["glass","wolf","cat","dog"]
# NOTE(review): the loop body's indentation appears to have been lost in this paste;
# the statements below presumably all belong inside the for loop - confirm.
for key in keywords:
#loop through the keywords
element.send_keys(key)
element.send_keys(Keys.ENTER)
#click on cookie
# Waits up to 20 seconds for the cookie consent button to become clickable, then clicks it.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".cookieBarButton.cookieBarConsentButton"))).click()
#click on subregion
# Waits up to 20 seconds for the span whose text is "Subregion", then clicks it to open the dropdown.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[normalize-space(.)='Subregion']"))).click()
#click on metro
# Waits only for the option to be present in the DOM, then clicks it through JavaScript instead of a native click.
elementbtn=WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "//md-option[@value='metro']")))
driver.execute_script("arguments[0].click();", elementbtn)
#download file
# The download control is addressed by an absolute XPath from the document root.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//html/body/div[2]/div[2]/div/md-content/div/div/div[2]/trends-widget/ng-include/widget/div/div/div/widget-actions/div/button[1]/i"))).click()
This, however, only downloads the CSV for the first keyword in the list and then fails with the error "Message: stale element reference: element is not attached to the page document".
CodePudding user response:
The class name you used matches 23 elements on the page, so it is not unique. Try a unique XPath instead.
Use WebDriverWait and wait for each element to be clickable. First click on the cookie button and then on the respective dropdown.
code:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
# Submit one keyword on Google Trends, accept the cookie bar, then switch the
# "Subregion" dropdown to the "city" option.
driver = webdriver.Chrome('chromedriver.exe')
driver.get('https://trends.google.com/trends/?geo=US')
search_box = driver.find_element(By.XPATH, './/*[@id="input-254"]')
driver.maximize_window()

# Type the search term and submit it.
search_box.send_keys("glass")
search_box.send_keys(Keys.ENTER)

# A single 20-second explicit wait is reused for every lookup below.
wait = WebDriverWait(driver, 20)
cookie_locator = (By.CSS_SELECTOR, ".cookieBarButton.cookieBarConsentButton")
subregion_locator = (By.XPATH, "//span[normalize-space(.)='Subregion']")
city_locator = (By.XPATH, "//md-option[@value='city']")

# Click the cookie consent button once it is clickable.
cookie_button = wait.until(EC.element_to_be_clickable(cookie_locator))
cookie_button.click()

# Open the "Subregion" dropdown.
subregion_toggle = wait.until(EC.element_to_be_clickable(subregion_locator))
subregion_toggle.click()

# The option only needs to be present in the DOM: it is clicked through
# JavaScript rather than with a native click.
city_option = wait.until(EC.presence_of_element_located(city_locator))
driver.execute_script("arguments[0].click();", city_option)
Update
# Download the metro-level CSV from Google Trends for every keyword in the list.
# The page is reloaded and the search box re-located on each iteration, so no
# element reference is carried over from a previous page.
import time  # needed for time.sleep() below; the original snippet never imported it

from selenium.common.exceptions import TimeoutException

TRENDS_URL = 'https://trends.google.com/trends/?geo=US'
COOKIE_BUTTON = (By.CSS_SELECTOR, ".cookieBarButton.cookieBarConsentButton")
SUBREGION_DROPDOWN = (By.XPATH, "//span[normalize-space(.)='Subregion']")
METRO_OPTION = (By.XPATH, "//md-option[@value='metro']")
# NOTE(review): absolute XPath copied from the original; it breaks as soon as
# the page layout changes - replace with a shorter, attribute-based locator
# once one has been confirmed against the live page.
DOWNLOAD_BUTTON = (By.XPATH, "//html/body/div[2]/div[2]/div/md-content/div/div/div[2]/trends-widget/ng-include/widget/div/div/div/widget-actions/div/button[1]/i")

driver = webdriver.Chrome('chromedriver.exe')
driver.maximize_window()  # the window only needs to be maximized once
wait = WebDriverWait(driver, 20)
keywords = ["glass", "wolf", "cat", "dog"]

for index, key in enumerate(keywords):
    # Fresh page and fresh search-box lookup for every keyword.
    driver.get(TRENDS_URL)
    element = driver.find_element(By.XPATH, './/*[@id="input-254"]')
    element.send_keys(key)
    element.send_keys(Keys.ENTER)

    # Click the cookie consent button if it shows up.
    # NOTE(review): presumably the bar is not shown again once it has been
    # accepted in this browser session - confirm. A missing bar is therefore
    # tolerated instead of aborting the loop, and later iterations use a
    # shorter wait so they are not held up for the full 20 seconds.
    cookie_timeout = 20 if index == 0 else 3
    try:
        WebDriverWait(driver, cookie_timeout).until(
            EC.element_to_be_clickable(COOKIE_BUTTON)
        ).click()
    except TimeoutException:
        pass  # no consent bar on this page load; nothing to dismiss

    # Open the "Subregion" dropdown.
    wait.until(EC.element_to_be_clickable(SUBREGION_DROPDOWN)).click()

    # Select "metro"; the option is clicked through JavaScript, so it only
    # needs to be present in the DOM.
    elementbtn = wait.until(EC.presence_of_element_located(METRO_OPTION))
    driver.execute_script("arguments[0].click();", elementbtn)

    # Click the download (CSV) button of the widget.
    wait.until(EC.element_to_be_clickable(DOWNLOAD_BUTTON)).click()

    time.sleep(2)  # slow down the loop before loading the next keyword