I'm trying to get the page to click the "Load more" button repeatedly until it reaches the bottom of the page, so that all the data can be extracted. However, the button sometimes doesn't load despite Selenium's explicit wait, or it does appear but is not clickable (I put in print("Load more clicked") to see whether the automation clicks, and it appears to do so - it's just that the button is not clickable). I'm using Selenium 4.1.5 and ChromeDriver 104. Here's the code:
import csv
import os
import time

import pandas as pd
import selenium.webdriver as webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    ElementNotInteractableException,
    NoSuchElementException,
    TimeoutException,
)
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait, Select
from webdriver_manager.chrome import ChromeDriverManager
def ChromeDriver(executable_path=r"C:\Python39\chromedriver.exe"):
    """Start a visible (non-headless) Chrome session and return its driver.

    Args:
        executable_path: Location of the chromedriver binary. The default is
            the path this script was originally written against.

    Returns:
        The ``webdriver.Chrome`` instance that was started.
    """
    options = ChromeOptions()
    options.add_argument('--disable-gpu')
    options.add_argument("force-device-scale-factor=1.00")
    options.headless = False
    # The default path is a raw string: "\P" and "\c" are not valid escape
    # sequences, and a plain literal only works because Python currently
    # passes unknown escapes through (with a warning).
    service = ChromeService(executable_path)
    return webdriver.Chrome(options=options, service=service)
def toggle_button():
    """Click the first ``<input type="checkbox">`` found on the page.

    Uses the module-level ``driver``.
    """
    driver.find_element(By.XPATH, "//input[@type='checkbox']").click()
def set_currency(currency):
    """Flip the page's toggle unless ``currency`` is already the active one.

    Args:
        currency: Label expected in the element whose class contains
            ``modeTitleActive-AXg2YMF0`` (e.g. ``"BTC"``).
    """
    active_label = driver.find_element(
        By.XPATH, "//span[contains(@class, 'modeTitleActive-AXg2YMF0')]"
    )
    if active_label.text == currency:
        # Already showing the requested currency; nothing to do.
        return
    toggle_button()
def accept_all():
    """Click the "accept all" button if it can be found and interacted with.

    Best effort: a missing or non-interactable element is silently ignored.
    Presumably this dismisses a consent prompt - confirm against the page.
    """
    try:
        container = driver.find_element(By.XPATH, "//div[@class='item-x59qvYIc']")
        # NOTE(review): this XPath starts with "//", so the lookup is
        # document-wide rather than scoped to ``container`` - confirm intent.
        accept_button = container.find_element(By.XPATH, "//button[@class='acceptAll-ofWyKNKk button-KXqj4Lf5 size-xsmall-KXqj4Lf5 color-brand-KXqj4Lf5 variant-primary-KXqj4Lf5']")
        accept_button.click()
    except (NoSuchElementException, ElementNotInteractableException):
        return
def save_as_csv(data_list):
    """Write one CSV file per category table under the ``csv/`` directory.

    Each file starts with the upper-cased category title on its own row,
    followed by the table as written by ``DataFrame.to_csv``.

    Args:
        data_list: The category tables (pandas DataFrames) followed, as the
            last element, by the list of category titles in the same order.
            Any number of categories is accepted.
    """
    tables = data_list[:-1]
    titles = data_list[-1] if data_list else []
    os.makedirs("csv", exist_ok=True)
    for file_title, table in zip(titles, tables):
        file = f"csv/tv_crypto_prices_{file_title}.csv"
        with open(file, "w", newline="") as csv_file:
            csv.writer(csv_file).writerow([file_title.upper()])
        # Append only after the handle above is closed, so the title row is
        # flushed to disk before pandas reopens the same path.
        table.to_csv(file, mode="a")
    print("csv files saved")
def quit_driver():
    """Close the current window of the module-level ``driver``, then quit it."""
    # Order matters: close() acts on the live session that quit() then ends.
    driver.close()
    driver.quit()
def get_categories():
    """Return every ``<div>`` whose class attribute starts with ``item-x59qvYIc``.

    Returns:
        The list of matching elements from the module-level ``driver``
        (empty when nothing matches).
    """
    return driver.find_elements(By.XPATH, "//div[starts-with(@class, 'item-x59qvYIc')]")
def get_categories_list(categories_objs):
    """Return the ``.text`` of each category element, in order.

    Args:
        categories_objs: Iterable of objects exposing a ``text`` attribute
            (e.g. the elements returned by ``get_categories``).

    Returns:
        A list of the category titles.
    """
    # Iterate the argument itself; the previous version ignored it and read
    # a global named ``categories`` instead.
    return [category.text for category in categories_objs]
def _click_load_more_until_gone(timeout=20):
    """Keep clicking the "Load more" button until it stops becoming clickable."""
    locator = (By.XPATH, "//span[@class='tv-load-more__btn']")
    while True:
        # Give rows loaded by the previous click time to render.
        time.sleep(2)
        try:
            load_more_button = WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable(locator)
            )
        except TimeoutException:
            # An explicit wait reports "never became clickable" with
            # TimeoutException (not NoSuchElementException); treat that as
            # "everything is loaded".
            return
        try:
            # Bring the button into the viewport so overlays are less likely
            # to swallow the click.
            driver.execute_script(
                "arguments[0].scrollIntoView({block: 'center'});", load_more_button
            )
            load_more_button.click()
        except ElementClickInterceptedException:
            # Something is covering the button; dispatch the click via script.
            driver.execute_script("arguments[0].click();", load_more_button)
        except (NoSuchElementException, ElementNotInteractableException):
            return
        print("Load more clicked")


def fetch_data():
    """Scrape the table of every category on the currently loaded page.

    For each category element: click it, expand the listing with "Load more"
    until the button no longer appears, then parse the second HTML table of
    the page source. The browser is closed before returning.

    Returns:
        A list holding one DataFrame per category, followed by the list of
        category titles as the last element.
    """
    # Look the categories up here; no module-level ``categories`` is assigned
    # anywhere before this function runs.
    categories = get_categories()
    # Capture the titles before any click changes the page.
    categories_list = [category.text for category in categories]
    accept_all()
    data = []
    for category in categories:
        category.click()
        time.sleep(2)
        print(category.text)
        _click_load_more_until_gone()
        crypto = pd.read_html(driver.page_source)[1]
        # Rename the first column by position; assigning a new column list
        # avoids mutating the index's underlying array in place.
        crypto.columns = ["Name", *crypto.columns[1:]]
        data.append(crypto)
    data.append(categories_list)
    quit_driver()
    return data
def run_tvc():
    """Open the TradingView crypto prices page, scrape it and save CSV files."""
    # The helper functions in this file read ``driver`` as a module-level
    # name, so it must be published globally rather than kept local here.
    global driver
    driver = ChromeDriver()
    driver.get("https://www.tradingview.com/markets/cryptocurrencies/prices-all/")
    # Set currency
    currency = "BTC"
    set_currency(currency)
    # Get data
    d = fetch_data()
    save_as_csv(d)


if __name__ == "__main__":
    run_tvc()
I also tried to swap out load_more_button = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[@class='tv-load-more__btn']")))
for load_more_button = driver.find_element(By.XPATH, "//span[@class='tv-load-more__btn']")
and this seemed to do its job for all categories except the first one ("Overview"), which is the one displayed when the page first loads from the URL. I would like it to work either way. In short, the problems are:
- How do you make it work with WebDriverWait, where it currently ends with a TimeoutException every time?
- Why doesn't the "Load more" button appear for the first category, "Overview", under Selenium, and how do you fix it? I stopped the process midway and tried reloading the page in the exact same Selenium-controlled Chrome window, and the button always appeared and worked.
CodePudding user response:
Do you really need Selenium? If not, an alternative is to use the API that the page loads its data from. You can fetch the entire dataset from that API (the method is POST and the data format is JSON) using only the requests
module.
Example script:
import requests
import pandas as pd
# JSON body posted to the scanner endpoint; "range" asks for rows 0-450.
payload = {
    "filter": [
        {"left": "market_cap_calc", "operation": "nempty"},
        {"left": "sector", "operation": "nempty"},
        {"left": "name", "operation": "match", "right": "BTC$"},
    ],
    "options": {"lang": "en"},
    "markets": ["crypto"],
    "symbols": {"query": {"types": []}, "tickers": []},
    "columns": [
        "base_currency_logoid",
        "sector",
        "market_cap_calc",
        "market_cap_diluted_calc",
        "close",
        "total_shares_outstanding",
        "total_shares_diluted",
        "total_value_traded",
        "change",
        "pricescale",
        "minmov",
        "fractional",
        "minmove2",
    ],
    "sort": {"sortBy": "market_cap_calc", "sortOrder": "desc"},
    "range": [0, 450],
}
url = 'https://scanner.tradingview.com/crypto/scan'
headers = {
    "content-type": "application/x-www-form-urlencoded",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
}
req = requests.post(url, headers=headers, json=payload).json()
# Each entry of req['data'] carries its values in the 'd' list; index 1 is
# the value printed under "Name".
lst = [{"Name": item['d'][1]} for item in req['data']]
# To store the data, chain .to_csv('out.csv', index=False) onto the frame.
df = pd.DataFrame(lst)
print(df)
Output:
Name
0 Bitcoin
1 Ethereum
2 Tether
3 USD Coin
4 Binance Coin
.. ...
445 Tornado Cash
446 Vertcoin
447 TokenClub
448 TrustVerse
449 Atletico De Madrid Fan Token
[450 rows x 1 columns]