Here is the website link: https://finance.yahoo.com/cryptocurrencies?count=100&offset=0 — I'm trying to scrape the prices of all the cryptocurrencies listed on this page, but my code returns nothing. Maybe I am scraping the wrong tag. Please look into it. Here is my code:
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from bs4 import BeautifulSoup
# Location of the ChromeDriver executable. A raw string keeps the backslashes
# of the Windows path literal instead of treating them as escape sequences.
PATH = r"C:\Program Files\chromedriver.exe"

# Start Chrome and open the first 100 rows of the cryptocurrency listing.
driver = webdriver.Chrome(PATH)
driver.get('https://finance.yahoo.com/cryptocurrencies?count=100&offset=0')
# print(driver.title)
# search = driver.find_element_by_xpath('/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
# search.send_keys('python')
# search.send_keys(Keys.RETURN)
def fun1(Name):
    """Print one ``Title:-`` line for every element in ``Name``.

    Args:
        Name: Iterable of objects exposing a ``text`` attribute.
    """
    # Iterate the argument itself rather than the module-level ``Names``,
    # so the function prints whatever the caller actually passed in.
    for name in Name:
        print(f'Title:- {name.text}')
def fun2(Price):
    """Print one ``Price:-`` line for every element in ``Price``.

    Args:
        Price: Iterable of objects exposing a ``text`` attribute.
    """
    for result in Price:
        # Print the element's text, matching how fun1 prints names, instead
        # of the representation of the whole element.
        print(f'Price:- {result.text}')
# Parse the currently loaded page four times and print the prices found.
# NOTE(review): implicitly_wait presumably only affects element lookups made
# through the driver, not driver.page_source, so the HTML handed to
# BeautifulSoup may be captured before the table has rendered - confirm.
i = 1
try:
    while i < 5:
        driver.implicitly_wait(20)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        Names = soup.findAll('a', {'class': 'Fw(600) C($linkColor)'})
        Price = soup.findAll('span', {'class': '_11248a25 c916dce9'})
        # fun1(Names)
        fun2(Price)
        # driver.find_element_by_xpath('//*[@id="scr-res-table"]/div[2]/button[3]/span/span').click()
        i = i + 1
        # for result in results:
        #     print(result.text)
        #     _url = result.find('a')['href']
        #     print(_url)
        #     print()
except Exception as exc:
    # Report the failure instead of silently discarding it, so an empty
    # output can be told apart from an error.
    # driver.quit()
    print(f'Scraping failed: {exc}')
# i= 1
# while i<5:
# driver.find_element_by_css_selector('#pnnext').click()
# i = i 1
CodePudding user response:
# Open the listing and block (up to 10 seconds) until every cell of the
# "Name" column is visible, then print the text of each cell.
wait = WebDriverWait(driver, 10)
driver.get('https://finance.yahoo.com/cryptocurrencies?count=100&offset=0')

name_locator = (By.XPATH, "//td[@aria-label='Name']")
name_cells = wait.until(EC.visibility_of_all_elements_located(name_locator))

names = [cell.text for cell in name_cells]
print(names)
Here's a simple way to wait for all the names in that table to be visible.
To get the prices instead, use the XPath //td[@aria-label='Price (Intraday)']
Import:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Output:
['Bitcoin USD', 'Ethereum USD', 'Tether USD', 'Binance Coin USD', 'USD Coin USD', 'XRP USD', 'Cardano USD', 'HEX USD', 'Solana USD', 'Avalanche USD', 'Terra USD', 'Dogecoin USD', 'Polkadot USD', 'Binance USD USD', 'SHIBA INU USD', 'Polygon USD', 'Crypto.com Coin USD', 'TerraUSD USD', 'Wrapped Bitcoin USD', 'Dai USD', 'Litecoin USD', 'Cosmos USD', 'Chainlink USD', 'NEAR Protocol USD', 'Uniswap USD', 'TRON USD', 'Algorand USD', 'FTX Token USD', 'Bitcoin Cash USD', 'Decentraland USD', 'Lido stETH USD', 'UNUS SED LEO USD', 'Stellar USD', 'Fantom USD', 'Hedera USD', 'The Sandbox USD', 'Bitcoin BEP2 USD', 'Internet Computer USD', 'Ethereum Classic USD', 'Elrond USD', 'THETA USD', 'Axie Infinity USD', 'VeChain USD', 'Filecoin USD', 'Tezos USD', 'Klaytn USD', 'Toncoin USD', 'Monero USD', 'Helium USD', 'Frax USD', 'IOTA USD', 'Osmosis USD', 'EOS USD', 'Flow USD', 'Gala USD', 'Aave USD', 'The Graph USD', 'Harmony USD', 'PancakeSwap USD', 'BitTorrent USD', 'Wrapped BNB USD', 'Maker USD', 'BitTorrent (new) USD', 'Stacks USD', 'Bitcoin SV USD', 'Huobi BTC USD', 'Neo USD', 'Zcash USD', 'Enjin Coin USD', 'KuCoin Token USD', 'eCash USD', 'Quant USD', 'Huobi Token USD', 'TrueUSD USD', 'Kusama USD', 'THORChain USD', 'Convex Finance USD', 'Curve DAO Token USD', 'Kadena USD', 'Amp USD', 'Chiliz USD', 'Loopring USD', 'OKB USD', 'Basic Attention Token USD', 'yOUcash USD', 'Celo USD', 'Chainbing USD', 'Nexo USD', 'Theta Fuel USD', 'Arweave USD', 'Dash USD', 'Waves USD', 'DeFiChain USD', 'Oasis Network USD', 'ECOMI USD', 'NEM USD', 'Counos X USD', 'BitDAO USD', 'Mina USD', 'Secret USD']
CodePudding user response:
In your case, you don't need BeautifulSoup since you're already using Selenium. I've tried the code below and it printed all the names and prices.
from selenium import webdriver
# Location of the ChromeDriver executable. A raw string keeps the backslashes
# of the Windows path literal instead of treating them as escape sequences.
PATH = r"C:\Program Files\chromedriver.exe"

# Start Chrome and open the first 100 rows of the cryptocurrency listing.
driver = webdriver.Chrome(PATH)
driver.get('https://finance.yahoo.com/cryptocurrencies?count=100&offset=0')
def fun1(names):
    """Print one ``Title:-`` line for every element in ``names``.

    Args:
        names: Iterable of objects exposing a ``text`` attribute.
    """
    for name in names:
        print(f'Title:- {name.text}')
def fun2(prices):
    """Print one ``Price:-`` line for every element in ``prices``.

    Args:
        prices: Iterable of objects exposing a ``text`` attribute.
    """
    for price in prices:
        print(f'Price:- {price.text}')
# Look up the name and price elements on the loaded page and print them,
# repeating the lookup four times.
# NOTE(review): the attribute predicates of both XPath expressions were lost
# from this snippet ('//a[@]' and '//span[@]'). The name locator below reuses
# the link class from the question's code; the price locator is the
# aria-label based one given in the other answer on this page. Confirm both
# against the live page.
# NOTE(review): newer Selenium releases may no longer provide the
# find_elements_by_* helpers; driver.find_elements(By.XPATH, ...) is the
# equivalent call - confirm against the installed version.
i = 1
while i < 5:
    driver.implicitly_wait(20)
    names = driver.find_elements_by_xpath('//a[@class="Fw(600) C($linkColor)"]')
    prices = driver.find_elements_by_xpath("//td[@aria-label='Price (Intraday)']")
    fun1(names)
    fun2(prices)
    # Advance the counter; without the increment the loop never terminates.
    i += 1
CodePudding user response:
It would be quicker and more efficient to get the data through direct requests, as opposed to automating a browser with Selenium:
import requests
import pandas as pd
import re
import json
# Load the listing page once through a session so that the cookies it sets
# and the crumb token embedded in its HTML can be sent with later requests.
url = "https://finance.yahoo.com/cryptocurrencies"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36'}

s = requests.Session()
html = s.get(url, headers=headers)

# Concatenate every session cookie into one "name=value;" header string
# (all cookies are kept, not just the last one).
cookies = s.cookies.get_dict()
cookieStr = ''.join(f'{k}={v};' for k, v in cookies.items())

headers.update({'content-type': 'application/json',
                'cookie': cookieStr})

# The page source contains "root.App.main = {...}"; capture that JSON object
# and read the crumb out of it.
pattern = 'root.App.main = ({.*})'
found = re.search(pattern, html.text)
if found is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError('Could not find "root.App.main" data in the page source')
jsonStr = found.group(1)
jsonData = json.loads(jsonStr)
crumb = jsonData['context']['dispatcher']['stores']['CrumbStore']['crumb']

# Query-string parameters sent with every screener request.
payload = {
    'crumb': crumb,
    'lang': 'en-US',
    'region': 'US',
    'formatted': 'true',
    'corsDomain': 'finance.yahoo.com'}
# Page through the screener endpoint 250 quotes at a time. A page that comes
# back with fewer than 250 quotes ends the loop.
rows = []
page = 0
count = 250
url = 'https://query2.finance.yahoo.com/v1/finance/screener'
while count == 250:
    offset = page*250
    query = {
        "offset":offset,
        "size":250,
        "sortType":"DESC",
        "sortField":"intradaymarketcap",
        "quoteType":"CRYPTOCURRENCY",
        "query":{"operator":"and",
                 "operands":[{"operator":"eq",
                              "operands":
                              ["currency","USD"]},
                             {"operator":"eq","operands":
                              ["exchange","CCC"]}]},
        "userId":"","userIdType":"guid"}

    jsonData = s.post(url, headers=headers, params=payload, json=query).json()
    results = jsonData['finance']['result'][0]['quotes']
    count = len(results)

    for each in results:
        # Replace every dict-valued field with its 'raw' entry so each quote
        # becomes a flat record.
        for k,v in each.items():
            if isinstance(v, dict):
                each.update({k:v['raw']})
        rows.append(each)

    print('Aquired: %s of %s' %(len(rows),jsonData['finance']['result'][0]['total'] ))
    # Move on to the next page; without the increment the same offset would
    # be requested over and over.
    page += 1

df = pd.DataFrame(rows)
Output:
print(df)
symbol twoHundredDayAverageChangePercent ... algorithm maxSupply
0 BTC-USD -0.105620 ... NaN NaN
1 ETH-USD -0.122105 ... NaN NaN
2 USDT-USD 0.000251 ... NaN NaN
3 BNB-USD -0.089717 ... NaN NaN
4 USDC-USD -0.000304 ... NaN NaN
... ... ... ... ...
9491 FRT1-USD NaN ... NaN NaN
9492 ANTIS-USD NaN ... NaN NaN
9493 XRPUP-USD -0.575412 ... NaN NaN
9494 PLGR-USD NaN ... NaN NaN
9495 MINIKISHU-USD -0.677844 ... NaN NaN
[9496 rows x 57 columns]