Home > Software engineering > I am trying to scrape prices of all crypto listed on this page but it is returning me nothing

Time:02-17

So here is the website link: https://finance.yahoo.com/cryptocurrencies?count=100&offset=0 — I am trying to scrape the prices of all crypto listed on this page, but it is returning me nothing. Maybe I am scraping the wrong tag. Please look into it. Here is my code:

from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from bs4 import BeautifulSoup

# Raw string: '\P' and '\c' are invalid escape sequences in a normal
# string literal and raise a DeprecationWarning on modern Pythons.
PATH = r"C:\Program Files\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://finance.yahoo.com/cryptocurrencies?count=100&offset=0')
# print(driver.title)
# search = driver.find_element_by_xpath('/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
# search.send_keys('python')
# search.send_keys(Keys.RETURN)
def fun1(Name):
    """Print 'Title:- <text>' for each scraped element in *Name*.

    Bug fix: the original iterated the module-level global ``Names``
    instead of the ``Name`` parameter, so the argument passed in was
    silently ignored (and the call raised NameError if the global was
    not yet set).
    """
    for name in Name:
        print(f'Title:- {name.text}')
def fun2(Price):
    """Print 'Price:- <text>' for each scraped element in *Price*.

    Bug fix: the original printed the element object itself (its HTML
    repr) rather than its visible text, inconsistent with fun1 which
    prints ``name.text``.
    """
    for result in Price:
        print(f'Price:- {result.text}')


i = 1
try:
    while i < 5:
        driver.implicitly_wait(20)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # Yahoo's hashed CSS classes (e.g. '_11248a25 c916dce9') are
        # regenerated per build and never match — that is why the scrape
        # returned nothing.  Select by the stable aria-label attributes
        # of the table cells instead.
        Names = soup.findAll('td', {'aria-label': 'Name'})
        Price = soup.findAll('td', {'aria-label': 'Price (Intraday)'})
        # fun1(Names)
        fun2(Price)
        # driver.find_element_by_xpath('//*[@id="scr-res-table"]/div[2]/button[3]/span/span').click()
        # Restored '+=': the original 'i = i   1' lost its '+' operator
        # (markup stripping) and was a syntax error.
        i += 1

    # for result in results:
    #     print(result.text)
        # _url = result.find('a')['href']
        # print(_url)
        # print()

except Exception as exc:
    # A bare 'except: pass' hides every failure (bad selector, dead
    # driver, ...); report what went wrong instead of swallowing it.
    print(f'scrape aborted: {exc}')
    # driver.quit()
# i= 1
# while i<5:
#     driver.find_element_by_css_selector('#pnnext').click()
#     i = i + 1

CodePudding user response:

# Wait up to 10s until every 'Name' cell of the crypto screener table
# is visible, then collect each cell's visible text.
wait = WebDriverWait(driver, 10)
driver.get('https://finance.yahoo.com/cryptocurrencies?count=100&offset=0')
name_cells = wait.until(
    EC.visibility_of_all_elements_located((By.XPATH, "//td[@aria-label='Name']"))
)
names = [cell.text for cell in name_cells]
print(names)

Here's a simple way to wait for all the names in that table to be visible.

Price is //td[@aria-label='Price (Intraday)']

Import:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC

Output:

['Bitcoin USD', 'Ethereum USD', 'Tether USD', 'Binance Coin USD', 'USD Coin USD', 'XRP USD', 'Cardano USD', 'HEX USD', 'Solana USD', 'Avalanche USD', 'Terra USD', 'Dogecoin USD', 'Polkadot USD', 'Binance USD USD', 'SHIBA INU USD', 'Polygon USD', 'Crypto.com Coin USD', 'TerraUSD USD', 'Wrapped Bitcoin USD', 'Dai USD', 'Litecoin USD', 'Cosmos USD', 'Chainlink USD', 'NEAR Protocol USD', 'Uniswap USD', 'TRON USD', 'Algorand USD', 'FTX Token USD', 'Bitcoin Cash USD', 'Decentraland USD', 'Lido stETH USD', 'UNUS SED LEO USD', 'Stellar USD', 'Fantom USD', 'Hedera USD', 'The Sandbox USD', 'Bitcoin BEP2 USD', 'Internet Computer USD', 'Ethereum Classic USD', 'Elrond USD', 'THETA USD', 'Axie Infinity USD', 'VeChain USD', 'Filecoin USD', 'Tezos USD', 'Klaytn USD', 'Toncoin USD', 'Monero USD', 'Helium USD', 'Frax USD', 'IOTA USD', 'Osmosis USD', 'EOS USD', 'Flow USD', 'Gala USD', 'Aave USD', 'The Graph USD', 'Harmony USD', 'PancakeSwap USD', 'BitTorrent USD', 'Wrapped BNB USD', 'Maker USD', 'BitTorrent (new) USD', 'Stacks USD', 'Bitcoin SV USD', 'Huobi BTC USD', 'Neo USD', 'Zcash USD', 'Enjin Coin USD', 'KuCoin Token USD', 'eCash USD', 'Quant USD', 'Huobi Token USD', 'TrueUSD USD', 'Kusama USD', 'THORChain USD', 'Convex Finance USD', 'Curve DAO Token USD', 'Kadena USD', 'Amp USD', 'Chiliz USD', 'Loopring USD', 'OKB USD', 'Basic Attention Token USD', 'yOUcash USD', 'Celo USD', 'Chainbing USD', 'Nexo USD', 'Theta Fuel USD', 'Arweave USD', 'Dash USD', 'Waves USD', 'DeFiChain USD', 'Oasis Network USD', 'ECOMI USD', 'NEM USD', 'Counos X USD', 'BitDAO USD', 'Mina USD', 'Secret USD']

CodePudding user response:

In your case, you don't need BeautifulSoup if you're using Selenium. I've tried this code and it printed all the names and prices.

from selenium import webdriver

# Raw string: backslashes in a Windows path must not be treated as
# escape sequences ('\P', '\c' raise DeprecationWarning otherwise).
PATH = r"C:\Program Files\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get('https://finance.yahoo.com/cryptocurrencies?count=100&offset=0')

def fun1(names):
    """Print 'Title:- <text>' for every element in *names*."""
    for element in names:
        title_text = element.text
        print(f'Title:- {title_text}')

def fun2(prices):
    """Print 'Price:- <text>' for every element in *prices*."""
    for element in prices:
        price_text = element.text
        print(f'Price:- {price_text}')

i = 1
while i < 5:
    driver.implicitly_wait(20)
    # The class attributes of the original XPaths ('//a[@]', '//span[@]')
    # were lost when the post was rendered, leaving invalid XPath; Yahoo's
    # hashed class names are unstable anyway, so match on the stable
    # aria-label attributes of the screener table cells instead.
    names = driver.find_elements_by_xpath("//td[@aria-label='Name']")
    prices = driver.find_elements_by_xpath("//td[@aria-label='Price (Intraday)']")
    fun1(names)
    fun2(prices)
    # Restored '+=': the stripped 'i  = 1' re-assigned i to 1 every pass,
    # looping forever.
    i += 1

CodePudding user response:

It would be quicker and more efficient to get the data through direct requests as opposed to automating with Selenium here:

import requests
import pandas as pd
import re
import json

url = "https://finance.yahoo.com/cryptocurrencies"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36'}

# Prime a session first so Yahoo sets the cookies the screener API expects.
s = requests.Session()
html = s.get(url, headers=headers)

cookies = s.cookies.get_dict()
cookieStr = ''
for k, v in cookies.items():
    # Restored '+=': the stripped 'cookieStr  = ...' re-assigned the
    # variable each pass, keeping only the last cookie.
    cookieStr += f'{k}={v};'

headers.update({'content-type': 'application/json',
                'cookie': cookieStr})

# The CSRF 'crumb' is embedded in the page's bootstrap JSON
# (root.App.main = {...}); extract and parse it.
pattern = 'root.App.main = ({.*})'
jsonStr = re.search(pattern, html.text).group(1)
jsonData = json.loads(jsonStr)

crumb = jsonData['context']['dispatcher']['stores']['CrumbStore']['crumb']
payload = {
    'crumb': crumb,
    'lang': 'en-US',
    'region': 'US',
    'formatted': 'true',
    'corsDomain': 'finance.yahoo.com'}

rows = []
page = 0
count = 250  # page size; a short page signals the last page

while count == 250:
    offset = page * 250
    query = {
        "offset": offset,
        "size": 250,
        "sortType": "DESC",
        "sortField": "intradaymarketcap",
        "quoteType": "CRYPTOCURRENCY",
        "query": {"operator": "and",
                  "operands": [{"operator": "eq",
                                "operands": ["currency", "USD"]},
                               {"operator": "eq",
                                "operands": ["exchange", "CCC"]}]},
        "userId": "", "userIdType": "guid"}

    url = 'https://query2.finance.yahoo.com/v1/finance/screener'
    jsonData = s.post(url, headers=headers, params=payload, json=query).json()

    results = jsonData['finance']['result'][0]['quotes']
    count = len(results)

    for idx, each in enumerate(results):
        # Flatten formatted fields: {'raw': 123, 'fmt': '123.00'} -> 123.
        for k, v in each.items():
            if isinstance(v, dict):
                each.update({k: v['raw']})

        rows.append(each)

    # Typo fixed: 'Aquired' -> 'Acquired'.
    print('Acquired: %s of %s' % (len(rows), jsonData['finance']['result'][0]['total']))
    # Restored '+=': the stripped 'page  = 1' pinned page to 1, so the
    # loop re-fetched offset 250 forever instead of paging forward.
    page += 1


df = pd.DataFrame(rows)

Output:

print(df)
             symbol  twoHundredDayAverageChangePercent  ...  algorithm maxSupply
0           BTC-USD                          -0.105620  ...        NaN       NaN
1           ETH-USD                          -0.122105  ...        NaN       NaN
2          USDT-USD                           0.000251  ...        NaN       NaN
3           BNB-USD                          -0.089717  ...        NaN       NaN
4          USDC-USD                          -0.000304  ...        NaN       NaN
            ...                                ...  ...        ...       ...
9491       FRT1-USD                                NaN  ...        NaN       NaN
9492      ANTIS-USD                                NaN  ...        NaN       NaN
9493      XRPUP-USD                          -0.575412  ...        NaN       NaN
9494       PLGR-USD                                NaN  ...        NaN       NaN
9495  MINIKISHU-USD                          -0.677844  ...        NaN       NaN

[9496 rows x 57 columns]
  • Related