I have a site I want to scrape data from. I'm trying to extract the values shown below the two graphs, specifically Volume, Fees, and TVL. The problem is that when I use Selenium, I cannot find any elements that contain these values. I also inspected the page source, and it does not contain the content that is displayed on the page.
How could I scrape such a site?
Here is my code.
'''
THIS CODE WILL BE USED TO EXTRACT ETHEREUM DATA FROM UNISWAP V3 SITE
'''
# import required files
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup as BSoup
# base url
eth_uniswap_url = 'https://info.uniswap.org/#/'
# function to get driver
def get_driver(driver_path="D:\\Software\\Selenium WebDrivers\\chrome_v107\\chromedriver_win32\\chromedriver.exe"):
    """Create a Chrome WebDriver configured to run headless.

    Args:
        driver_path: Path to the chromedriver executable. Defaults to the
            location that was previously hard-coded in this function.

    Returns:
        A ``webdriver.Chrome`` instance. The caller owns it and should call
        ``quit()`` when finished.
    """
    options = Options()
    options.add_argument('--no-sandbox')
    # Was '--headlesss' (three s's): a misspelled switch is not the headless
    # flag, so the browser was not actually started in headless mode.
    options.add_argument('--headless')
    options.add_argument('--disable-dev-shm-usage')
    webdriver_service = Service(driver_path)
    return webdriver.Chrome(options=options, service=webdriver_service)
# function to open site
def access_site(driver, url, wait_css=None, timeout=20):
    """Load ``url`` in ``driver`` and return the rendered page as a soup.

    Args:
        driver: A Selenium WebDriver instance.
        url: Address of the page to open.
        wait_css: Optional CSS selector. When given, the call blocks until at
            least one matching element exists in the DOM. Use this for pages
            whose content is rendered by JavaScript after the initial load.
        timeout: Maximum number of seconds to wait.

    Returns:
        A BeautifulSoup object built from ``driver.page_source``.

    Raises:
        selenium.common.exceptions.TimeoutException: If the wait condition is
            not satisfied within ``timeout`` seconds.
    """
    driver.get(url)
    # Constructing WebDriverWait alone does not wait for anything; the wait
    # only happens when .until() is called with a condition.
    wait = WebDriverWait(driver, timeout)
    if wait_css is None:
        wait.until(
            lambda d: d.execute_script('return document.readyState') == 'complete'
        )
    else:
        # "css selector" is the locator strategy string behind By.CSS_SELECTOR;
        # an empty result list is falsy, so until() keeps polling.
        wait.until(lambda d: d.find_elements('css selector', wait_css))
    return BSoup(driver.page_source, 'html.parser')
if __name__ == '__main__':
    # Script entry point: start the browser, load the page, and print the
    # <div> elements matching the target class list.
    print('We are going to create the driver')
    driver = get_driver()
    try:
        print('We are now going to access the page')
        site_access = access_site(driver, eth_uniswap_url)
        # NOTE(review): these class names look auto-generated and may change
        # between site builds - confirm they still match the live markup.
        data = site_access.find_all('div', {'class': 'sc-jKJlTe sc-hMqMXs sc-hSdWYo eJnjNO'})
        print(len(data))
        print(data)
    finally:
        # Always shut the browser down, even if scraping fails; otherwise the
        # Chrome and chromedriver processes are left running.
        driver.quit()
This prints an empty list, meaning that the elements cannot be found in the source page.
How can I go about this?
CodePudding user response:
You can try the next example:
from selenium import webdriver
import time
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.service import Service
import pandas as pd
# Open the page in Chrome, give the JavaScript app time to render, then parse
# the rendered HTML and collect the label/value pairs into a dict.
webdriver_service = Service("./chromedriver") #Your chromedriver path
driver = webdriver.Chrome(service=webdriver_service)
data = []  # NOTE(review): not used anywhere in this snippet
chart_data = []
try:
    driver.get('https://info.uniswap.org/#/')
    driver.maximize_window()
    # Fixed pause so the client-side rendering can finish before the page
    # source is read.
    time.sleep(8)
    soup = BeautifulSoup(driver.page_source,"html.parser")
finally:
    # The page source has been captured (or loading failed); either way the
    # browser is no longer needed, so release it.
    driver.quit()
# NOTE(review): the attribute filters in the three selectors below are empty
# ('div[]'), which does not look like a valid CSS attribute selector. They
# appear to have been stripped when this snippet was copied - restore the
# original attribute conditions before running.
graphs = soup.select('div[]')
d = {x.select_one('div[]').get_text(strip=True) : x.select_one('div[]').get_text(strip=True) for x in graphs}
chart_data.append(d)
print(chart_data)
# df = pd.DataFrame(chart_data)
# print(df)
Output:
[{'Volume 24H:': '$1.35b', 'Fees 24H:': '$1.41m', 'TVL:': '$3.68b'}]