Good day. I am trying to grab a few pieces of data from a URL; however, only the first few lines of my script run before it hits an error. Any ideas would be appreciated. Thank you.
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import requests
# Fetch the page with a plain HTTP GET and try to pull three values out of
# the returned HTML with BeautifulSoup.

# Browser-like User-Agent header sent with the request.
header = {"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0"}
# NOTE: everything after '#' is a URL fragment. Fragments are resolved by the
# browser and are not part of the HTTP request that reaches the server.
url = "https://www.pinksale.finance/#/pinklock/detail/0x5f7faccaff14ce5fe5ae2ff5bb1ea2fa1b7fc526?chain=BSC"
print ("Link:", url)
# requests only downloads the raw response body; it does not execute any
# JavaScript, so content rendered client-side will not be present in it.
urlpage = requests.get(url, headers=header, timeout=10, allow_redirects=False)
site = BeautifulSoup(urlpage.content, 'html.parser')
# find() returns None when no element matches, and calling .get_text() on
# None raises AttributeError: 'NoneType' object has no attribute 'get_text'.
item1 = site.find('div', class_='ant-list-item').get_text()
# NOTE(review): class_ filters on the element's CSS class attribute. The two
# strings below look like visible label text rather than class names --
# confirm against the rendered page markup.
item2 = site.find('div', class_='Total Amount Locked').get_text()
item3 = site.find('div', class_='Total Values Locked').get_text()
print ("item1: ", item1)
print ("item2: ", item2)
print ("item3: ", item3)
Current Output:
Link: https://www.pinksale.finance/#/pinklock/detail/0x5f7faccaff14ce5fe5ae2ff5bb1ea2fa1b7fc526?chain=BSC
AttributeError: 'NoneType' object has no attribute 'get_text'
Wanted Output:
0x7A9b...c0b2 7.104591955602949963 2022.04.04 16:32 UTC
Total Amount Locked 7.10459195560294996
Total Values Locked $1,843
CodePudding user response:
The page content is rendered entirely by JavaScript, so you need a browser-automation tool such as Selenium. The code below works as expected; you can just run it.
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Render the PinkLock detail page in a real Chrome instance (the data is
# injected by JavaScript), then parse the rendered HTML with BeautifulSoup and
# print the lock record plus the two totals.
url = 'https://www.pinksale.finance/#/pinklock/detail/0x5f7faccaff14ce5fe5ae2ff5bb1ea2fa1b7fc526?chain=BSC'

# CSS selectors for the data we need. The "LockRecord_tvl__1cBpD" class looks
# like a build-generated name, so it may change when the site is redeployed.
record_selector = '.ant-spin-container div ul li div.LockRecord_tvl__1cBpD'
totals_selector = 'td.has-text-right'

# Selenium 4 takes the driver binary through a Service object; passing the
# path positionally is deprecated and rejected by newer releases.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
try:
    driver.maximize_window()
    driver.get(url)
    # Wait until the JavaScript-rendered elements exist instead of sleeping
    # for a fixed time: returns as soon as they appear and raises
    # TimeoutException if they never do (20 s limit).
    wait = WebDriverWait(driver, 20)
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, record_selector)))
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, totals_selector)))
    site = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # even if loading or waiting failed; close() only closes the window.
    driver.quit()

# Each matching div holds one field of the lock record; join them with commas.
item1 = ','.join(x.get_text().strip() for x in site.select(record_selector))
# The right-aligned table cells hold, in order, the total amount locked and
# the total value locked.
item2 = [x.get_text() for x in site.select(totals_selector)]
if len(item2) < 2:
    # Fail with a descriptive message rather than a bare "list index out of
    # range" if the page layout changed.
    raise IndexError(
        f"expected at least 2 '{totals_selector}' cells, found {len(item2)}"
    )

print("item1: ", item1)
print("item2: ", item2[0])
print("item3: ", item2[1])
Output:
item1: 0x7A9b...c0b2,7.104591955602949963,2022.04.04 16:32 UTC
item2: 7.104591955602949963
item3: $1,711