I am currently trying to scrape a value from this specific website for a school project.
The median income estimate is the value I'm looking for.
I tried several methods to traverse the nested divs on the site, but I'm not able to get any results when I run them. Below is the code that I tried to use, but it just keeps returning nothing. Any help would be appreciated, thanks!
import csv
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
from bs4 import BeautifulSoup
# Fetch ACS table S1901 for one ZIP code from data.census.gov and print every
# <div> whose "comp-id" attribute matches TARGET_COMP_ID.
#
# The page source is read only after the target element is present in the
# DOM (or the wait times out); reading it immediately after get() can capture
# the page before the table has been inserted.
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

DRIVER_PATH = 'chromedriver_107.exe'
ZIP_CODE = '53706'
TARGET_COMP_ID = '1539'  # NOTE(review): may be a generated id that changes between loads - confirm
WAIT_SECONDS = 30

url = 'https://data.census.gov/cedsci/table?q=' + ZIP_CODE + ' income&tid=ACSST5Y2020.S1901'

# NOTE: `executable_path` is deprecated in Selenium 4 in favour of passing a
# Service object; kept here so the script still runs on the Selenium version
# it was written against.
driver = webdriver.Chrome(executable_path=DRIVER_PATH)
try:
    driver.get(url)
    try:
        # Block until the target div exists instead of grabbing the HTML
        # straight away.
        WebDriverWait(driver, WAIT_SECONDS).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'div[comp-id="{}"]'.format(TARGET_COMP_ID))
            )
        )
    except TimeoutException:
        # The element never appeared; fall through so the script still prints
        # the (empty) result list rather than crashing.
        pass
    content = driver.page_source
finally:
    # Always close the browser, even if navigation fails.
    driver.quit()

soup = BeautifulSoup(content, 'lxml')
a = soup.find_all("div", attrs={"comp-id": TARGET_COMP_ID})
print(a)
CodePudding user response:
Try this:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Open ACS table S1901 for ZIP code 53703 in Chrome and print row 11 of the
# table (the median income row): first its label, then its values.

# Set up the Chrome driver; the options object is passed through so that any
# flags added to it actually take effect.
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)
try:
    driver.maximize_window()
    url = 'https://data.census.gov/cedsci/table?q=53703 income&tid=ACSST5Y2020.S1901'
    driver.get(url)

    # One waiter is reused for both lookups; each until() call waits up to
    # 30 seconds for its element to be present in the DOM.
    wait = WebDriverWait(driver, 30)

    # The first div with row-id 11 holds the row label.
    label = wait.until(EC.presence_of_element_located((By.XPATH, "(//div[@row-id='11'])[1]")))
    print(label.text)

    # The second div with row-id 11 holds the row values.
    values = wait.until(EC.presence_of_element_located((By.XPATH, "(//div[@row-id='11'])[2]")))
    print(values.text)
finally:
    # Close the browser and end the driver session even if a wait times out.
    driver.quit()
Output:
Median income (dollars)
42,153
±3,200
114,643
±28,572
139,694