I'm trying to scroll down to the very bottom of the following
CodePudding user response:
To scroll down to the very bottom of the Zillow website on the right side of the webpage, you can call scrollIntoView() on the pagination element once it is visible, inducing WebDriverWait for visibility_of_element_located(), and you can use either of the following locator strategies:
Code Block:
# Expects `driver` (a Selenium WebDriver) plus `WebDriverWait`, `EC`
# (expected_conditions) and `By` to be in scope already.
# The URL embeds JSON containing double quotes, so the Python literal is
# single-quoted; wrapping it in double quotes ends the string early.
driver.get('https://www.zillow.com/clayton-county-ga/houses/3-_beds/2.0-_baths/?searchQueryState={"pagination":{},"usersSearchTerm":"Clayton County, GA","mapBounds":{"west":-85.19662367135751,"east":-83.66952406198251,"south":33.16207210856734,"north":33.76924644337602},"regionSelection":[{"regionId":1622,"regionType":4}],"isMapVisible":true,"filterState":{"beds":{"min":3},"baths":{"min":2},"sqft":{"min":1000,"max":3000},"built":{"min":1965},"con":{"value":false},"apa":{"value":false},"mf":{"value":false},"ah":{"value":true},"sort":{"value":"globalrelevanceex"},"lot":{"max":43560},"land":{"value":false},"gar":{"value":true},"tow":{"value":false},"manu":{"value":false},"apco":{"value":false}},"isListVisible":true,"mapZoom":9}')

# Wait up to 20 seconds for the pagination element to become visible, then
# scroll it into view.
pagination = WebDriverWait(driver, 20).until(
    EC.visibility_of_element_located((By.XPATH, "//div[@class='search-pagination']"))
)
driver.execute_script("return arguments[0].scrollIntoView(true);", pagination)
Browser Snapshot:
CodePudding user response:
Zillow is one of the smartest websites on the internet, and it also provides an API.
If you really love web scraping and want to learn it properly, why hardcode the scrolling, and why not learn how to extract the data from the API instead?
Below is an example of how to scrape data from the API as a JSON response using only the requests module.
Script:
import json

import requests

# Headers applied to every request made through the session.
headers = {
    "User-Agent": "Mozilla/5.0 ",
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache"
}

# Search filters in the JSON form sent as the `searchQueryState` parameter.
# Parsed once here; the page number is overwritten for each request below.
search_query_state = json.loads(
    '{"pagination":{"currentPage":2},"usersSearchTerm":"Clayton County, GA","mapBounds":{"west":-84.85604749948251,"east":-84.01010023385751,"south":33.18506146243053,"north":33.746411533949434},"regionSelection":[{"regionId":1622,"regionType":4}],"isMapVisible":false,"filterState":{"beds":{"min":3},"baths":{"min":2},"sqft":{"min":1000,"max":3000},"built":{"min":1965},"isCondo":{"value":false},"isApartment":{"value":false},"isMultiFamily":{"value":false},"isAllHomes":{"value":true},"sortSelection":{"value":"globalrelevanceex"},"lotSize":{"max":43560},"isLotLand":{"value":false},"hasGarage":{"value":true},"isTownhouse":{"value":false},"isManufactured":{"value":false},"isApartmentOrCondo":{"value":false}},"isListVisible":true}'
)

with requests.Session() as s:
    s.headers.update(headers)
    # Request the home page first (presumably so the session picks up the
    # site's cookies before the search endpoint is called - verify).
    s.head('https://www.zillow.com/')
    for page in range(1, 4):
        # Ask for the page of the current iteration; a hard-coded page number
        # here would fetch the same results on every pass.
        search_query_state["pagination"] = {"currentPage": page}
        params = {
            # Compact separators keep the serialized JSON free of extra spaces.
            "searchQueryState": json.dumps(search_query_state, separators=(",", ":")),
            "wants": '{"cat1":["listResults"],"cat2":["total"]}'
        }
        r = s.get('https://www.zillow.com/search/GetSearchPageState.htm', params=params).json()
        # Print the listed price of every result card on this page.
        for card in r['cat1']['searchResults']['listResults']:
            price = card['price']
            print(price)
Output:
$235,000
$299,000
$259,000
$290,000
$255,000
$300,000
$295,000
$250,000
$250,000
$259,900
$290,000
$315,000
$249,000
$289,900
... so on