Home > database >  How to scroll down through the right part of the zillow webpage using Selenium
How to scroll down through the right part of the zillow webpage using Selenium

Time:04-17

I'm trying to scroll down to the very bottom of the right-hand part of the following Zillow webpage (image: enter image description here).

CodePudding user response:

To scroll down to the very bottom of the right-hand side of the Zillow webpage, wait for the pagination element to become visible by using WebDriverWait with visibility_of_element_located(), and then call scrollIntoView() on it. You can use the following locator strategy:

  • Code Block:

    # Assumes `driver` is an already-created Selenium WebDriver and that
    # WebDriverWait, EC (expected_conditions) and By are imported elsewhere.
    #
    # The URL embeds a JSON object full of double quotes, so the Python string
    # literal must be single-quoted; a double-quoted literal is a syntax error.
    driver.get('https://www.zillow.com/clayton-county-ga/houses/3-_beds/2.0-_baths/?searchQueryState={"pagination":{},"usersSearchTerm":"Clayton County, GA","mapBounds":{"west":-85.19662367135751,"east":-83.66952406198251,"south":33.16207210856734,"north":33.76924644337602},"regionSelection":[{"regionId":1622,"regionType":4}],"isMapVisible":true,"filterState":{"beds":{"min":3},"baths":{"min":2},"sqft":{"min":1000,"max":3000},"built":{"min":1965},"con":{"value":false},"apa":{"value":false},"mf":{"value":false},"ah":{"value":true},"sort":{"value":"globalrelevanceex"},"lot":{"max":43560},"land":{"value":false},"gar":{"value":true},"tow":{"value":false},"manu":{"value":false},"apco":{"value":false}},"isListVisible":true,"mapZoom":9}')
    # Wait up to 20 seconds for the pagination element to become visible,
    # then scroll it into view.
    pagination = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='search-pagination']")))
    driver.execute_script("return arguments[0].scrollIntoView(true);", pagination)
    
  • Browser Snapshot:

zillow_pagination

CodePudding user response:

Zillow is one of the smartest websites on the internet, and it also provides an API. If you really want to learn web scraping, why hardcode the scrolling when you can extract the data from the API instead? Below is an example of how to scrape data from the API as a JSON response using only the requests module.

Script:

import json

import requests

# Print the listing price of every result on the first three pages of a
# Zillow search, using the JSON endpoint that backs the search page.

SEARCH_URL = 'https://www.zillow.com/search/GetSearchPageState.htm'

headers = {
    "User-Agent": "Mozilla/5.0 ",
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache"
}

# Search definition sent as the "searchQueryState" query parameter.
# "pagination" is overwritten with the requested page inside the loop below.
search_query_state = {
    "pagination": {"currentPage": 1},
    "usersSearchTerm": "Clayton County, GA",
    "mapBounds": {
        "west": -84.85604749948251,
        "east": -84.01010023385751,
        "south": 33.18506146243053,
        "north": 33.746411533949434,
    },
    "regionSelection": [{"regionId": 1622, "regionType": 4}],
    "isMapVisible": False,
    "filterState": {
        "beds": {"min": 3},
        "baths": {"min": 2},
        "sqft": {"min": 1000, "max": 3000},
        "built": {"min": 1965},
        "isCondo": {"value": False},
        "isApartment": {"value": False},
        "isMultiFamily": {"value": False},
        "isAllHomes": {"value": True},
        "sortSelection": {"value": "globalrelevanceex"},
        "lotSize": {"max": 43560},
        "isLotLand": {"value": False},
        "hasGarage": {"value": True},
        "isTownhouse": {"value": False},
        "isManufactured": {"value": False},
        "isApartmentOrCondo": {"value": False},
    },
    "isListVisible": True,
}

with requests.Session() as s:
    s.headers.update(headers)
    # NOTE(review): presumably this primes the session (e.g. cookies) before
    # the search endpoint is called -- confirm it is still required.
    s.head('https://www.zillow.com/')
    for page in range(1, 4):
        # Request the page for this iteration; a hard-coded page number here
        # would fetch the same page on every pass.
        search_query_state["pagination"] = {"currentPage": page}
        params = {
            # Compact separators keep the JSON free of extra whitespace.
            "searchQueryState": json.dumps(search_query_state, separators=(",", ":")),
            "wants": '{"cat1":["listResults"],"cat2":["total"]}',
        }

        response = s.get(SEARCH_URL, params=params)
        # Fail with a clear HTTP error (e.g. when the request is blocked)
        # instead of an obscure JSON decoding error.
        response.raise_for_status()
        r = response.json()
        for card in r['cat1']['searchResults']['listResults']:
            price = card['price']
            print(price)

Output:

$235,000
$299,000
$259,000
$290,000
$255,000
$300,000
$295,000
$250,000
$250,000
$259,900
$290,000
$315,000
$249,000
$289,900

... and so on

  • Related