I'm trying to scrape the zestimate for this address, 1205 Clover St, Accokeek, MD 20607, from this webpage using the requests module. When I enter that address in the input box of that webpage, I get the zpid 37374749, which, when I use it within params and issue a POST request, lets me get the zestimate.
The problem is that the script works only when I hardcode cookies from dev tools within the headers of the request; otherwise, it fails miserably and throws a JSONDecodeError.
import requests
from pprint import pprint
from bs4 import BeautifulSoup
# GraphQL endpoint that the query below is POSTed to.
url = 'https://www.zillow.com/graphql/'

# Headers sent with every request on the session. The cookie value was copied
# from the browser's dev tools; per the question, the script only works while
# this hard-coded cookie is present.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'referer': 'https://www.zillow.com/how-much-is-my-home-worth/',
    'cookie': '_px3=00d7bf4fc2e331a793b7ebf9f9e8059013b86b39ead67dfc15fd89fe4e1b459c:GrviXjUIrsA7lYSeSBB8Y8yQedr9ng0ByK7xkaB6CLDFCGYjxBL2r8dPy6MwnUk0Eqv75g0t1fcIa4c8ksIHoA==:1000:1qeUILwk/42tR6FaZ0ybaI63rAWDBAc7fsAeLcH02MyzXKEEoT0SDnk/zMCLA3Zj1BpBmuqO/4RQDntxiOgtsCGPF1VuxfpmXAeMPcxegynJ/PGXWYE3OLIa9vJ9XkVkGaSoH9knD3Ls7nHyuP/0DhapZbThlzDLoQ//Vflzi0eGv12lCSX/2msW rCeFgO0RhsBMNSN93EUhmuMBXcBMg==;'
}

# JSON body of the GraphQL request. The zpid is hard-coded for the address
# "1205 Clover St, Accokeek, MD 20607" mentioned in the question.
# The query text is one string; it is written as adjacent literals (one per
# GraphQL definition) purely for readability -- Python concatenates them.
params = {
    "operationName": "HowMuchIsMyHomeWorthReviewQuery",
    "variables": {"zpid": 37374749},
    "query": (
        "query HowMuchIsMyHomeWorthReviewQuery($zpid: ID!) {\n property(zpid: $zpid) {\n streetAddress\n city\n state\n zipcode\n bedrooms\n bathrooms\n livingArea\n zestimate\n homeStatus\n photos(size: XL) {\n url\n __typename\n }\n ...OmpHomeWorthUpsell_property\n isConfirmedClaimedByCurrentSignedInUser\n isVerifiedClaimedByCurrentSignedInUser\n ...UARequiredPropertyDimensions_property\n ...ContactAgentForm_property\n ...HomeInfo_property\n __typename\n }\n viewer {\n ...ContactAgentForm_viewer\n __typename\n }\n abTests {\n ...OmpHomeWorthUpsell_abTests\n ...UARequiredPropertyDimensions_abTests\n ...ContactAgentForm_abTests\n __typename\n }\n}\n\n"
        "fragment OmpHomeWorthUpsell_property on Property {\n zpid\n onsiteMessage(placementNames: [\"HMIMHWTopSlot\"]) {\n ...onsiteMessage_fragment\n __typename\n }\n __typename\n}\n\n"
        "fragment onsiteMessage_fragment on OnsiteMessageResultType {\n eventId\n decisionContext\n messages {\n skipDisplayReason\n shouldDisplay\n isGlobalHoldout\n isPlacementHoldout\n placementName\n testPhase\n bucket\n placementId\n passThrottle\n lastModified\n eventId\n decisionContext\n selectedTreatment {\n id\n name\n component\n status\n renderingProps\n lastModified\n __typename\n }\n qualifiedTreatments {\n id\n name\n status\n lastModified\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment OmpHomeWorthUpsell_abTests on ABTests {\n HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING: abTest(\n trial: \"HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING\"\n )\n __typename\n}\n\n"
        "fragment UARequiredPropertyDimensions_property on Property {\n currency\n featuredListingTypeDimension\n hasPublicVideo\n hdpTypeDimension\n listingTypeDimension\n price\n propertyTypeDimension\n standingOffer {\n isStandingOfferEligible\n __typename\n }\n zpid\n isZillowOwned\n zillowOfferMarket {\n legacyName\n __typename\n }\n ...ShouldShowVideo_property\n __typename\n}\n\n"
        "fragment ShouldShowVideo_property on Property {\n homeStatus\n isZillowOwned\n hasPublicVideo\n primaryPublicVideo {\n sources {\n src\n __typename\n }\n __typename\n }\n richMediaVideos {\n mp4Url\n hlsUrl\n __typename\n }\n __typename\n}\n\n"
        "fragment UARequiredPropertyDimensions_abTests on ABTests {\n ZO_HDP_HOUR_ONE_VIDEO: abTest(trial: \"ZO_HDP_HOUR_ONE_VIDEO\")\n __typename\n}\n\n"
        "fragment ContactAgentForm_property on Property {\n streetAddress\n state\n city\n zipcode\n zpid\n homeStatus\n homeType\n zestimate\n homeType\n isInstantOfferEnabled\n zillowOfferMarket {\n name\n code\n __typename\n }\n __typename\n}\n\n"
        "fragment ContactAgentForm_viewer on Viewer {\n name\n email\n zuid\n __typename\n}\n\n"
        "fragment ContactAgentForm_abTests on ABTests {\n SHOW_PL_LEAD_FORM: abTest(trial: \"SHOW_PL_LEAD_FORM\")\n __typename\n}\n\n"
        "fragment HomeInfo_property on Property {\n streetAddress\n city\n state\n zipcode\n bedrooms\n bathrooms\n livingArea\n homeStatus\n homeType\n contingentListingType\n photos(size: XL) {\n url\n __typename\n }\n listing_sub_type {\n is_newHome\n is_FSBO\n is_bankOwned\n is_foreclosure\n is_forAuction\n is_comingSoon\n __typename\n }\n __typename\n}\n"
    ),
}

# The session is closed automatically when the `with` block exits.
with requests.Session() as s:
    s.headers.update(headers)
    res = s.post(url, json=params)
    # Per the question, res.json() raises JSONDecodeError when the hard-coded
    # cookie is left out of the headers.
    pprint(res.json()['data']['property']['zestimate'])
How can I make this work without hardcoding cookies within the headers?
CodePudding user response:
To get the zpid dynamically, you first have to send a request containing the address. Then use the returned zpid in a second request:
import requests
# Autocomplete endpoint used to resolve a free-text address to a zpid.
api_url = "https://www.zillowstatic.com/autocomplete/v3/suggestions/"
# GraphQL endpoint queried with that zpid to read the zestimate.
graphql_url = "https://www.zillow.com/graphql/"

# No cookie header is sent here, unlike the script in the question.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
    "referer": "https://www.zillow.com/how-much-is-my-home-worth/",
}

# Address whose zestimate is looked up.
address = "1205 Clover St, Accokeek, MD 20607"

# Seconds to wait for each HTTP request; without a timeout, requests can
# block indefinitely on an unresponsive server.
timeout = 30

# GraphQL document sent in the second request. It is a single string, written
# as adjacent literals (one per GraphQL definition) for readability only.
query = (
    'query HowMuchIsMyHomeWorthReviewQuery($zpid: ID!) {\n property(zpid: $zpid) {\n streetAddress\n city\n state\n zipcode\n bedrooms\n bathrooms\n livingArea\n zestimate\n homeStatus\n photos(size: XL) {\n url\n __typename\n }\n ...OmpHomeWorthUpsell_property\n isConfirmedClaimedByCurrentSignedInUser\n isVerifiedClaimedByCurrentSignedInUser\n ...UARequiredPropertyDimensions_property\n ...ContactAgentForm_property\n ...HomeInfo_property\n __typename\n }\n viewer {\n ...ContactAgentForm_viewer\n __typename\n }\n abTests {\n ...OmpHomeWorthUpsell_abTests\n ...UARequiredPropertyDimensions_abTests\n ...ContactAgentForm_abTests\n __typename\n }\n}\n\n'
    'fragment OmpHomeWorthUpsell_property on Property {\n zpid\n onsiteMessage(placementNames: ["HMIMHWTopSlot"]) {\n ...onsiteMessage_fragment\n __typename\n }\n __typename\n}\n\n'
    'fragment onsiteMessage_fragment on OnsiteMessageResultType {\n eventId\n decisionContext\n messages {\n skipDisplayReason\n shouldDisplay\n isGlobalHoldout\n isPlacementHoldout\n placementName\n testPhase\n bucket\n placementId\n passThrottle\n lastModified\n eventId\n decisionContext\n selectedTreatment {\n id\n name\n component\n status\n renderingProps\n lastModified\n __typename\n }\n qualifiedTreatments {\n id\n name\n status\n lastModified\n __typename\n }\n __typename\n }\n __typename\n}\n\n'
    'fragment OmpHomeWorthUpsell_abTests on ABTests {\n HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING: abTest(\n trial: "HMIMHW_ZO_NFS_UPSELL_ONSITE_MESSAGING"\n )\n __typename\n}\n\n'
    'fragment UARequiredPropertyDimensions_property on Property {\n currency\n featuredListingTypeDimension\n hasPublicVideo\n hdpTypeDimension\n listingTypeDimension\n price\n propertyTypeDimension\n standingOffer {\n isStandingOfferEligible\n __typename\n }\n zpid\n isZillowOwned\n zillowOfferMarket {\n legacyName\n __typename\n }\n ...ShouldShowVideo_property\n __typename\n}\n\n'
    'fragment ShouldShowVideo_property on Property {\n homeStatus\n isZillowOwned\n hasPublicVideo\n primaryPublicVideo {\n sources {\n src\n __typename\n }\n __typename\n }\n richMediaVideos {\n mp4Url\n hlsUrl\n __typename\n }\n __typename\n}\n\n'
    'fragment UARequiredPropertyDimensions_abTests on ABTests {\n ZO_HDP_HOUR_ONE_VIDEO: abTest(trial: "ZO_HDP_HOUR_ONE_VIDEO")\n __typename\n}\n\n'
    'fragment ContactAgentForm_property on Property {\n streetAddress\n state\n city\n zipcode\n zpid\n homeStatus\n homeType\n zestimate\n homeType\n isInstantOfferEnabled\n zillowOfferMarket {\n name\n code\n __typename\n }\n __typename\n}\n\n'
    'fragment ContactAgentForm_viewer on Viewer {\n name\n email\n zuid\n __typename\n}\n\n'
    'fragment ContactAgentForm_abTests on ABTests {\n SHOW_PL_LEAD_FORM: abTest(trial: "SHOW_PL_LEAD_FORM")\n __typename\n}\n\n'
    'fragment HomeInfo_property on Property {\n streetAddress\n city\n state\n zipcode\n bedrooms\n bathrooms\n livingArea\n homeStatus\n homeType\n contingentListingType\n photos(size: XL) {\n url\n __typename\n }\n listing_sub_type {\n is_newHome\n is_FSBO\n is_bankOwned\n is_foreclosure\n is_forAuction\n is_comingSoon\n __typename\n }\n __typename\n}\n'
)

with requests.Session() as s:
    s.headers.update(headers)

    # Step 1: ask the autocomplete endpoint for address suggestions and take
    # the zpid of the first one.
    params = {
        "q": address,
        "resultTypes": "allAddress",
        "resultCount": "5",
    }
    suggestions = s.get(api_url, params=params, timeout=timeout)
    # Fail with a clear HTTP error instead of a JSONDecodeError on a
    # non-JSON error page.
    suggestions.raise_for_status()
    results = suggestions.json().get("results") or []
    if not results:
        # An unmatched address would otherwise surface as a bare IndexError.
        raise SystemExit(f"No address suggestions returned for {address!r}")
    zpid = results[0]["metaData"]["zpid"]

    # Step 2: request the property data for that zpid.
    payload = {
        "operationName": "HowMuchIsMyHomeWorthReviewQuery",
        "query": query,
        "variables": {"zpid": zpid},
    }
    response = s.post(graphql_url, json=payload, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    print(data["data"]["property"]["zestimate"])
Prints:
444700