Home > database >  Scrapy API request 403 error issue. Works with requests, but not scrapy
Scrapy API request 403 error issue. Works with requests, but not scrapy

Time:02-13

I'm having a weird issue where my API request works perfectly with the standard requests library, but it returns a 403 when made through Scrapy. The error message does not give me anything useful beyond the "403 error" itself. I have removed my API key from this post, but you can easily obtain your own. Let me know if you need help getting an API key.

Working Python code (no Scrapy):

import requests

# Airbnb "ExploreSections" API endpoint that this script queries.
url = "https://www.airbnb.ca/api/v3/ExploreSections"

# Query-string parameters. "variables" and "extensions" are JSON documents
# passed as plain strings; requests URL-encodes them when building the URL.
querystring = {
    "operationName": "ExploreSections",
    "locale": "en-CA",
    "currency": "CAD",
    "_cb": "1db02z70xkcr690n1h3gp0py4nmy",
    "variables": "{\"isInitialLoad\":true,\"hasLoggedIn\":false,\"cdnCacheSafe\":false,\"source\":\"EXPLORE\",\"exploreRequest\":{\"metadataOnly\":false,\"version\":\"1.8.3\",\"itemsPerGrid\":20,\"tabId\":\"home_tab\",\"refinementPaths\":[\"/homes\"],\"flexibleTripDates\":[\"february\",\"march\"],\"flexibleTripLengths\":[\"weekend_trip\"],\"datePickerType\":\"calendar\",\"placeId\":\"ChIJpTvG15DL1IkRd8S0KlBVNTI\",\"checkin\":\"2022-03-15\",\"checkout\":\"2022-03-16\",\"adults\":2,\"source\":\"structured_search_input_header\",\"searchType\":\"autocomplete_click\",\"query\":\"Toronto, ON\",\"cdnCacheSafe\":false,\"treatmentFlags\":[\"flex_destinations_june_2021_launch_web_treatment\",\"new_filter_bar_v2_fm_header\",\"merch_header_breakpoint_expansion_web\",\"flexible_dates_12_month_lead_time\",\"storefronts_nov23_2021_homepage_web_treatment\",\"flexible_dates_options_extend_one_three_seven_days\",\"super_date_flexibility\",\"micro_flex_improvements\",\"micro_flex_show_by_default\",\"search_input_placeholder_phrases\",\"pets_fee_treatment\"],\"screenSize\":\"large\",\"isInitialLoad\":true,\"hasLoggedIn\":false},\"removeDuplicatedParams\":false}",
    "extensions": "{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"0d0a5c3b44e87ccaecf084cfc3027a175af11955cffa04bb986406e9b4bdfe6e\"}}",
}

# Request headers; "YOUR_KEY" is a placeholder for a real Airbnb API key.
headers = {
    "x-airbnb-api-key": "YOUR_KEY",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36",
    "content-type": "application/json",
    "accept-language": "en-US,en;q=0.9",
}

# Send the GET request and print the raw response body.
response = requests.request(
    method="GET",
    url=url,
    params=querystring,
    headers=headers,
)

print(response.text)

Scrapy spider code:

import scrapy
import json
from urllib.parse import urlencode


class ListingsSpider(scrapy.Spider):
    """Spider that sends one GET request to Airbnb's ExploreSections API.

    The decoded JSON body of the response is yielded as a single item.
    """

    name = 'listings'
    allowed_domains = ['airbnb.ca']

    def start_requests(self):
        """Build the ExploreSections URL and yield the initial request."""
        # "variables" and "extensions" are JSON documents passed as plain
        # strings; urlencode() percent-encodes them into the query string.
        query = {
            "operationName": "ExploreSections",
            "locale": "en-CA",
            "currency": "CAD",
            "_cb": "1db02z70xkcr690n1h3gp0py4nmy",
            "variables": "{\"isInitialLoad\":true,\"hasLoggedIn\":false,\"cdnCacheSafe\":false,\"source\":\"EXPLORE\",\"exploreRequest\":{\"metadataOnly\":false,\"version\":\"1.8.3\",\"itemsPerGrid\":20,\"tabId\":\"home_tab\",\"refinementPaths\":[\"/homes\"],\"flexibleTripDates\":[\"february\",\"march\"],\"flexibleTripLengths\":[\"weekend_trip\"],\"datePickerType\":\"calendar\",\"placeId\":\"ChIJpTvG15DL1IkRd8S0KlBVNTI\",\"checkin\":\"2022-03-15\",\"checkout\":\"2022-03-16\",\"adults\":2,\"source\":\"structured_search_input_header\",\"searchType\":\"autocomplete_click\",\"query\":\"Toronto, ON\",\"cdnCacheSafe\":false,\"treatmentFlags\":[\"flex_destinations_june_2021_launch_web_treatment\",\"new_filter_bar_v2_fm_header\",\"merch_header_breakpoint_expansion_web\",\"flexible_dates_12_month_lead_time\",\"storefronts_nov23_2021_homepage_web_treatment\",\"flexible_dates_options_extend_one_three_seven_days\",\"super_date_flexibility\",\"micro_flex_improvements\",\"micro_flex_show_by_default\",\"search_input_placeholder_phrases\",\"pets_fee_treatment\"],\"screenSize\":\"large\",\"isInitialLoad\":true,\"hasLoggedIn\":false},\"removeDuplicatedParams\":false}",
            "extensions": "{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"0d0a5c3b44e87ccaecf084cfc3027a175af11955cffa04bb986406e9b4bdfe6e\"}}",
        }
        endpoint = "https://www.airbnb.ca/api/v3/ExploreSections?" + urlencode(query)

        # "YOUR_KEY" is a placeholder for a real Airbnb API key.
        request_headers = {
            "x-airbnb-api-key": "YOUR_KEY",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36",
            "content-type": "application/json",
            "accept-language": "en-US,en;q=0.9",
        }

        initial_request = scrapy.Request(
            url=endpoint,
            method='GET',
            headers=request_headers,
            callback=self.parse_listings,
        )
        yield initial_request

    def parse_listings(self, response):
        """Decode the response body as JSON and yield it as one item."""
        yield json.loads(response.body)

CodePudding user response:

The problem is with the headers. Copy them from the browser, and remove the Cookie and Content-Length headers if they are present.

import scrapy
import json
from urllib.parse import urlencode


class ListingsSpider(scrapy.Spider):
    """Spider that sends one GET request to Airbnb's ExploreSections API.

    The request carries a browser-style header set, and the decoded JSON
    body of the response is yielded as a single item.
    """

    name = 'listings'
    allowed_domains = ['airbnb.ca']

    custom_settings = {
        'DOWNLOAD_DELAY': 0.5,
    }

    def start_requests(self):
        """Build the ExploreSections URL and yield the initial request."""
        # "variables" and "extensions" are JSON documents passed as plain
        # strings; urlencode() percent-encodes them into the query string.
        query = {
            "operationName": "ExploreSections",
            "locale": "en-CA",
            "currency": "CAD",
            "_cb": "1db02z70xkcr690n1h3gp0py4nmy",
            "variables": "{\"isInitialLoad\":true,\"hasLoggedIn\":false,\"cdnCacheSafe\":false,\"source\":\"EXPLORE\",\"exploreRequest\":{\"metadataOnly\":false,\"version\":\"1.8.3\",\"itemsPerGrid\":20,\"tabId\":\"home_tab\",\"refinementPaths\":[\"/homes\"],\"flexibleTripDates\":[\"february\",\"march\"],\"flexibleTripLengths\":[\"weekend_trip\"],\"datePickerType\":\"calendar\",\"placeId\":\"ChIJpTvG15DL1IkRd8S0KlBVNTI\",\"checkin\":\"2022-03-15\",\"checkout\":\"2022-03-16\",\"adults\":2,\"source\":\"structured_search_input_header\",\"searchType\":\"autocomplete_click\",\"query\":\"Toronto, ON\",\"cdnCacheSafe\":false,\"treatmentFlags\":[\"flex_destinations_june_2021_launch_web_treatment\",\"new_filter_bar_v2_fm_header\",\"merch_header_breakpoint_expansion_web\",\"flexible_dates_12_month_lead_time\",\"storefronts_nov23_2021_homepage_web_treatment\",\"flexible_dates_options_extend_one_three_seven_days\",\"super_date_flexibility\",\"micro_flex_improvements\",\"micro_flex_show_by_default\",\"search_input_placeholder_phrases\",\"pets_fee_treatment\"],\"screenSize\":\"large\",\"isInitialLoad\":true,\"hasLoggedIn\":false},\"removeDuplicatedParams\":false}",
            "extensions": "{\"persistedQuery\":{\"version\":1,\"sha256Hash\":\"0d0a5c3b44e87ccaecf084cfc3027a175af11955cffa04bb986406e9b4bdfe6e\"}}",
        }
        endpoint = "https://www.airbnb.ca/api/v3/ExploreSections?" + urlencode(query)

        # Header set sent with the request; "API_KEY" is a placeholder for a
        # real Airbnb API key.
        request_headers = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.5",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/json",
            "DNT": "1",
            "Host": "www.airbnb.ca",
            "Pragma": "no-cache",
            "Referer": "https://www.airbnb.ca/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Sec-GPC": "1",
            "TE": "trailers",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36",
            "X-Airbnb-API-Key": "API_KEY",
            "X-Airbnb-GraphQL-Platform": "web",
            "X-Airbnb-GraphQL-Platform-Client": "minimalist-niobe",
            "X-Airbnb-Supports-Airlock-V2": "true",
            "X-CSRF-Token": "null",
            "X-CSRF-Without-Token": "1",
            "X-KL-Ajax-Request": "Ajax_Request",
            "X-Niobe-Short-Circuited": "true",
        }

        initial_request = scrapy.Request(
            url=endpoint,
            method='GET',
            headers=request_headers,
            callback=self.parse_listings,
        )
        yield initial_request

    def parse_listings(self, response):
        """Decode the response body as JSON and yield it as one item."""
        yield response.json()

Output:

{'data': {'presentation': {'__typename': 'RootPresentationContainer', 'explore': {'__typename': 'ExplorePresentation', 'sections': {'__typename': 'ExploreSections', 'sections': [{'__typename': 'SectionContainer', 'id': 
...
...
...

Getting the request headers (example with Chrome):

Request headers

  • Related