Home > database >  How to get around 403 error when webscraping with R
How to get around 403 error when webscraping with R

Time:07-10

I am trying to scrape some price information from a local supermarket's website. I am being denied access to the site (HTTP 403 error) and am not sure why. I have set my user agent to that of Google Chrome, but I am still getting the same error. Thanks!

# Request a paknsave.co.nz product page while sending a desktop-Chrome
# User-Agent header.
library(httr) # GET() and add_headers() live in httr, not in rvest or dplyr
library(rvest)
library(dplyr)

link <- "https://www.paknsave.co.nz/shop/product/5031015_ea_000pns?name=size-7-eggs"

# `page` holds the httr response; check the outcome with status_code(page).
page <- GET(
  link,
  add_headers(
    "user-agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
  )
)

CodePudding user response:

import requests

# Cookies passed to the request via `cookies=`, hard-coded as literal strings.
# NOTE(review): several values embed what look like Unix timestamps
# (e.g. 1657420237), so these were presumably captured from one browser
# session and have likely expired -- confirm and re-capture before reuse.
cookies = {
    # NOTE(review): this value contains a literal space ("kWx krXKDl"); it may
    # be a mangled '+' from the original capture -- verify.
    '__cf_bm': 'HrPgHXxwZ21ZHce6g119GTh0TW5PLK226Avwsqr_yRk-1657420237-0-AV3AFcbB1RRPQi9sj9f0jlyEtnLOU3joTSTqvIuc0StLeyezQdAJDeSSBWpSuxYxQLz6k7KvDjIKR4dPCPww4nxztaohWaWLgKR8wJw1OopkzNjFT7V/MPgZknXPuL4W0B//cUxLgOniMWzJyUqDjAPqJ3fIVNZykHBsk3kWx krXKDl/xVcmgfD0X8HnQoBtw==',
    'cf_chl_2': '6362dc2388c492e',
    'cf_chl_prog': 'x13',
    'cf_clearance': '7Q36fdlfvE_xpzRSuN425iQrAXi0K6t9oMEg9bgBl1E-1657420230-0-150',
    'shell#lang': 'en',
    'SessionCookieIdV2': '2f331eba017f4978a21db30d38bd58bd',
    'SC_ANALYTICS_GLOBAL_COOKIE': '75db5ec972684f1d83a298be947ff26f|False',
    # JSON text stored as a plain string; it is not parsed anywhere in this script.
    'server_nearest_store_v2': '{"StoreId":"3c5e3145-0767-4066-9349-6c0a1313acc5","UserLat":"37.7697","UserLng":"-122.3933","StoreLat":"-35.09945","StoreLng":"173.258322","IsSuccess":true}',
    '__RequestVerificationToken': 'i7yGKUCMmP0LpzH6Ir9q8Tin79X0zz2C9mzoUh_VUyNxQNWZ-Gm64inb2J8yRT7C89VdUZc85pIIztehy5ypTrgxBmU1',
    # Same GUID as the "StoreId" field of server_nearest_store_v2 above.
    'STORE_ID_V2': '3c5e3145-0767-4066-9349-6c0a1313acc5|False',
    'Region': 'NI',
    'AllowRestrictedItems': 'true',
    'sxa_site': 'PAKnSAVE',
    '__cfruid': '8f13df268c53d03a3b3440e47baa5df4671d278d-1657420232',
    '_gcl_au': '1.1.1855441244.1657420235',
    '_ga_8ZFCCVKEC2': 'GS1.1.1657420235.1.1.1657420235.60',
    '_ga': 'GA1.1.444441072.1657420235',
    'FPLC': 'G6JkKZ86eQgLbN2PTg5DU9nts8HFZj2ZdPTjM6VTo6Johf6YgbfYcZZVDcnxgUmYN/dRRR6/z4mEDQIYWroUc8Rhy5+XkehpQlNuUN+d11JsFx8S/zyGohu9wvfYeA==',
    'FPID': 'FPID2.3.pLYyjOkBCu9gt8rah2k+xfEuOt1pMJfZ/g7VwV/wsy8=.1657420235',
    'ASP.NET_SessionId': '1rzzw1ls1vagg4fdeayflrm0',
    'fs-store-select-tooltip-closed': 'true',
}

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:98.0) Gecko/20100101 Firefox/98.0',
    'Accept': 'text/html,application/xhtml xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://www.paknsave.co.nz/shop/product/5031015_ea_000pns?name=size-7-eggs&__cf_chl_tk=ZpR7svpE5x07zN1HC3SVHKDAAVXTtdqPUtgz1pBfj.A-1657420229-0-gaNycGzNCD0',
    'Origin': 'https://www.paknsave.co.nz',
    'DNT': '1',
    'Connection': 'keep-alive',
    # Requests sorts cookies= alphabetically
    # 'Cookie': '__cf_bm=HrPgHXxwZ21ZHce6g119GTh0TW5PLK226Avwsqr_yRk-1657420237-0-AV3AFcbB1RRPQi9sj9f0jlyEtnLOU3joTSTqvIuc0StLeyezQdAJDeSSBWpSuxYxQLz6k7KvDjIKR4dPCPww4nxztaohWaWLgKR8wJw1OopkzNjFT7V/MPgZknXPuL4W0B//cUxLgOniMWzJyUqDjAPqJ3fIVNZykHBsk3kWx krXKDl/xVcmgfD0X8HnQoBtw==; cf_chl_2=6362dc2388c492e; cf_chl_prog=x13; cf_clearance=7Q36fdlfvE_xpzRSuN425iQrAXi0K6t9oMEg9bgBl1E-1657420230-0-150; shell#lang=en; SessionCookieIdV2=2f331eba017f4978a21db30d38bd58bd; SC_ANALYTICS_GLOBAL_COOKIE=75db5ec972684f1d83a298be947ff26f|False; server_nearest_store_v2={"StoreId":"3c5e3145-0767-4066-9349-6c0a1313acc5","UserLat":"37.7697","UserLng":"-122.3933","StoreLat":"-35.09945","StoreLng":"173.258322","IsSuccess":true}; __RequestVerificationToken=i7yGKUCMmP0LpzH6Ir9q8Tin79X0zz2C9mzoUh_VUyNxQNWZ-Gm64inb2J8yRT7C89VdUZc85pIIztehy5ypTrgxBmU1; STORE_ID_V2=3c5e3145-0767-4066-9349-6c0a1313acc5|False; Region=NI; AllowRestrictedItems=true; sxa_site=PAKnSAVE; __cfruid=8f13df268c53d03a3b3440e47baa5df4671d278d-1657420232; _gcl_au=1.1.1855441244.1657420235; _ga_8ZFCCVKEC2=GS1.1.1657420235.1.1.1657420235.60; _ga=GA1.1.444441072.1657420235; FPLC=G6JkKZ86eQgLbN2PTg5DU9nts8HFZj2ZdPTjM6VTo6Johf6YgbfYcZZVDcnxgUmYN/dRRR6/z4mEDQIYWroUc8Rhy5+XkehpQlNuUN+d11JsFx8S/zyGohu9wvfYeA==; FPID=FPID2.3.pLYyjOkBCu9gt8rah2k+xfEuOt1pMJfZ/g7VwV/wsy8=.1657420235; ASP.NET_SessionId=1rzzw1ls1vagg4fdeayflrm0; fs-store-select-tooltip-closed=true',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Cache-Control': 'max-age=0',
    # Requests doesn't support trailers
    # 'TE': 'trailers',
}

# Query-string arguments handed to the request through `params=`.
params = dict(name='size-7-eggs')

# Form fields sent as the POST body via `data=`.
# NOTE(review): the key names (captcha_vc, captcha_answer, cf_ch_cp_return)
# suggest these answer a one-off challenge page and are presumably
# single-use -- confirm before relying on them.
data = {
    'md': 'LY3z3moXjvkiL6.TltBjKutBlxr0gRcWotnWZD224Ik-1657420229-0-AdVFW-EtYzqbcg7Spq0beYQxr56eQ35wUZByyeUdPhP2RYKPi-G5qDV3BcVp9a-cMTclDfdEhbvXZLuhffGQLmLiSva5afHqpVZZYRepw3ej7SDL2x_vpDpT7yDzSdOVhiRIYWNgG82LWigFM5t7GPoG9XgJTDbpt7exsP4fbjENcCSQCGPhzI8H1FZVDUmRLDMMRLSFECC_ntCat-xMaNN1-LMQnqb_ASBKE7tQzjtFlZc3Uix4SsRbeZqs1CWJWdVsRMfm8jNh9hhG9NuMIq2ggRZGd7r1va7C8m1aj1UdlbnzM2juswggBe-1J1gMF6ZFrjmbiulBfe-HSwf3h65MlDrX63uJTU4XCg62A9HMGq_5t2IcNa8V93H4fLeJEI-KMsmHmhM-gE-VHHUV1ygSyaK1RQQvDNVF2K9QRFYaMZBc0rjMaJsZd8tiU5vXW4xEAWKDvxZHSAkXqklXpKY58VTudkiRw_xrcAzIkGotTZ3okQwAIV4BFspJOO6ir9yx4MIyjPr53rGvqQEOSa24GHlpAm8EEojo3FGbu_YUX5vjjptyItyM-juiyxqdiWx7dKA1gY-KJjwNpVKYhfAfLgH7EU86WUTPHZK2Zkx1d3URpbaKnll47i18d-dSnfuWwt1NwAv_rcr_tFdC2cdYxoebGLMqd7DIKdRR7BgNve_lOxnVjv6-tS7eIoOuCw10FX3HN_mVv7ez8RXLYKYlbMFeKw_tbUNqRayHFsjifNPwr0nkZI_dCpxfwc56pYIWLprD4GbRvMW8DLvb9780wJteNcbw3lUAwljK_lVX07rqC69W_SEzU_Tx7SA5XA',
    # This literal used to be broken across two physical lines inside single
    # quotes, which Python rejects as a SyntaxError; the two halves are joined
    # here with nothing inserted at the seam ("...Myon5 " + "sNV3Fdp1...").
    # NOTE(review): whether a character was lost at that break is unknown --
    # verify against a fresh capture.
    'r': 'NhJKZcFC0PI8V8pbbsMGnQLopbV2aQcLxBSjbGH0Rnk-1657420229-0-AV1Uyb7KhY0yniBW0nJsIQ9n0cm m8jbKXLtyUVB8dsxtMpVww0d5vQUlTu06beYL4XRKa9x8nz8ddLZzJhz/rW61z8Bax45FL5KQqVdrnC5Ki3ul MZnmLwBC1Do1DYP2837DScEbbPB9lKtzv0M6M0 plSLMwyYKolemCmp3vUI3DWKvvv0SLBP1R/hzqGH56HuR/wTrJEv2mmjOUiqa FyG9Go6wMIE57FV3WmtaRHp2ZE QtpgLw8o9l6KUN8wZByC NfSfBLDDK1ofEtb0aDXPPjr JfgMeM1rigJOGxsYTN8tcdoQA7B/wvA/xWGJ V49AoJOWo1pXm0WqXenbsHYbFwYYT8wVoiUAMod6uxPKqxJYGrOJkI6N28k/WjcRQaJ4Tbz6hR18WBN8xayfGtTvfc2vVHpfZGzI2BjKIbGVQ6UL6mgGFsQZ16UDfX0FyDOkvDRtd3Q645K8l9oUt1  PLoEQxmOP21FegAjIRFVy1 WfgKVJgubsnpNRIOxZ U9EZbgRTDjJO ruPzzUFPixw74ZNCInHVNE4xwpUWndqDTS17RKX/ZR3auc1rltgtJGHrVFzATgLjAroAhSyy24ddDpvGcRXZZSoaI X6bXf7A0UVh2BxPvPYmbwsxTXblYxve6enVJvN5drt n 0nVzoih3VhMHuQqDLebFn Yfq4OJmVgpq0yXuiGM  JPY/5H8bicbKiCMjG JSUSJOyJoBItscitorPNSyJC6OX1laLmXarxXbLd2AXUMhXAteXoToSDm1gUbCOkYBR/1q7lv/QuBJBiFV9Rnt3zVRsqNPNGzlz6CzTVPpHGU209I21lA79VYTtnLkAjWVRE/PRARD4qK4JdzAvnJyI8xoBGebUYs nySaewIMnjslXPQSYLisF53fNLwkcjUoZqq1qMEmw7Wc2fq6DB/9MFTa7ZVc9luxy8mbdyRAeh9XXOfbUNhwp0RYaC9ps0pptrj/2e7FJOhe/r63h DoA3LhSh8JOc40SGE6ayfsgr5FVAmUwsFvQE2sYuCI4GaULBP tVkhOrEY9793n09AM9ljQ7Cr2dV 0p80xQdzy7td7pEVOa/qw4IvYPTBLWGHjBacJiON0ARj uO0RdWR7MSJP/6WGvvF01Tbcdd12Ss4JzqYqc sDJ9VjqaqawOW79JI7DjUYXPhJqJ6iGPxMDLe939qTpystXf6Fvi3ZGovpBru0aMFlCmTU/HwtkwAG3G5Hzg2GFFr2ViuYzB1TrGzzGDmbOwuWEG6p6l/WCeuY8l5f/NfqTq8oLaGCiDYr9sbJL4EOHbJZ 6tcaoQxD5xm Yd3jskCqk6MY7vGARUrof/Wl0GhU8znpVZeDa7wKmzGd6XGYG/gJKnM6rOf3I/sEnY8HJ5Hj9o7bZ52x9N80DwPJbGbTvVG9JR9pE1B0MPqrUUM1Omkh2aUh/Co7qAf2qC3aeTBLbwKwXN6TcB6S1yOGcvNMT eKbdMpA1Ac0YjvD0b1t3/SlK3pkx10kBhXJ3HE0bj/WiqmHNW/OX3FiT7B06ynF rKPrUPKqQ089/rThZ VAheq7KveUxJtVXAwkOwe0xn7hk5HuhmKLq1i8psr1eFU9IJYmSB8QENvZ1k4ZOUdBbBZxBeMpA8iA2pu57E/ hTCDvjdpxxETwu84Y1sEHxVO80Qsir25 DDemFMiVi9DRUlyaiZ43dHC/qhrb4TEQiRWpYTpOrjv7Z0YPZUm5O3Q5hyXYfpgeuJ1 0JHLz/KH0U0lNLynMAAjyypipScAruzr25YGHAGsexzTwoQRoVED6nNRbc/4hQcFdNhRIyhd1aDNDkkzOC3gKPn8kjpFQqVmoAQU8Yfv6BohhHMyon5 sNV3Fdp1/az30lILeriDWU7KoL70nmvdyBcmboUyGesJS4GPWAe67E0sU9NLcZF6LzoP1YUmdd0FQZ7wvisAg2yJyBVwXD eehpLE7gGeXEyAxr7DepYT8wwqEGk4Dcx 4AScviP84T8JKiDiWchaGW/GTjdc/5flgFa3BeR/4W94wDpQ==',
    'vc': '1574e190db357034339f269c7c5755d0',
    'captcha_vc': 'c3856b069d07c7e16a7767324ca6f885',
    'captcha_answer': 'cppbodShTefN-13-7285e0b05b37b8af',
    'cf_ch_cp_return': '9a10d0a1037bf2b325009ab7be973b18|{"managed_clearance":"ni"}',
}

# POST the form fields to the product URL, attaching the query parameters,
# cookies and headers prepared earlier in the script.
response = requests.post(
    'https://www.paknsave.co.nz/shop/product/5031015_ea_000pns',
    params=params,
    cookies=cookies,
    headers=headers,
    data=data,
)
  • Related