I'm fairly new to Python and I'm having trouble requesting data from an API. The API endpoint is: https://www.poder360.com.br/banco-de-dados/
No matter what I do, I only get status code 403.
import requests
# Plain GET with the default python-requests headers; the question reports
# that this call comes back with status 403.
api_url = "https://pesquisas.poder360.com.br/api/"
response = requests.get(api_url)
print(response.status_code)
# Bare expressions: their values are only echoed in an interactive
# session/notebook, not when run as a script.
response.json()
response.text
CodePudding user response:
You are getting 403 because there is Cloudflare protection. To get past it, you need to include headers/cookies to show Cloudflare that you are not a bot.
Here's a working solution using urllib:
import urllib.request
# Browser-like request headers so the request does not look like the
# default Python client.
headers = {
    'authority': 'www.poder360.com.br',
    'cache-control': 'max-age=0',
    'sec-ch-ua': '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
    # The media type is "application/xhtml+xml"; the "+" must not be a space.
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'accept-language': 'ja,ja-JP;q=0.9,en-SG;q=0.8,en-US;q=0.7,en;q=0.6',
    # The HTTP request header that carries cookies is "Cookie" (singular);
    # a "cookies" header is not interpreted as a cookie by the server.
    'cookie': 'PHPSESSID=t2qgtmr3fjv5lg7t2j9hv1gv3e;',
}
request = urllib.request.Request('https://www.poder360.com.br/banco-de-dados/', headers=headers)
# The context manager closes the underlying connection even if read() raises.
with urllib.request.urlopen(request) as response:
    r = response.read()
print(r.decode('utf-8'))
I've left my PHPSESSID cookie in there as I don't plan to use the site.
Alternatively, if you're using Python 2, this would work with python-requests:
import requests
from requests import Session
from collections import OrderedDict
# Browser-like request headers, kept in an OrderedDict so they are sent in
# this exact order.
headers = OrderedDict([
    # NOTE(review): requests only decodes "br" bodies when a Brotli package
    # is installed -- confirm before reading response.text / response.content.
    ('Accept-Encoding', 'gzip, deflate, br'),
    ('Host', 'www.poder360.com.br'),
    ('Authority', 'www.poder360.com.br'),
    ('Cache-Control', 'max-age=0'),
    ('Sec-Ch-Ua', '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"'),
    ('Sec-Ch-Ua-Mobile', '?0'),
    ('Sec-Ch-Ua-Platform', '"Windows"'),
    ('Dnt', '1'),
    ('Upgrade-Insecure-Requests', '1'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'),
    # The media type is "application/xhtml+xml"; the "+" must not be a space.
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'),
    ('Sec-Fetch-Site', 'none'),
    ('Sec-Fetch-Mode', 'navigate'),
    ('Sec-Fetch-User', '?1'),
    ('Sec-Fetch-Dest', 'document'),
    ('Accept-Language', 'ja,ja-JP;q=0.9,en-SG;q=0.8,en-US;q=0.7,en;q=0.6'),
    # The HTTP request header that carries cookies is "Cookie" (singular).
    ('Cookie', 'PHPSESSID=t2qgtmr3fjv5lg7t2j9hv1gv3e;'),
])
# The context manager closes the session's pooled connections on exit.
with Session() as s:
    # Replace (not update) the session defaults so only these headers are
    # used; they apply to every request made through the session, so they do
    # not need to be passed to get() again.
    s.headers = headers
    response = s.get('https://www.poder360.com.br/banco-de-dados/')
print(response.status_code)