# selenium-request.py
from seleniumwire import webdriver # Import from seleniumwire
# Create a new instance of the Chrome driver
driver = webdriver.Chrome()
try:
    driver.get('https://www.cmegroup.com/content/cmegroup/en/tools-information/advisorySearch/jcr:content/full-par/cmeadvisorysearch.advisorySearch.advisorynotices:Advisory Notices.-.2.12|07|2021.01|01|2008.json')
    # Walk every request captured while the page was loading.
    for request in driver.requests:
        # Skip requests for which no response was captured.
        if request.response:
            # Prints the headers of the server's response to this request.
            print(request.response.headers)
finally:
    # Always shut the browser down, even if loading the page raised.
    driver.quit()
When I run that code I get the headers Selenium uses:
$ python selenium-request.py
Accept-Ranges: bytes
Access-Control-Allow-Origin: http://star-website.com
Content-Type: application/json
ETag: W/"36b8a-5d3d28ed9cc43"
Last-Modified: Thu, 23 Dec 2021 16:16:16 GMT
Referrer-Policy: no-referrer-when-downgrade
Server: Apache
ServerID: e1
Strict-Transport-Security: max-age=31536000; includeSubDomains
Vary: Accept-Encoding
Content-Encoding: gzip
Cache-Control: max-age=86400
Date: Thu, 23 Dec 2021 16:16:16 GMT
Content-Length: 46236
Connection: keep-alive
Content-Security-Policy: frame-ancestors 'self' *.cmegroup.com *.quikstrike.net commodex.co.il openexchange.community.cmegroup.com staging.tickertocker.com http://www.straitsfinancial.com www.straitsfinancial.com http://straitsfinancial.com https://www.home.saxo https://app.topsteptrader.com https://help.topsteptrader.com https://staging.topsteptrader.com https://blueeditsitecore.sys.dom https://bluesitecore.sys.dom https://sitecoredev.orange.saxobank.com https://sitecoredev-nocache.orange.saxobank.com https://sitecoredevedit.orange.tst2.dom http://star-website.com https://www.investing.com https://*.benzinga.com https://bz.zingbot.bz https://www.zingbot.bz https://gdcdyn.interactivebrokers.com https://www.interactivebrokers.com https://zingbot.bz https://www.zingbot.bz https://m.zingbot.bz https://bz.zingbot.bz https://dev.futuresfirstacademy.com https://uat.futuresfirstacademy.com https://futuresfirstacademy.com http://stage.barchart.com http://www.barchart.com https://www.infinityfutures.com https://kilofutures.com https://m.cqg.com https://mdemo.cqg.com *.chicago.cme.com:7822 https://uatm.cqg.com https://local.zingbot.bz https://www.gulfbondsukuk.org www.kgieworld.sg https://www.propex24.wpcomstaging.com https://www.propex24.com *.straitsfinancial.gate39tech.com us.straitsfinancial.com https://*.kapcoclients.com https://kapcoclients.com https://*.wallstreetbound.org https://wallstreetbound.org https://cofcointl.plateau.com https://rise.articulate.com https://members.tradeday.com http://blf-django.herokuapp.com https://www.bluelinefutures.com https://www.bluelinefutures.live https://www.bluelinefutures.trade https://login.chicago.cme.com https://loginnr.chicago.cme.com https://logincert.chicago.cme.com https://login-ny.chicago.cme.com https://ampfutures.com https://cme.ampfutures.com https://*.advantagefutures.com https://*.e-futures.com https://*.etrade.com https://*.gffbrokers.com https://infinityfutures-cn.com https://sweetfutures.com https://*.tradovate.com 
https://home.saxo https://*.tickmill.co.uk https://*.directa.it https://big.pt https://*.tradestation-international.com https://*.stonex.com http://tradinglesson.com https://tradinglesson.com *.ibroker.it *.ibroker.es *.cornertrader.ch *.whselfinvest.com *.banxbroker.de *.ameritrade.com *.sweetfutures.com *.danielstrading.com *.gainfutures.com *.futuresonline.com *.tdainc.com *.lsvp.com *.schwab.com *.schwab.co.uk *.us.global.schwab.com *.dev.schwab.com;
Set-Cookie: ak_bmsc=AB0A9701302106EABE2E195C6AC2A074~000000000000000000000000000000~YAAQLtERAvOZVN19AQAA7C8U6A7AWr7StAmiphZPltguFftPSOXgfa2NAq7Vts 40k7AdnPG55ULK1vyBRhPRdqWbtYml3JTC3RjHLu31l8kWBFvysYyuY2uz4GpkvmOWoBSN/Dl/2bQ9bEgbiYj3tCZ1o wEvMfsiAWiJeMY3M1ozu6nyQz0JVpdvfsqun3z5wGhpJWhkjrJjeIyHvVdzx2uyIb1azRFlHT nRCR6NHGoaMM/G2sI1DqPOXPB5btXjdncvB739c2Beh7RgWD/zvb78qpAJDUR1KOenDy1EwN2Bg8pqH1sxlsoVrl7i7r/pAOaWKfd4U1FKP7p730GfOp/m2VRBIdYgHDPHPvGeITPKrR/G22aR886r9Lerhug==; Domain=.cmegroup.com; Path=/; Expires=Thu, 23 Dec 2021 18:16:01 GMT; Max-Age=7185; HttpOnly
I copy these exact headers into a python dict and request as follows:
# python-request.py
import requests
# Header set sent with the request below.
# NOTE(review): every entry here was copied from the *response* headers
# printed by the Selenium script (Server, ETag, Set-Cookie, ...); none of
# them is a header a browser sends with a request.
headers = {
    "Accept-Ranges": "bytes",
    "Access-Control-Allow-Origin": "http://star-website.com",
    "Content-Type": "application/json",
    "ETag": 'W/"36b8a-5d3d28ed9cc43"',
    "Last-Modified": "Thu, 23 Dec 2021 16:16:16 GMT",
    "Referrer-Policy": "no-referrer-when-downgrade",
    "Server": "Apache",
    "ServerID": "e1",
    "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
    "Vary": "Accept-Encoding",
    "Content-Encoding": "gzip",
    "Cache-Control": "max-age=86400",
    "Date": "Thu, 23 Dec 2021 16:16:16 GMT",
    "Content-Length": "46236",
    "Connection": "keep-alive",
    # The value is written as two adjacent string literals, which Python
    # joins into a single string; a literal cannot span a raw line break.
    "Content-Security-Policy": "frame-ancestors 'self' *.cmegroup.com *.quikstrike.net commodex.co.il openexchange.community.cmegroup.com staging.tickertocker.com http://www.straitsfinancial.com www.straitsfinancial.com http://straitsfinancial.com https://www.home.saxo https://app.topsteptrader.com https://help.topsteptrader.com https://staging.topsteptrader.com https://blueeditsitecore.sys.dom https://bluesitecore.sys.dom https://sitecoredev.orange.saxobank.com https://sitecoredev-nocache.orange.saxobank.com https://sitecoredevedit.orange.tst2.dom http://star-website.com https://www.investing.com https://*.benzinga.com https://bz.zingbot.bz https://www.zingbot.bz https://gdcdyn.interactivebrokers.com https://www.interactivebrokers.com https://zingbot.bz https://www.zingbot.bz https://m.zingbot.bz https://bz.zingbot.bz https://dev.futuresfirstacademy.com https://uat.futuresfirstacademy.com https://futuresfirstacademy.com http://stage.barchart.com http://www.barchart.com https://www.infinityfutures.com https://kilofutures.com https://m.cqg.com https://mdemo.cqg.com *.chicago.cme.com:7822 https://uatm.cqg.com https://local.zingbot.bz https://www.gulfbondsukuk.org www.kgieworld.sg https://www.propex24.wpcomstaging.com https://www.propex24.com *.straitsfinancial.gate39tech.com us.straitsfinancial.com https://*.kapcoclients.com https://kapcoclients.com https://*.wallstreetbound.org https://wallstreetbound.org https://cofcointl.plateau.com https://rise.articulate.com https://members.tradeday.com http://blf-django.herokuapp.com https://www.bluelinefutures.com https://www.bluelinefutures.live https://www.bluelinefutures.trade https://login.chicago.cme.com https://loginnr.chicago.cme.com https://logincert.chicago.cme.com https://login-ny.chicago.cme.com https://ampfutures.com https://cme.ampfutures.com https://*.advantagefutures.com https://*.e-futures.com https://*.etrade.com https://*.gffbrokers.com https://infinityfutures-cn.com https://sweetfutures.com "
    "https://*.tradovate.com https://home.saxo https://*.tickmill.co.uk https://*.directa.it https://big.pt https://*.tradestation-international.com https://*.stonex.com http://tradinglesson.com https://tradinglesson.com *.ibroker.it *.ibroker.es *.cornertrader.ch *.whselfinvest.com *.banxbroker.de *.ameritrade.com *.sweetfutures.com *.danielstrading.com *.gainfutures.com *.futuresonline.com *.tdainc.com *.lsvp.com *.schwab.com *.schwab.co.uk *.us.global.schwab.com *.dev.schwab.com;",
    "Set-Cookie": "ak_bmsc=AB0A9701302106EABE2E195C6AC2A074~000000000000000000000000000000~YAAQLtERAvOZVN19AQAA7C8U6A7AWr7StAmiphZPltguFftPSOXgfa2NAq7Vts 40k7AdnPG55ULK1vyBRhPRdqWbtYml3JTC3RjHLu31l8kWBFvysYyuY2uz4GpkvmOWoBSN/Dl/2bQ9bEgbiYj3tCZ1o wEvMfsiAWiJeMY3M1ozu6nyQz0JVpdvfsqun3z5wGhpJWhkjrJjeIyHvVdzx2uyIb1azRFlHT nRCR6NHGoaMM/G2sI1DqPOXPB5btXjdncvB739c2Beh7RgWD/zvb78qpAJDUR1KOenDy1EwN2Bg8pqH1sxlsoVrl7i7r/pAOaWKfd4U1FKP7p730GfOp/m2VRBIdYgHDPHPvGeITPKrR/G22aR886r9Lerhug==; Domain=.cmegroup.com; Path=/; Expires=Thu, 23 Dec 2021 18:16:01 GMT; Max-Age=7185; HttpOnly",
}
# Issue the GET with the header set above. No timeout= argument is passed,
# so nothing bounds how long this call may wait for the server.
requests.get(
    "https://www.cmegroup.com/content/cmegroup/en/tools-information/advisorySearch/jcr:content/full-par/cmeadvisorysearch.advisorySearch.advisorynotices:Advisory Notices.-.2.12|07|2021.01|01|2008.json",
    headers=headers,
)
When I run this it just hangs indefinitely, so there is some issue with the request.
Apart from the headers, what is the difference between the requests made by python and Selenium - how could I identify the issue and hopefully get this working with the python requests library?
Update
I updated the code to print request.headers instead:
Host: www.cmegroup.com
Connection: keep-alive
sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "Linux"
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Sec-Fetch-Site: none
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Accept-Encoding: gzip, deflate, br
Accept-Language: en-US,en;q=0.9
... but the python requests script has the same result when using these headers, just hanging (or timing out if I set a timeout parameter).
Further update
Debug output is as follows:
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.cmegroup.com:443
send: b'GET /content/cmegroup/en/tools-information/advisorySearch/jcr:content/full-par/cmeadvisorysearch.advisorySearch.advisorynotices:Advisory Notices.-.2.12|07|2021.01|01|2008.json HTTP/1.1\r\nUser-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36\r\nAccept-Encoding: gzip, deflate, br\r\nAccept: text/html,application/xhtml xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9\r\nConnection: keep-alive\r\nHost: www.cmegroup.com\r\nsec-ch-ua: " Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"\r\nsec-ch-ua-mobile: ?0\r\nsec-ch-ua-platform: Linux\r\nUpgrade-Insecure-Requests: 1\r\nSec-Fetch-Site: none\r\nSec-Fetch-Mode: navigate\r\nSec-Fetch-User: ?1\r\nSec-Fetch-Dest: document\r\nAccept-Language: en-US,en;q=0.9\r\n\r\n'
CodePudding user response:
It looks like you are using the response headers, not the request headers. Try:
# Print the headers of the request itself rather than those of its response.
print(request.headers)
CodePudding user response:
It looks like it only needs a compatible User-Agent header.
import requests
# Endpoint that returns the advisory-notice search results as JSON.
url = 'https://www.cmegroup.com/content/cmegroup/en/tools-information/advisorySearch/jcr:content/full-par/cmeadvisorysearch.advisorySearch.advisorynotices:Advisory Notices.-.2.12|07|2021.01|01|2008.json'

# A browser-style User-Agent is the only header supplied.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:95.0) Gecko/20100101 Firefox/95.0'}

# Cap the wait at 30 seconds.
response = requests.get(url, timeout=30, headers=headers)

# Reported by the author to print 200 (OK).
print(response.status_code)

# Reported by the author to print the decoded JSON, whose "item" key holds a list of 50 values.
print(response.json())
^ This snippet did the trick for me.