I have a number of urls that redirect to other ones and I would like to find the address of the links they redirect to. For example,
https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown
redirects to
https://www.richmondfed.org/press_room/speeches/thomas_i_barkin/2019/barkin_speech_20190404
I tried using the requests library like so:
import requests
url = "https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown"
def get_redirected_url(url: str) -> str:
    """Return the final URL reached after following redirects from *url*.

    ``requests.get`` follows redirects by default, so ``Response.url`` is the
    address of the last response in the redirect chain.

    Raises:
        requests.exceptions.SSLError: if certificate verification fails for
            a host in the chain (the error shown in the traceback in this
            post).
        requests.exceptions.Timeout: if the server does not respond within
            the timeout.
    """
    # Bound the wait so an unresponsive host cannot block the call forever;
    # this matches the call that appears in the traceback.
    r = requests.get(url, timeout=10)
    return r.url
get_redirected_url(url)
But I'm getting the error below. Any idea what might be going wrong?
---------------------------------------------------------------------------
Error Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/urllib3/contrib/pyopenssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
487 try:
--> 488 cnx.do_handshake()
489 except OpenSSL.SSL.WantReadError:
/opt/anaconda3/lib/python3.8/site-packages/OpenSSL/SSL.py in do_handshake(self)
1933
-> 1934 :param addr: A remove address
1935 :return: What the socket's connect_ex method returns
/opt/anaconda3/lib/python3.8/site-packages/OpenSSL/SSL.py in _raise_ssl_error(self, ssl, result)
1670
-> 1671 def set_tlsext_host_name(self, name):
1672 """
/opt/anaconda3/lib/python3.8/site-packages/OpenSSL/_util.py in exception_from_error_queue(exception_type)
53 text(lib.ERR_reason_error_string(error)),
---> 54 )
55 )
Error: [('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')]
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
671 conn,
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
380 try:
--> 381 self._validate_conn(conn)
382 except (SocketTimeout, BaseSSLError) as e:
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
977 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 978 conn.connect()
979
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py in connect(self)
361
--> 362 self.sock = ssl_wrap_socket(
363 sock=conn,
/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/ssl_.py in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data)
385 if HAS_SNI and server_hostname is not None:
--> 386 return context.wrap_socket(sock, server_hostname=server_hostname)
387
/opt/anaconda3/lib/python3.8/site-packages/urllib3/contrib/pyopenssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
493 except OpenSSL.SSL.Error as e:
--> 494 raise ssl.SSLError("bad handshake: %r" % e)
495 break
SSLError: ("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])",)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
725
--> 726 retries = retries.increment(
727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
445 if new_retry.is_exhausted():
--> 446 raise MaxRetryError(_pool, url, error or ResponseError(cause))
447
MaxRetryError: HTTPSConnectionPool(host='stlouisfed.org', port=443): Max retries exceeded with url: /fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
<ipython-input-126-5a66b7870cb5> in <module>
----> 1 get_redirected_url(url)
<ipython-input-125-d2cc1a52d8fa> in get_redirected_url(url)
1 def get_redirected_url(url: str) -> str:
2 # url = urljoin("https://stlouisfed.org/", url)
----> 3 r = requests.get(url, timeout=10)
4 return r.url
5
/opt/anaconda3/lib/python3.8/site-packages/requests/api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77
/opt/anaconda3/lib/python3.8/site-packages/requests/api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
/opt/anaconda3/lib/python3.8/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
/opt/anaconda3/lib/python3.8/site-packages/requests/sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
/opt/anaconda3/lib/python3.8/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
512 if isinstance(e.reason, _SSLError):
513 # This branch is for urllib3 v1.22 and later.
--> 514 raise SSLError(e, request=request)
515
516 raise ConnectionError(e, request=request)
SSLError: HTTPSConnectionPool(host='stlouisfed.org', port=443): Max retries exceeded with url: /fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
CodePudding user response:
I reproduced your issue with the following code:
import requests
url = "https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown"
def get_redirected_url(url):
    """Return the final URL reached after following redirects from *url*.

    ``requests.get`` follows redirects by default, so ``Response.url`` is the
    address of the last response in the redirect chain. If certificate
    verification fails for a host in the chain,
    ``requests.exceptions.SSLError`` is raised, which is the failure being
    reproduced here.
    """
    # A timeout keeps the call from hanging indefinitely on a silent server.
    r = requests.get(url, timeout=10)
    return r.url
get_redirected_url(url)
The root cause is that the server at stlouisfed.org does not send the complete certificate chain: the L1K intermediate certificate is missing. You can confirm this by checking https://www.ssllabs.com/ssltest/analyze.html?d=https://stlouisfed.org/
To work around it on your side (the client side), I found two options:
- you can change url = "https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown" to url = "https://www.stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown" as the www.stlouisfed.org certificate chain is valid.
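- you can add the missing L1K certificate to your certificate trust file. Step 1: download the certificate from https://tls-observatory.services.mozilla.com/static/certsplainer.html?id=1439 Step 2: update your certificate trust file. On my CentOS 7 machine, I had to add a new file called l1k.pem to /etc/pki/ca-trust/source/anchors with the content from step 1, run the command update-ca-trust, and then confirm that the certificate had been added to the file /etc/pki/tls/certs/ca-bundle.crt. Note that if your requests installation uses the certifi bundle rather than the system trust store, you also need to point it at the updated bundle, either with the REQUESTS_CA_BUNDLE environment variable or by passing verify="/etc/pki/tls/certs/ca-bundle.crt" to requests.get.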
With either option, the redirected URL can be returned.