We are working in a Python notebook on Databricks and want to send a file to a SharePoint site.
To achieve this, we obtained a client_id and client_secret from
https://<SP_domain>.sharepoint.com/sites/<my_site_name>/_layouts/15/appregnew.aspx
Locally, I can successfully send a file to SharePoint using these secrets. On Databricks, I receive SSL errors.
Normally, something like verify=False can be passed with the request to skip SSL certificate checks (if that is the actual issue). But this does not seem to be supported in the Python package that I am using: Office365-REST-Python-Client
This is the error message received without any attempt to circumvent the issue:
SSLError: HTTPSConnectionPool(host='<SP_domain>.sharepoint.com', port=443): Max retries exceeded with url: /sites/<my sites name>(Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))
Reproducible code
# Minimal reproduction: authenticate against a SharePoint site with app-only
# client credentials and read the site's URL.
# ClientCredential and ClientContext come from Office365-REST-Python-Client.
sharepoint_url = 'https://....sharepoint.com/sites/...'

# The real values are redacted; substitute the client_id / client_secret
# obtained from the appregnew.aspx page.
client_credentials = ClientCredential(
    client_id='<client_id>',
    client_secret='<client_secret>',
)
ctx = ClientContext(sharepoint_url).with_credentials(client_credentials)

web = ctx.web
ctx.load(web)
ctx.execute_query()  # <<< Crashes here
print(web.properties["Url"])
Results in:
AttributeError: 'NoneType' object has no attribute 'text'
The actual underlying error (not the last one in the chain) states:
MaxRetryError: HTTPSConnectionPool(host='nsdigitaal.sharepoint.com', port=443): Max retries exceeded with url: /sites/Team-Camerainspectie (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))
Full stack (sorry in advance :P)
---------------------------------------------------------------------------
SSLEOFError Traceback (most recent call last)
/databricks/python/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
698 # Make the request on the httplib connection object.
--> 699 httplib_response = self._make_request(
700 conn,
/databricks/python/lib/python3.9/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
381 try:
--> 382 self._validate_conn(conn)
383 except (SocketTimeout, BaseSSLError) as e:
/databricks/python/lib/python3.9/site-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
1009 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
-> 1010 conn.connect()
1011
/databricks/python/lib/python3.9/site-packages/urllib3/connection.py in connect(self)
415
--> 416 self.sock = ssl_wrap_socket(
417 sock=conn,
/databricks/python/lib/python3.9/site-packages/urllib3/util/ssl_.py in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
448 if send_sni:
--> 449 ssl_sock = _ssl_wrap_socket_impl(
450 sock, context, tls_in_tls, server_hostname=server_hostname
/databricks/python/lib/python3.9/site-packages/urllib3/util/ssl_.py in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
492 if server_hostname:
--> 493 return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
494 else:
/usr/lib/python3.9/ssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
499 # ctx._wrap_socket()
--> 500 return self.sslsocket_class._create(
501 sock=sock,
/usr/lib/python3.9/ssl.py in _create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
1039 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1040 self.do_handshake()
1041 except (OSError, ValueError):
/usr/lib/python3.9/ssl.py in do_handshake(self, block)
1308 self.settimeout(None)
-> 1309 self._sslobj.do_handshake()
1310 finally:
SSLEOFError: EOF occurred in violation of protocol (_ssl.c:1129)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
/databricks/python/lib/python3.9/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
/databricks/python/lib/python3.9/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
754
--> 755 retries = retries.increment(
756 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
/databricks/python/lib/python3.9/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
573 if new_retry.is_exhausted():
--> 574 raise MaxRetryError(_pool, url, error or ResponseError(cause))
575
MaxRetryError: HTTPSConnectionPool(host='<tenant name>.sharepoint.com', port=443): Max retries exceeded with url: /sites/<site name> (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/auth/providers/acs_token_provider.py in get_app_only_access_token(self)
40 try:
---> 41 realm = self._get_realm_from_target_url()
42 url_info = urlparse(self.url)
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/auth/providers/acs_token_provider.py in _get_realm_from_target_url(self)
69 def _get_realm_from_target_url(self):
---> 70 response = requests.head(url=self.url, headers={'Authorization': 'Bearer'})
71 return self.process_realm_response(response)
/databricks/python/lib/python3.9/site-packages/requests/api.py in head(url, **kwargs)
101 kwargs.setdefault('allow_redirects', False)
--> 102 return request('head', url, **kwargs)
103
/databricks/python/lib/python3.9/site-packages/requests/api.py in request(method, url, **kwargs)
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
62
/databricks/python/lib/python3.9/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
/databricks/python/lib/python3.9/site-packages/requests/sessions.py in send(self, request, **kwargs)
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
/databricks/python/lib/python3.9/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
513 # This branch is for urllib3 v1.22 and later.
--> 514 raise SSLError(e, request=request)
515
SSLError: HTTPSConnectionPool(host='<tenant name>.sharepoint.com', port=443): Max retries exceeded with url: /sites/<site name> (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<command-4083654498839573> in <cell line: 14>()
12 web = ctx.web
13 ctx.load(web)
---> 14 ctx.execute_query()
15 print(web.properties["Url"])
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/client_runtime_context.py in execute_query(self)
145 def execute_query(self):
146 """Submit request(s) to the server"""
--> 147 self.pending_request().execute_query()
148
149 def add_query(self, query):
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/client_request.py in execute_query(self)
72 request = self.build_request(qry)
73 self.beforeExecute.notify(request)
---> 74 response = self.execute_request_direct(request)
75 response.raise_for_status()
76 self.process_response(response)
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/odata/request.py in execute_request_direct(self, request)
34 """
35 self._build_specific_request(request)
---> 36 return super(ODataRequest, self).execute_request_direct(request)
37
38 def build_request(self, query):
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/client_request.py in execute_request_direct(self, request)
84 :type request: office365.runtime.http.request_options.RequestOptions
85 """
---> 86 self.context.authenticate_request(request)
87 if request.method == HttpMethod.Post:
88 if request.is_bytes or request.is_file:
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/sharepoint/client_context.py in authenticate_request(self, request)
238
239 def authenticate_request(self, request):
--> 240 self.authentication_context.authenticate_request(request)
241
242 def _build_modification_query(self, request):
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/auth/authentication_context.py in authenticate_request(self, request)
95 :type request: office365.runtime.http.request_options.RequestOptions
96 """
---> 97 self._provider.authenticate_request(request)
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/auth/providers/acs_token_provider.py in authenticate_request(self, request)
29 :type request: office365.runtime.http.request_options.RequestOptions
30 """
---> 31 self.ensure_app_only_access_token()
32 request.set_header('Authorization', self._get_authorization_header())
33
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/auth/providers/acs_token_provider.py in ensure_app_only_access_token(self)
34 def ensure_app_only_access_token(self):
35 if self._cached_token is None:
---> 36 self._cached_token = self.get_app_only_access_token()
37 return self._cached_token and self._cached_token.is_valid
38
/local_disk0/.ephemeral_nfs/envs/pythonEnv-e6edc2d5-a811-4e43-a0ea-d29958d03122/lib/python3.9/site-packages/office365/runtime/auth/providers/acs_token_provider.py in get_app_only_access_token(self)
43 return self._get_app_only_access_token(url_info.hostname, realm)
44 except requests.exceptions.RequestException as e:
---> 45 self.error = e.response.text
46 raise ValueError(e.response.text)
47
AttributeError: 'NoneType' object has no attribute 'text'
Tried solutions:
Attempt 1:
# Attempt 1: build a RequestOptions by hand, turn off certificate
# verification on it, and send it directly through the client context.
ctx = ClientContext(sharepoint_url).with_credentials(client_credentials)
request = RequestOptions("{0}/_api/web/".format(sharepoint_url))
request.verify = False
# The call below is where the reported
# "TypeError: sequence item 2: expected str instance, RequestOptions found"
# is raised, so none of the following lines are reached.
response = ctx.execute_request_direct(request) # <<< crashes here... example outdated?
# NOTE(review): this rebinds the name `json` from the module to the parsed
# result, so a second `json.loads(...)` call in the same scope would fail.
json = json.loads(response.content)
# assumes the payload has the shape {"d": {"Title": ...}} — TODO confirm
web_title = json['d']['Title']
print("Web title: {0}".format(web_title))
Results in:
TypeError: sequence item 2: expected str instance, RequestOptions found
Attempt 2:
Based on this SO thread.
# If you're using a third-party module and want to disable the checks,
# here's a context manager that monkey patches `requests` and changes
# it so that verify=False is the default and suppresses the warning.
import warnings
import contextlib
import requests
from urllib3.exceptions import InsecureRequestWarning
# Keep a reference to the unpatched method so it can be restored afterwards.
old_merge_environment_settings = requests.Session.merge_environment_settings


@contextlib.contextmanager
def no_ssl_verification():
    """Temporarily make ``verify=False`` the default for all ``requests`` sessions.

    While the context is active, ``requests.Session.merge_environment_settings``
    is replaced by a wrapper that forces ``settings['verify'] = False`` and
    ``InsecureRequestWarning`` is silenced. On exit the original method is
    restored and every adapter that was used inside the context is closed.

    WARNING: this disables TLS certificate verification for every request
    made through ``requests`` inside the ``with`` block. It only changes the
    ``verify`` setting; it does not alter anything else about the connection.
    """
    opened_adapters = set()

    def merge_environment_settings(self, url, proxies, stream, verify, cert):
        # Verification happens only once per connection so we need to close
        # all the opened adapters once we're done. Otherwise, the effects of
        # verify=False persist beyond the end of this context manager.
        opened_adapters.add(self.get_adapter(url))

        settings = old_merge_environment_settings(self, url, proxies, stream, verify, cert)
        settings['verify'] = False
        return settings

    requests.Session.merge_environment_settings = merge_environment_settings

    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', InsecureRequestWarning)
            yield
    finally:
        # Always undo the monkey patch, even if the wrapped code raised.
        requests.Session.merge_environment_settings = old_merge_environment_settings

        for adapter in opened_adapters:
            # Best-effort cleanup: a failing close() must not mask the original
            # error, but KeyboardInterrupt/SystemExit are no longer swallowed.
            with contextlib.suppress(Exception):
                adapter.close()
And running that like:
# Run the upload with certificate verification disabled for its duration.
with no_ssl_verification():
    function_to_send_file_to_sharepoint()
This results in the same "Max retries exceeded" error.
Attempt 3:
Based on this github issue.
def disable_ssl(request):
    """Turn off certificate verification on an outgoing request.

    Sets ``request.verify`` to ``False`` in place and returns ``None``.
    """
    request.verify = False  # Disable certification verification
# Attempt 3: assign disable_ssl to the pending request's beforeExecute
# attribute, then run the same web query as in the reproduction.
# NOTE(review): the traceback shows the library calling
# `self.beforeExecute.notify(request)`, so plain assignment replaces the object
# that `.notify` is called on; presumably `+=` was intended — confirm
# against the library version in use.
ctx.get_pending_request().beforeExecute = disable_ssl
web = ctx.web
ctx.load(web)
ctx.execute_query()
print(web.properties["Url"])
This code needs an update, since the thread is outdated: the current API provides pending_request() and not get_pending_request(). With the fix applied, it results in the following:
CodePudding user response:
We got it working.
The Databricks network configuration included a firewall that blocked the following two URLs, both of which are required:
- https://<tenant name>.sharepoint.com/
- https://accounts.accesscontrol.windows.net
Then it worked flawlessly.
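I didn't figure out why the error is shown like this:
AttributeError: 'NoneType' object has no attribute 'text'
(Judging from the traceback, the library's `except requests.exceptions.RequestException as e` handler reads `e.response.text`; when the connection fails before any HTTP response is received, `e.response` is presumably `None`, which would produce this AttributeError and hide the underlying SSLError.)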