How do I get a list of all notebooks in my workspace and store their names along with the full path in a CSV file? I have tried using the Databricks CLI, but it doesn't seem to support a recursive listing.
CodePudding user response:
As you can see in the code, there is no recursive option: https://github.com/databricks/databricks-cli/blob/master/databricks_cli/workspace/cli.py (def ls_cli)
One solution is to import the CLI's SDK in Python and extend it yourself:
from databricks_cli.sdk import ApiClient
from databricks_cli.sdk import service

host = "your_host"
token = "your_token"

client = ApiClient(host=host, token=token)
workspace = service.WorkspaceService(client)
objects = []

def list_workspace_objects(path):
    # The list API only returns the direct children of path,
    # so recurse into every directory we encounter.
    elements = workspace.list(path).get('objects')
    if elements is not None:
        for obj in elements:
            objects.append(obj)
            if obj['object_type'] == 'DIRECTORY':
                list_workspace_objects(obj['path'])

list_workspace_objects("/")
print(objects)
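If you also need the CSV file asked about in the question, here is a minimal sketch that filters the objects list collected above down to notebooks and writes one row per notebook with its name and full path (the file name notebooks.csv is just an example):

import csv
import os

with open("notebooks.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "path"])  # header row
    for obj in objects:
        if obj["object_type"] == "NOTEBOOK":
            # The notebook name is the last component of its workspace path.
            writer.writerow([os.path.basename(obj["path"]), obj["path"]])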
CodePudding user response:
You can use the code below directly. Note: tested code.
import requests
import json

databricks_instance = "databricks Instance"
url_list = f"{databricks_instance}/api/2.0/workspace/list"
url_export = f"{databricks_instance}/api/2.0/workspace/export"

payload = json.dumps({
    "path": "/"
})
headers = {
    'Authorization': 'Bearer token',  # replace 'token' with a personal access token
    'Content-Type': 'application/json'
}

# List the objects at the workspace root.
response = requests.request("GET", url_list, headers=headers, data=payload).json()
notebooks = []

# Collect all notebooks, recursing into every directory.
def list_notebooks(mylist):
    for element in mylist['objects']:
        if element['object_type'] == 'NOTEBOOK':
            notebooks.append(element)
        if element['object_type'] == 'DIRECTORY':
            payload_inner = json.dumps({
                "path": element['path']
            })
            response_inner = requests.request("GET", url_list, headers=headers, data=payload_inner).json()
            if len(response_inner) != 0:  # skip empty directories
                list_notebooks(response_inner)
    return notebooks

result = list_notebooks(response)
print(result[0])
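To cover the CSV part of the question as well, here is a minimal sketch that turns the result list above into a CSV of names and full paths; it assumes it runs in a Databricks notebook where spark is predefined, and the output location dbfs:/tmp/notebooks_csv is just an example:

import os
from pyspark.sql import Row

# One row per notebook: its name and its full workspace path.
rows = [Row(name=os.path.basename(n['path']), path=n['path']) for n in result]
df = spark.createDataFrame(rows)

# Write a single CSV file with a header.
df.coalesce(1).write.mode("overwrite").option("header", True).csv("dbfs:/tmp/notebooks_csv")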