I have code for downloading files from Google Drive. It was working perfectly a few months ago, but it no longer works, and I am unable to find the reason.
My code is below:
import pickle
import os
import re
import time
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import requests
from tqdm import tqdm
# OAuth scopes used by this script.
# If modifying these scopes, delete the file token.pickle.
SCOPES = [
    "https://www.googleapis.com/auth/drive.metadata",
    "https://www.googleapis.com/auth/drive",
    "https://www.googleapis.com/auth/drive.file",
]
def get_gdrive_service():
    """Return an authorized Google Drive v3 API client.

    Credentials are read from ``token.pickle`` when that file exists. If they
    are missing or not valid they are either refreshed (expired credentials
    that carry a refresh token) or obtained through the local-server OAuth
    flow configured by ``client_secrets.json`` and ``SCOPES``; the resulting
    credentials are then written back to ``token.pickle``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=creds)``.
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        # SECURITY: unpickling runs arbitrary code embedded in the file. This
        # is only acceptable because token.pickle is written by this function;
        # never point it at a file from an untrusted source.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('client_secrets.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    # initiate Google Drive service API
    return build('drive', 'v3', credentials=creds)
def download_file_from_google_drive(id, destination):
    """Download the Drive file with the given id and write it to *destination*.

    The file is requested from the ``docs.google.com/uc?export=download``
    endpoint. When the first response carries a ``download_warning*`` cookie,
    its value is sent back as the ``confirm`` parameter in a second request.

    Args:
        id: Google Drive file id.
        destination: Local path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: The final response has an error status code.
        RuntimeError: The final response has no ``Content-Disposition``
            header, i.e. it is not a file download.
    """
    def get_confirm_token(response):
        """Return the value of the first ``download_warning*`` cookie, or None."""
        # The cookie jar is deliberately not printed: it may hold session data.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream the body of *response* into *destination* with a progress bar."""
        CHUNK_SIZE = 32768
        # get the file size from Content-length response header
        file_size = int(response.headers.get("Content-Length", 0))
        # extract Content disposition from response headers
        content_disposition = response.headers.get("content-disposition")
        print("content_disposition:", content_disposition)
        # parse filename; it only labels the progress bar, so fall back to the
        # destination path when the header is absent or has no quoted filename
        matches = re.findall(r'filename="([^"]+)"', content_disposition or "")
        filename = matches[0] if matches else destination
        # The bar is advanced manually by the number of bytes written. Wrapping
        # the chunk iterator in tqdm as well would count every chunk twice.
        progress = tqdm(desc=f"Downloading {filename}", total=file_size or None, unit="Byte", unit_scale=True, unit_divisor=1024)
        try:
            with open(destination, "wb") as f:
                for chunk in response.iter_content(CHUNK_SIZE):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        # update the progress bar
                        progress.update(len(chunk))
        finally:
            progress.close()

    # base URL for download
    URL = "https://docs.google.com/uc?export=download"
    # init a HTTP session; the context manager releases its connections
    with requests.Session() as session:
        # make a request
        response = session.get(URL, params={'id': id}, stream=True)
        print("response:", response)
        print("[ ] Downloading", response.url)
        # get confirmation token
        token = get_confirm_token(response)
        print("token:", token)
        if token is None and response.ok and "content-disposition" not in response.headers:
            # NOTE(review): a successful response without Content-Disposition
            # is presumably Drive's HTML confirmation page rather than the
            # file. Drive appears to have stopped setting the download_warning
            # cookie; sending confirm=t is a commonly reported workaround.
            # TODO confirm it is still honored by the endpoint.
            token = 't'
        if token:
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        try:
            # Fail loudly instead of saving an error/confirmation page to disk.
            response.raise_for_status()
            if "content-disposition" not in response.headers:
                raise RuntimeError(
                    f"Google Drive did not return a file for id {id!r}; "
                    "the response has no Content-Disposition header"
                )
            # download to disk
            save_response_content(response, destination)
        finally:
            response.close()
def search(service, query):
    """Return every Drive file matching *query*, following result pages.

    Args:
        service: Drive API client; ``service.files().list(...).execute()``
            must return a mapping with an optional ``"files"`` list and an
            optional ``"nextPageToken"``.
        query: Query string passed as the ``q`` argument of ``files().list``.

    Returns:
        A list of ``(id, name, mimeType)`` tuples, one per file found, in the
        order the API returned them. Each file is also printed.
    """
    # search for the file
    result = []
    page_token = None
    while True:
        response = service.files().list(q=query,
                                        spaces="drive",
                                        fields="nextPageToken, files(id, name, mimeType)",
                                        pageToken=page_token).execute()
        # iterate over filtered files
        for file in response.get("files", []):
            print(f"Found file: {file['name']} with the id {file['id']} and type {file['mimeType']}")
            result.append((file["id"], file["name"], file["mimeType"]))
        page_token = response.get('nextPageToken', None)
        if not page_token:
            # no more files
            break
    return result
def download_file(f_name):
    """Find the Drive file named *f_name* and download it to a local file of the same name.

    The first match returned by ``search`` is the one downloaded.

    Args:
        f_name: Exact name of the file on Google Drive; also used as the
            local destination path.

    Raises:
        FileNotFoundError: No Drive file with that name was found.
    """
    service = get_gdrive_service()
    # The name is embedded in a single-quoted Drive query string, so backslashes
    # and single quotes in it must be escaped or the query is malformed.
    escaped_name = f_name.replace("\\", "\\\\").replace("'", "\\'")
    search_result = search(service, query=f"name='{escaped_name}'")
    if not search_result:
        raise FileNotFoundError(f"No Google Drive file named {f_name!r} was found")
    file_id = search_result[0][0]
    download_file_from_google_drive(file_id, f_name)
# Script entry point: download one hard-coded file by name.
if __name__ == '__main__':
    download_file("Data_09_09_2022.zip")
Basically, I am getting an error in the function get_confirm_token: these days key.startswith('download_warning') never matches, so no token value is returned. I am not able to figure out the reason.
Any help?
CodePudding user response:
I would try a library that does this, such as gdown
or googledrivedownloader,
since Google is known to change the way their APIs work, which can break previously working code.
This StackOverflow post already discusses this topic.
I would suggest using an up-to-date example from Google themselves:
from __future__ import print_function
import io
import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
def download_file(real_file_id):
    """Download a Drive file into memory.

    Loads pre-authorized user credentials from the environment via
    ``google.auth.default()``.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        The downloaded content as ``bytes``, or ``None`` when the API raised
        an ``HttpError`` (the error is printed).
    """
    creds, _ = google.auth.default()
    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)
        file_id = real_file_id
        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=file_id)
        file = io.BytesIO()
        downloader = MediaIoBaseDownload(file, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')
    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return early: there is no buffer to read from after a failure.
        return None
    return file.getvalue()
# Script entry point: download one hard-coded file id.
if __name__ == '__main__':
    download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')
CodePudding user response:
I question this whole section here. Why are you downloading it like this?
# Excerpt quoted from the question; `id` and `get_confirm_token` are defined
# outside this snippet.
# base URL for download
URL = "https://docs.google.com/uc?export=download"
# init a HTTP session
session = requests.Session()
# make a request
response = session.get(URL, params={'id': id}, stream=True)
print("response:", response)
print("[ ] Downloading", response.url)
# get confirmation token
token = get_confirm_token(response)
print("token:", token)
if token:
    params = {'id': id, 'confirm': token}
    response = session.get(URL, params=params, stream=True)
See https://developers.google.com/drive/api/guides/manage-downloads#python
As you are just downloading the file and not exporting it, you should simply use the files.get method and store the response.
from __future__ import print_function
import io
import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
def download_file(real_file_id):
    """Download a Drive file into memory.

    Loads pre-authorized user credentials from the environment via
    ``google.auth.default()``.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        The downloaded content as ``bytes``, or ``None`` when the API raised
        an ``HttpError`` (the error is printed).
    """
    creds, _ = google.auth.default()
    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)
        file_id = real_file_id
        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=file_id)
        file = io.BytesIO()
        downloader = MediaIoBaseDownload(file, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')
    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return early: there is no buffer to read from after a failure.
        return None
    return file.getvalue()
# Script entry point: download one hard-coded file id.
if __name__ == '__main__':
    download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')