Home > Software engineering >  Error in Downloading File from Google Drive via Python
Error in Downloading File from Google Drive via Python

Time:09-12

I have code for downloading files from Google Drive. It was working perfectly a few months ago, but it no longer works, and I am unable to find the reason.

My code is below:

import pickle
import os
import re
import time

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import requests
from tqdm import tqdm

# OAuth scopes passed to the authorization flow when credentials are created.
# If modifying these scopes, delete the file token.pickle.
# NOTE(review): the broad 'drive' scope presumably already covers the other
# two entries — confirm against the Drive scope documentation and trim if so.
SCOPES = [
          'https://www.googleapis.com/auth/drive.metadata',
          'https://www.googleapis.com/auth/drive',
          'https://www.googleapis.com/auth/drive.file',
          ]


def get_gdrive_service():
    """Build a Drive v3 API client, reusing cached credentials when possible.

    Credentials are read from ``token.pickle`` if that file exists. When they
    are missing or not valid they are either refreshed (expired credentials
    that carry a refresh token) or obtained through the local-server OAuth
    flow using ``client_secrets.json`` and ``SCOPES``; the resulting
    credentials are then written back to ``token.pickle``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=...)``.
    """
    token_path = 'token.pickle'
    credentials = None

    # NOTE: unpickling runs arbitrary code if the token file has been
    # tampered with; only load a token.pickle this program wrote itself.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as cached:
            credentials = pickle.load(cached)

    # Fast path: cached credentials are still usable, nothing to persist.
    if credentials and credentials.valid:
        return build('drive', 'v3', credentials=credentials)

    if credentials and credentials.expired and credentials.refresh_token:
        # Expired but refreshable: renew without user interaction.
        credentials.refresh(Request())
    else:
        # No usable credentials: let the user log in through the browser.
        oauth_flow = InstalledAppFlow.from_client_secrets_file(
            'client_secrets.json', SCOPES
        )
        credentials = oauth_flow.run_local_server(port=0)

    # Save the new or refreshed credentials for the next run.
    with open(token_path, 'wb') as cached:
        pickle.dump(credentials, cached)

    return build('drive', 'v3', credentials=credentials)


def download_file_from_google_drive(id, destination):
    """Download a Drive file through the ``docs.google.com/uc`` endpoint.

    The request is made with a plain ``requests`` session (no OAuth
    credentials are attached). If the first response sets a
    ``download_warning*`` cookie, its value is sent back as the ``confirm``
    parameter in a second request before the body is streamed to disk.

    Args:
        id: Google Drive file ID. (The name shadows the builtin but is kept
            so existing keyword callers keep working.)
        destination: Local path the downloaded content is written to.

    Raises:
        RuntimeError: If the final response has no ``Content-Disposition``
            header, i.e. the server did not send a file attachment.
    """
    def get_confirm_token(response):
        """Return the value of the first ``download_warning*`` cookie, or None."""
        print(response.cookies.items())
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream the response body to ``destination`` with a progress bar."""
        CHUNK_SIZE = 32768
        # get the file size from Content-length response header (0 if absent)
        file_size = int(response.headers.get("Content-Length", 0))
        # extract Content disposition from response headers
        content_disposition = response.headers.get("content-disposition")
        print("content_disposition:", content_disposition)
        if content_disposition is None:
            # Fail before touching the destination instead of crashing inside
            # the regex call or silently saving a non-file response.
            raise RuntimeError(
                "Response has no Content-Disposition header; "
                f"the server did not return a file attachment for {response.url}"
            )
        # The name is only used to label the progress bar, so fall back to the
        # destination path when the header carries no quoted filename.
        match = re.search(r'filename="(.+)"', content_disposition)
        filename = match.group(1) if match else destination
        with open(destination, "wb") as f:
            # Drive the bar manually with byte counts only; wrapping the
            # iterator as well would add one extra unit per chunk.
            with tqdm(total=file_size or None, desc=f"Downloading {filename}",
                      unit="Byte", unit_scale=True, unit_divisor=1024) as progress:
                for chunk in response.iter_content(CHUNK_SIZE):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        progress.update(len(chunk))

    # base URL for download
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even if the download fails part-way.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        print("response:", response)
        print("[ ] Downloading", response.url)

        # NOTE(review): this cookie-based confirmation handshake depends on
        # undocumented server behavior — confirm it still applies; the Drive
        # API's files().get_media is the documented download path.
        token = get_confirm_token(response)
        print("token:", token)
        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
        # download to disk
        save_response_content(response, destination)


def search(service, query):
    """Collect every Drive file matching ``query``, following pagination.

    Args:
        service: Drive API client exposing ``files().list(...).execute()``.
        query: Query string passed to the API as ``q``.

    Returns:
        A list of ``(id, name, mimeType)`` tuples, one per file found, in the
        order the API returned them.
    """
    matches = []
    next_token = None
    while True:
        listing = service.files().list(
            q=query,
            spaces="drive",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=next_token,
        )
        page = listing.execute()
        for entry in page.get("files", []):
            file_name = entry["name"]
            file_id = entry["id"]
            mime_type = entry["mimeType"]
            print(f"Found file: {file_name} with the id {file_id} and type {mime_type}")
            matches.append((file_id, file_name, mime_type))
        next_token = page.get('nextPageToken', None)
        if not next_token:
            # no further pages to fetch
            return matches


def download_file(f_name):
    """Look up a Drive file by exact name and download the first match.

    The file is saved locally under the same name that was searched for.

    Args:
        f_name: Name of the file on Google Drive; also used as the local
            destination path.

    Raises:
        FileNotFoundError: If the search returns no file with that name.
    """
    service = get_gdrive_service()
    # The query wraps the name in single quotes, so backslashes and single
    # quotes inside the name must be escaped or the query becomes malformed.
    escaped_name = f_name.replace("\\", "\\\\").replace("'", "\\'")
    search_result = search(service, query=f"name='{escaped_name}'")
    if not search_result:
        # Fail with a clear message instead of an IndexError on the lookup below.
        raise FileNotFoundError(f"No Google Drive file named {f_name!r} was found")
    # search() returns (id, name, mimeType) tuples; take the first match's id.
    file_id = search_result[0][0]
    # NOTE(review): the download helper issues its requests without the OAuth
    # credentials obtained above — presumably the file must be link-shared; confirm.
    download_file_from_google_drive(file_id, f_name)


if __name__ == '__main__':
    # Script entry point: search Drive for this file name and download it.
    download_file("Data_09_09_2022.zip")

Basically, the error comes from the function get_confirm_token: the check key.startswith('download_warning') no longer matches any cookie these days, so no confirmation token is returned. I am not able to figure out the reason.

Any help..?

CodePudding user response:

I would try a library that handles this for you, such as gdown or googledrivedownloader, because Google is known to change the way its APIs work, which breaks previously working code. This StackOverflow post already discusses this topic.

I would suggest you use an up-to-date example from Google themselves:

from __future__ import print_function

import io

import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload


def download_file(real_file_id):
    """Download a Drive file's content into memory.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        The downloaded content as ``bytes``, or ``None`` if the Drive API
        raised an ``HttpError`` (the error is printed).

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=real_file_id)
        file = io.BytesIO()
        downloader = MediaIoBaseDownload(file, request)
        done = False
        # next_chunk() reports progress and whether the download has finished.
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return here: there is no buffer to read from, and falling through
        # to getvalue() on None would raise AttributeError.
        return None

    return file.getvalue()


if __name__ == '__main__':
    # Example invocation with a sample file ID; the returned value is not used here.
    download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')

CodePudding user response:

I question this whole section here. Why are you downloading it like this?

# base URL for download (a docs.google.com endpoint, not a Drive API call)
URL = "https://docs.google.com/uc?export=download"

# init a HTTP session
session = requests.Session()
# make a request for the file, streaming the body instead of loading it at once
response = session.get(URL, params={'id': id}, stream=True)
print("response:", response)
print("[ ] Downloading", response.url)

# get confirmation token (looked up from the cookies of the first response)
token = get_confirm_token(response)
print("token:", token)
if token:
    # repeat the request, this time passing the token as the 'confirm' parameter
    params = {'id': id, 'confirm': token}
    response = session.get(URL, params=params, stream=True)

See the Drive API guide on downloading files: https://developers.google.com/drive/api/guides/manage-downloads#python

Since you are just downloading the file and not exporting it, you should simply use the files.get method and store the response.

from __future__ import print_function

import io

import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload


def download_file(real_file_id):
    """Download a Drive file's content into memory.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        The downloaded content as ``bytes``, or ``None`` if the Drive API
        raised an ``HttpError`` (the error is printed).

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=real_file_id)
        file = io.BytesIO()
        downloader = MediaIoBaseDownload(file, request)
        done = False
        # next_chunk() reports progress and whether the download has finished.
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return here: there is no buffer to read from, and falling through
        # to getvalue() on None would raise AttributeError.
        return None

    return file.getvalue()


if __name__ == '__main__':
    # Example invocation with a sample file ID; the returned value is not used here.
    download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')
  • Related