Home > Net >  google drive file upload with aiohttp in Python
google drive file upload with aiohttp in Python

Time:05-27

I am trying to write a function that uploads files to my Google Drive.

The below code uploads a single file with requests.

import json

import requests

# Read the OAuth access token that was previously saved in storage.json.
with open('storage.json', 'r') as file:
    token = json.load(file)["access_token"]

url = "https://www.googleapis.com/upload/drive/v3/files"
file_metadata = {"name": "test.jpg",
                 "parents": [],
                 }
headers = {"Authorization": "Bearer {}".format(token)}
params = {"uploadType": "multipart"}

# Open the media file in a context manager so the handle is closed once the
# request has been sent (the original left it open for the process lifetime).
with open("test2.jpg", "rb") as media:
    data = {
        # requests-style part: (filename, content, content type)
        "MetaData": (
            "metadata",
            json.dumps(file_metadata),
            "application/json; charset=UTF-8",
        ),
        "Media": media,
    }
    res = requests.post(url, files=data, params=params, headers=headers)
print(res.text)

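Now I want to make an async function with aiohttp, but I do not know how to do so. Below is my current code; it fails with aiohttp.payload.LookupError, which surfaces as "TypeError: Can not serialize value type: <class 'tuple'>" (full traceback further down).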

async def upload_file(session, file_path, folder_id):
    r'''
    * uploads a single file to the designated folder with a multipart POST
      and prints the `text` attribute of the response
    * reads the module-level `token` for the Authorization header
    * args:
    - session: aiohttp session used to send the request
    - file_path : absolute path of a file (e.g.) C:\Git\GoogleDriveAPI\test2.jpg
    - folder_id: folder id of the designated folder in google drive (e.g.) '1Q6gaU4kHaLRN5psS4S_2Yx_*******'
    '''
    # the last path component becomes the file name stored in the metadata
    file_name = file_path.split(os.path.sep)[-1]
    url = "https://www.googleapis.com/upload/drive/v3/files"
    file_metadata = {"name": file_name,
                     "parents": [folder_id],
                     }
    # NOTE(review): this dict uses the same layout as the requests `files=`
    # example; the reported traceback shows aiohttp rejecting the "MetaData"
    # tuple with "Can not serialize value type: <class 'tuple'>".
    data = {
        "MetaData": (
            "metadata",
            json.dumps(file_metadata),
            "application/json; charset=UTF-8",
        ),
        # NOTE(review): the file object is opened here and never explicitly
        # closed inside this function.
        "Media": open(file_path, "rb"),
    }
    global token
    headers = {"Authorization": "Bearer {}".format(token)}
    params = {"uploadType": "multipart"}
    async with session.post(url, data=data, params=params, headers=headers) as resp:
        # NOTE(review): presumably `text` is a coroutine method on the aiohttp
        # response, so this would print the method object, not the body -
        # confirm against the aiohttp docs.
        print(resp.text)

Here's my full code in case you need it to experiment.

import os
import sys
import argparse
import asyncio
import json

import aiohttp
from aiohttp import web

import googleapiclient.errors
from googleapiclient.discovery import build

from httplib2 import Http
from oauth2client import file, client, tools
from tqdm import tqdm


# Presumably limits on the number of concurrent requests; neither constant is
# referenced by the code shown in this listing - TODO confirm intended use.
DEFAULT_CONCUR_REQ = 20
MAX_CONCUR_REQ = 1000


def get_token():
    '''
    * obtains credentials for the user's google drive (running the
      authorization flow when none are stored or they are invalid)
    * returns the access token read back from 'storage.json'
    '''
    scope = 'https://www.googleapis.com/auth/drive.file'
    storage = file.Storage('storage.json')
    credentials = storage.get()
    needs_auth = (not credentials) or credentials.invalid
    if needs_auth:
        print("make new storage data file ")
        credentials = tools.run_flow(
            client.flow_from_clientsecrets('client_secret_drive.json', scope),
            storage,
        )
    # the service object itself is not kept; only the call is made
    build('drive', 'v3', http=credentials.authorize(Http()))
    with open('storage.json', 'r') as handle:
        saved = json.load(handle)
    return saved["access_token"]


async def upload_file(session, file_path, folder_id):
    r'''
    * uploads a single file to the designated folder with a multipart POST
      and prints the `text` attribute of the response
    * reads the module-level `token` for the Authorization header
    * args:
    - session: aiohttp session used to send the request
    - file_path : absolute path of a file (e.g.) C:\Git\GoogleDriveAPI\test2.jpg
    - folder_id: folder id of the designated folder in google drive (e.g.) '1Q6gaU4kHaLRN5psS4S_2Yx_*******'
    '''
    # the last path component becomes the file name stored in the metadata
    file_name = file_path.split(os.path.sep)[-1]
    url = "https://www.googleapis.com/upload/drive/v3/files"
    file_metadata = {"name": file_name,
                     "parents": [folder_id],
                     }
    # NOTE(review): this dict uses the same layout as the requests `files=`
    # example; the reported traceback shows aiohttp rejecting the "MetaData"
    # tuple with "Can not serialize value type: <class 'tuple'>".
    data = {
        "MetaData": (
            "metadata",
            json.dumps(file_metadata),
            "application/json; charset=UTF-8",
        ),
        # NOTE(review): the file object is opened here and never explicitly
        # closed inside this function.
        "Media": open(file_path, "rb"),
    }
    global token
    headers = {"Authorization": "Bearer {}".format(token)}
    params = {"uploadType": "multipart"}
    async with session.post(url, data=data, params=params, headers=headers) as resp:
        # NOTE(review): presumably `text` is a coroutine method on the aiohttp
        # response, so this would print the method object, not the body -
        # confirm against the aiohttp docs.
        print(resp.text)

async def upload_files(file_paths, folder_id):
    '''
    * uploads every path in file_paths to the same folder, sharing one
      aiohttp session across all uploads
    * args:
    - file_paths: iterable of local file paths, each passed to upload_file
    - folder_id: google drive folder id passed through to upload_file
    '''
    async with aiohttp.ClientSession() as session:
        # one upload_file coroutine per path
        jobs = [upload_file(session, file_path, folder_id) for file_path in file_paths]
        jobs = asyncio.as_completed(jobs)
        for job in jobs:
            # an exception raised by an upload propagates from this await
            # (this is the `await job` frame in the reported traceback)
            await job

def main():
    '''
    * lists the entries of a hard-coded local folder and runs upload_files
      on them until completion
    '''
    folder = r'C:\Git\GoogleDriveAPI\test2'
    # no target folder is set; the metadata therefore carries "parents": [null],
    # as seen in the reported traceback
    folder_id = None
    files = os.listdir(folder)
    # every directory entry is joined to the folder path; no filtering is applied
    file_paths = [os.path.join(folder, file) for file in files]
    loop = asyncio.get_event_loop()
    loop.run_until_complete(upload_files(file_paths, folder_id))



if __name__ == '__main__':
    # Command-line parsing is currently disabled; main() uses a hard-coded
    # folder and folder id instead.
    # parser = argparse.ArgumentParser(
    #     description='Upload folder including all sub-folders to google drive.')
    # parser.add_argument('folder_path',
    #                     help='folder_path: local folder path to upload'
    #                          'e.g. C:\Git\PytorchBasic')
    # parser.add_argument('folder_id',
    #                     help='folder_id: target folder\'s id in google drive'
    #                          'e.g. 1FzI5QChbh4Q-nEQGRu8D-********')
    # args = parser.parse_args()
    # if not os.path.isdir(args.folder_path):
    #     print('*** Folder path error: invalid path')
    #     parser.print_usage()
    #     sys.exit(1)
    # folder_path = args.folder_path
    # folder_id = args.folder_id
    # NOTE(review): this policy class is presumably only available on Windows - confirm
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    # module-level name that upload_file reads through `global token`
    token = get_token()
    main()




error

C:\VirtualEnv\basic\Scripts\python.exe C:/Git/GoogleDriveAPI/googledriveapi_async.py
Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 510, in update_body_from_data
    body = payload.PAYLOAD_REGISTRY.get(body, disposition=None)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 145, in _gen_form_data
    part = payload.get_payload(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 71, in get_payload
    return PAYLOAD_REGISTRY.get(data, *args, **kwargs)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 103, in <module>
    main()
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 81, in main
    loop.run_until_complete(upload_files(file_paths, folder_id))
  File "C:\Users\chsze\AppData\Local\Programs\Python\Python38\lib\asyncio\base_events.py", line 616, in run_until_complete
    return future.result()
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 73, in upload_files
    await job
  File "C:\Users\chsze\AppData\Local\Programs\Python\Python38\lib\asyncio\tasks.py", line 619, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 65, in upload_file
    async with session.post(url, data=data, params=params, headers=headers) as resp:
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
    self._resp = await self._coro
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 507, in _request
    req = self._request_class(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 313, in __init__
    self.update_body_from_data(data)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 512, in update_body_from_data
    body = FormData(body)()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 170, in __call__
    return self._gen_form_data()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 149, in _gen_form_data
    raise TypeError(
TypeError: Can not serialize value type: <class 'tuple'>
 headers: {}
 value: ('metadata', '{"name": "COCO_train2014_000000000064.jpg", "parents": [null]}', 'application/json; charset=UTF-8')
Task exception was never retrieved
future: <Task finished name='Task-4' coro=<upload_file() done, defined at C:/Git/GoogleDriveAPI/googledriveapi_async.py:41> exception=TypeError('Can not serialize value type: <class \'tuple\'>\n headers: {}\n value: (\'metadata\', \'{"name": "COCO_train2014_000000000061.jpg", "parents": [null]}\', \'application/json; charset=UTF-8\')')>
Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 510, in update_body_from_data
    body = payload.PAYLOAD_REGISTRY.get(body, disposition=None)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 145, in _gen_form_data
    part = payload.get_payload(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 71, in get_payload
    return PAYLOAD_REGISTRY.get(data, *args, **kwargs)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 65, in upload_file
    async with session.post(url, data=data, params=params, headers=headers) as resp:
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
    self._resp = await self._coro
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 507, in _request
    req = self._request_class(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 313, in __init__
    self.update_body_from_data(data)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 512, in update_body_from_data
    body = FormData(body)()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 170, in __call__
    return self._gen_form_data()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 149, in _gen_form_data
    raise TypeError(
TypeError: Can not serialize value type: <class 'tuple'>
 headers: {}
 value: ('metadata', '{"name": "COCO_train2014_000000000061.jpg", "parents": [null]}', 'application/json; charset=UTF-8')
Task exception was never retrieved
future: <Task finished name='Task-3' coro=<upload_file() done, defined at C:/Git/GoogleDriveAPI/googledriveapi_async.py:41> exception=TypeError('Can not serialize value type: <class \'tuple\'>\n headers: {}\n value: (\'metadata\', \'{"name": "COCO_train2014_000000000049.jpg", "parents": [null]}\', \'application/json; charset=UTF-8\')')>
Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 510, in update_body_from_data
    body = payload.PAYLOAD_REGISTRY.get(body, disposition=None)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 145, in _gen_form_data
    part = payload.get_payload(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 71, in get_payload
    return PAYLOAD_REGISTRY.get(data, *args, **kwargs)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\payload.py", line 118, in get
    raise LookupError()
aiohttp.payload.LookupError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/Git/GoogleDriveAPI/googledriveapi_async.py", line 65, in upload_file
    async with session.post(url, data=data, params=params, headers=headers) as resp:
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 1138, in __aenter__
    self._resp = await self._coro
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client.py", line 507, in _request
    req = self._request_class(
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 313, in __init__
    self.update_body_from_data(data)
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\client_reqrep.py", line 512, in update_body_from_data
    body = FormData(body)()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 170, in __call__
    return self._gen_form_data()
  File "C:\VirtualEnv\basic\lib\site-packages\aiohttp\formdata.py", line 149, in _gen_form_data
    raise TypeError(
TypeError: Can not serialize value type: <class 'tuple'>
 headers: {}
 value: ('metadata', '{"name": "COCO_train2014_000000000049.jpg", "parents": [null]}', 'application/json; charset=UTF-8')

Process finished with exit code 1

CodePudding user response:

Looking at your script, I think the cause of the issue is the request body: to send multipart/form-data with aiohttp, the body has to be built explicitly (for example with aiohttp.FormData) rather than passed as a requests-style dict of tuples. When this is reflected in your script, it becomes as follows.

From:

# Excerpt from the body of upload_file as posted in the question (the failing version).
data = {
    "MetaData": (
        "metadata",
        json.dumps(file_metadata),
        "application/json; charset=UTF-8",
    ),
    "Media": open(file_path, "rb"),
}
global token
headers = {"Authorization": "Bearer {}".format(token)}
params = {"uploadType": "multipart"}
async with session.post(url, data=data, params=params, headers=headers) as resp:
    print(resp.text)

To:

# Replacement for the excerpt above: the multipart body is built field by
# field instead of being passed as a dict with a tuple value.
data = aiohttp.FormData()
# first part: the JSON metadata, with its content type given as a keyword
data.add_field(
    "metadata",
    json.dumps(file_metadata),
    content_type="application/json; charset=UTF-8",
)
# second part: the opened file, under the field name "file"
data.add_field("file", open(file_path, "rb"))
global token
headers = {"Authorization": "Bearer {}".format(token)}
params = {"uploadType": "multipart"}
async with session.post(url, data=data, params=params, headers=headers) as resp:
    # the response body is awaited and parsed as JSON before printing
    r = await resp.json()
    print(r)

Testing:

When this modified script is run, the following result is obtained.

{'kind': 'drive#file', 'id': '###', 'name': '###', 'mimeType': '###'}
... (and so on, one result per uploaded file)

Note:

  • This modified script assumes that your access token can be used for uploading files to Google Drive. Please be careful about this.

Reference:

  • Related