I have many files in my Google Drive. I want to read each one according to its type (.txt, .pdf, .doc, etc.). I am able to read each type separately, but I want to do it in a single piece of code using if/else or some other condition.
Here is the code for reading a .txt file:
file_path=link of google drive
import urllib.request
# Download the file at `file_path` and print its contents as text.
# The response is used as a context manager so the underlying connection is
# closed even if read() raises (the original never closed it).
with urllib.request.urlopen(file_path) as response:
    html = response.read()
# The bytes are decoded as UTF-8; a file in another encoding will raise
# UnicodeDecodeError here.
text = html.decode('utf8')
print(text)
Here is the code for reading a .pdf file:
import requests, PyPDF2
# Download the PDF at `file_path`, save a local copy as my_pdf.pdf, and print
# the text of its first page.
url = file_path
response = requests.get(url)
# Fail loudly on an HTTP error instead of saving the error body as a "PDF".
response.raise_for_status()
my_raw_data = response.content
with open("my_pdf.pdf", 'wb') as my_data:
    my_data.write(my_raw_data)

# Re-open the saved copy inside a `with` block so the handle is closed once
# the text has been extracted (the original left it open).
# NOTE(review): PyPDF2 3.x replaces PdfFileReader / isEncrypted / getPage /
# extractText with PdfReader / is_encrypted / pages[...] / extract_text --
# confirm which PyPDF2 version is installed before upgrading these calls.
with open("my_pdf.pdf", 'rb') as open_pdf_file:
    read_pdf = PyPDF2.PdfFileReader(open_pdf_file)
    # An encrypted PDF is decrypted with an empty password before reading;
    # the page is then printed the same way in both cases.
    if read_pdf.isEncrypted:
        read_pdf.decrypt("")
    print(read_pdf.getPage(0).extractText())
Here is the code for the Google Drive API:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from apiclient.http import MediaFileUpload
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive']


def main():
    """Authorise against Google Drive, list up to 10 files, return the service.

    Credentials are loaded from ``token.pickle`` when that file exists;
    otherwise (or when they are invalid) they are refreshed or obtained
    through the local-server OAuth flow using ``credentials.json`` and then
    written back to ``token.pickle``.

    Returns:
        The Drive v3 service object created by ``build``, so that callers can
        issue further requests (previously nothing was returned, which left
        ``service`` unreachable outside this function).
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        # NOTE: unpickling can execute arbitrary code; only load a
        # token.pickle that this script wrote itself.
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds)

    # Call the Drive v3 API
    results = service.files().list(
        pageSize=10, fields="nextPageToken, files(id, name)").execute()
    items = results.get('files', [])

    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            print(u'{0} ({1})'.format(item['name'], item['id']))

    return service


if __name__ == '__main__':
    # Keep the service returned by main(): the upload below needs it, and the
    # original code referenced a `service` name that only existed inside
    # main(), which raised NameError.
    service = main()

    # Upload the local file unnamed.jpg and print the ID Drive assigned to it.
    file_metadata = {'name': 'unnamed.jpg'}
    media = MediaFileUpload('unnamed.jpg',
                            mimetype='image/jpeg')
    uploaded = service.files().create(body=file_metadata,
                                      media_body=media,
                                      fields='id').execute()
    print('File ID: %s' % uploaded.get('id'))
    print(uploaded)
Now I want to add a condition so that if the file type is .txt it is read as text, if the file type is .pdf it is read as a PDF, and so on.
So, how can I find the type of each file and read all types of files in one piece of code using conditions?
CodePudding user response:
I'm not 100% sure I understand what you want to do.
If you just want to get back files of a specific MIME type, you can use the `q` parameter of the `files.list` method to search for files by `mimeType`:
mimeType='application/vnd.google-apps.document'
Something like this
# Ask Drive to return only Google Docs files by filtering on mimeType through
# the `q` search parameter. The query is a plain string: it has no
# placeholders, so no f-string prefix is needed.
response = service.files().list(q="mimeType = 'application/vnd.google-apps.document'",
                                fields='nextPageToken, files(id, name)'
                                ).execute()
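You could also do it with a single `files.list` request and then check the MIME type of each file that is returned. To get the MIME type back, include `mimeType` in the `fields` parameter, e.g. `fields='nextPageToken, files(id, name, mimeType)'`. Each returned entry then looks like this: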
"files": [
{
"kind": "drive#file",
"id": "1x8-vD-XiXEEA5Spf3qp8x2wltablGF22Lpwup8VtxNY",
"name": "Experts Activity Dump go/ExpertsActivities",
"mimeType": "application/vnd.google-apps.spreadsheet"
},
Depending on the MIME type of each file returned, you can use a different open/read method. Just remember that there are some file types you will not be able to open in this manner: Google-native Drive types such as Sheets and Docs must be exported first and then opened in a format you can handle.