Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change the code related to pydrive and use google-api instead #2254

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ test = [
'rundoc>=0.4.3,<0.5',
'pytest-runner >= 2.11.1',
'tomli>=2.0.0,<3',
'pydrive',
'google-api-python-client',
'google-auth',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (`google-auth`) and `google-auth-httplib2` already seem to be required by `google-api-python-client`, so listing them separately may be unnecessary.

'google-auth-oauthlib',
'google-auth-httplib2',
'requests',
'pyarrow',
'gitpython',
'slack-sdk>=3.23,<4.0',
Expand Down
136 changes: 75 additions & 61 deletions tests/_external/gdrive_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
import io
import json
import os
import pathlib
import tempfile
from datetime import date

import git
import pandas as pd
import yaml
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload

SCOPES = ['https://www.googleapis.com/auth/drive']
PYDRIVE_CREDENTIALS = 'PYDRIVE_CREDENTIALS'


Expand All @@ -25,53 +26,38 @@ def _generate_filename():


def _get_drive_client():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: should we rename `_get_drive_client` to `_get_drive_service`, since it now returns the object built by `build('drive', 'v3', ...)`?

tmp_credentials = os.getenv(PYDRIVE_CREDENTIALS)
if not tmp_credentials:
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
else:
with tempfile.TemporaryDirectory() as tempdir:
credentials_file_path = pathlib.Path(tempdir) / 'credentials.json'
credentials_file_path.write_text(tmp_credentials)

credentials = json.loads(tmp_credentials)

settings = {
'client_config_backend': 'settings',
'client_config': {
'client_id': credentials['client_id'],
'client_secret': credentials['client_secret'],
},
'save_credentials': True,
'save_credentials_backend': 'file',
'save_credentials_file': str(credentials_file_path),
'get_refresh_token': True,
}
settings_file = pathlib.Path(tempdir) / 'settings.yaml'
settings_file.write_text(yaml.safe_dump(settings))

gauth = GoogleAuth(str(settings_file))
gauth.LocalWebserverAuth()

return GoogleDrive(gauth)
tmp_credentials = os.getenv('PYDRIVE_CREDENTIALS')
creds = None
if tmp_credentials:
credentials = json.loads(tmp_credentials)
creds = Credentials.from_authorized_user_info(credentials, SCOPES)
Comment on lines +32 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: maybe we rename the first one (`credentials`, the parsed JSON) to `credentials_json` and the other (`creds`) to `credentials`, or something similar.

if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file('client_secrets.json', SCOPES)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file (`client_secrets.json`) doesn't exist, does it? Will this block ever work?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, sorry, this was left over from my local setup. I will remove it.

creds = flow.run_local_server(port=0)
with open('token.json', 'w') as token:
token.write(creds.to_json())

service = build('drive', 'v3', credentials=creds)
return service


def get_latest_file(folder_id):
"""Get the latest file from the given Google Drive folder.
"""Get the latest file from the given Google Drive folder."""
service = _get_drive_client()

Args:
folder (str):
The string Google Drive folder ID.
"""
drive = _get_drive_client()
drive_query = drive.ListFile({
'q': f"'{folder_id}' in parents and trashed=False",
'orderBy': 'modifiedDate desc',
'maxResults': 1,
})
file_list = drive_query.GetList()
if len(file_list) > 0:
return file_list[0]
query = f"'{folder_id}' in parents and trashed = false"
results = (
service.files()
.list(q=query, orderBy='modifiedTime desc', pageSize=1, fields='files(id, name)')
.execute()
)

files = results.get('files', [])
if files:
return files[0]


def read_excel(file_id):
Expand All @@ -87,11 +73,33 @@ def read_excel(file_id):
each sheet

"""
client = _get_drive_client()
drive_file = client.CreateFile({'id': file_id})
xlsx_mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
drive_file.FetchContent(mimetype=xlsx_mime)
return pd.read_excel(drive_file.content, sheet_name=None)
service = _get_drive_client()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be calling `service.close()` when we're done with it?


# Get file metadata to check mimeType
file_metadata = service.files().get(fileId=file_id, fields='mimeType').execute()
mime_type = file_metadata.get('mimeType')

if mime_type == 'application/vnd.google-apps.spreadsheet':
# If it's a Google Sheet, export it to XLSX
request = service.files().export_media(
fileId=file_id,
mimeType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
)
else:
# If it's already an XLSX or other binary format, download it directly
request = service.files().get_media(fileId=file_id)

# Download file content
file_io = io.BytesIO()
downloader = MediaIoBaseDownload(file_io, request)
done = False
while not done:
_, done = downloader.next_chunk()

file_io.seek(0) # Reset stream position

# Load the file content into pandas
return pd.read_excel(file_io, sheet_name=None)


def _set_column_width(writer, results, sheet_name):
Expand Down Expand Up @@ -126,15 +134,21 @@ def save_to_gdrive(output_folder, results, output_filename=None):
output_filename = _generate_filename()

output = io.BytesIO()

with pd.ExcelWriter(output, engine='xlsxwriter') as writer: # pylint: disable=E0110
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
for sheet_name, data in results.items():
data.to_excel(writer, sheet_name=sheet_name, index=False)
_set_column_width(writer, data, sheet_name)

file_config = {'title': output_filename, 'parents': [{'id': output_folder}]}
drive = _get_drive_client()
drive_file = drive.CreateFile(file_config)
drive_file.content = output
drive_file.Upload({'convert': True})
return drive_file['id']
output.seek(0)

file_metadata = {
'name': output_filename,
'parents': [output_folder],
'mimeType': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
}

service = _get_drive_client()
media = MediaIoBaseUpload(output, mimetype=file_metadata['mimeType'], resumable=True)

file = service.files().create(body=file_metadata, media_body=media, fields='id').execute()
return file.get('id')
Loading