-
Notifications
You must be signed in to change notification settings - Fork 0
/
libsys_drive.py
131 lines (110 loc) · 4.08 KB
/
libsys_drive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from oauth2client.service_account import ServiceAccountCredentials
from pathlib import Path
import httplib2
import io
import os
import sys
# Runtime configuration. Each setting is read from the environment variable of
# the same name; an unset OR empty variable falls back to the built-in default.

# Directory whose entries are checked to decide which files were already
# downloaded; also used as the download target when none is given.
DOWNLOADED_FILES_PATH = os.getenv("DOWNLOADED_FILES_PATH") or "."

# Service account identity passed to the credentials loader.
SERVICE_ACCOUNT_EMAIL = (
    os.getenv("SERVICE_ACCOUNT_EMAIL")
    or "libsys@sul-libsys-files.iam.gserviceaccount.com"
)

# Location of the PKCS#12 key file belonging to the service account.
SERVICE_ACCOUNT_PKCS12_FILE_PATH = (
    os.getenv("SERVICE_ACCOUNT_PKCS12_FILE_PATH")
    or "cert/sul-libsys-files.p12"
)
class LibSysDrive:
    """Download the files of one Google Drive folder as plain text.

    Constructing an instance does all of the work: it builds an authorized
    Drive v3 client from the service account key, resolves the folder name
    to a folder id, and exports every file of that folder that has no marker
    file yet. Failures end the process through ``sys.exit`` with a message.
    """

    def __init__(self, args):
        """Run a download for a ``sys.argv``-style argument list.

        Args:
            args: Sequence whose item 1 is the Drive folder name and whose
                optional item 2 is the directory to write the files into
                (defaults to ``DOWNLOADED_FILES_PATH``).

        Raises:
            SystemExit: If the folder name is missing, or if any later step
                of the download fails.
        """
        if len(args) < 2:
            # Report a usage error the same way every other failure in this
            # class is reported, instead of an IndexError traceback.
            prog = args[0] if args else "libsys_drive.py"
            sys.exit("Usage: %s <folder_name> [download_directory]" % prog)

        self.args = args
        folder_name = args[1]
        download_directory = args[2] if len(args) > 2 else DOWNLOADED_FILES_PATH

        self.gDrive = self.service()
        self.download(folder_name, download_directory)

    def service(self):
        """Return a Drive v3 client authorized as the service account."""
        credentials = ServiceAccountCredentials.from_p12_keyfile(
            SERVICE_ACCOUNT_EMAIL,
            SERVICE_ACCOUNT_PKCS12_FILE_PATH,
            "notasecret",
            scopes=[
                "https://www.googleapis.com/auth/drive",
            ],
        )
        http = httplib2.Http()
        http = credentials.authorize(http)
        return build("drive", "v3", http=http)

    def _list_all(self, query, fields):
        """Return the ``files`` entries of every result page for ``query``.

        ``files.list`` answers one page at a time; a ``nextPageToken`` in
        the response means more results exist, so the request is repeated
        with that token until no token is returned.

        Args:
            query: Drive search expression passed as ``q``.
            fields: Field selector; it must include ``nextPageToken`` for
                the paging to work.

        Returns:
            A list with the file resources of all pages, in listing order.
        """
        entries = []
        page_token = None
        while True:
            response = (
                self.gDrive.files()
                .list(fields=fields, q=query, pageToken=page_token)
                .execute()
            )
            entries.extend(response.get("files", []))
            page_token = response.get("nextPageToken")
            if not page_token:
                return entries

    def folder_id(self, folder_name):
        """Return the Drive id of the folder called ``folder_name``.

        All folders visible to the service account are listed and compared
        by exact name. When several folders share the name, the last one
        listed wins.

        Raises:
            SystemExit: If no folder is visible at all, or none has the
                requested name.
        """
        folders = self._list_all(
            query="mimeType = 'application/vnd.google-apps.folder'",
            fields="nextPageToken, files(id, name)",
        )
        if not folders:
            sys.exit("No folders found.")

        folder_id = None
        for folder in folders:
            if folder["name"] == folder_name:
                folder_id = folder["id"]

        if folder_id is None:
            sys.exit("No folders found named %s." % folder_name)
        return folder_id

    def file_ids(self, folder_name):
        """Return the file resources stored directly in ``folder_name``.

        Each entry is a dict with the keys ``id``, ``name`` and ``parents``.

        Raises:
            SystemExit: If the folder cannot be found or contains no files.
        """
        folder = self.folder_id(folder_name)
        files = self._list_all(
            query=f"'{folder}' in parents",
            fields="nextPageToken, files(id, name, parents)",
        )
        if not files:
            sys.exit("No files found in Google drive folder %s." % folder_name)
        return files

    def download(self, folder_name, download_directory):
        """Export each new file of ``folder_name`` into ``download_directory``.

        A file counts as already downloaded when ``DOWNLOADED_FILES_PATH``
        contains an entry named after its Drive id; such files are skipped.
        Every other file is exported with the ``text/plain`` MIME type,
        written under its Drive name, and then marked by creating an empty
        file named after its id in ``DOWNLOADED_FILES_PATH``.

        Raises:
            SystemExit: On the first file whose export or write fails; the
                message names the file, its id and the error.
        """
        items = self.file_ids(folder_name)

        # Read the marker directory once; ids fetched in this run are added
        # to the set as they complete.
        downloaded = set(os.listdir(DOWNLOADED_FILES_PATH))

        for item in items:
            if item["id"] in downloaded:
                continue
            try:
                request = self.gDrive.files().export_media(
                    fileId=item["id"], mimeType="text/plain"
                )
                # The context manager closes the output file even when a
                # chunk request fails part-way through.
                with io.FileIO(
                    "%s/%s" % (download_directory, item["name"]), "wb"
                ) as fh:
                    downloader = MediaIoBaseDownload(fh, request)
                    done = False
                    while not done:
                        _status, done = downloader.next_chunk()
                # Only mark the file once its content is fully written.
                Path("%s/%s" % (DOWNLOADED_FILES_PATH, item["id"])).touch()
                downloaded.add(item["id"])
            except Exception as error:
                sys.exit(
                    "Name: %s, File ID: %s: %s" % (item["name"], item["id"], error)
                )
# Script entry point: hand the raw command-line arguments to LibSysDrive,
# whose constructor performs the download. Nothing runs on a plain import.
if __name__ == "__main__":
    LibSysDrive(args=sys.argv)