-
Notifications
You must be signed in to change notification settings - Fork 0
/
picasa.py
102 lines (79 loc) · 3.87 KB
/
picasa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import argparse
import re
import os
import urllib.request
import urllib.error
from multiprocessing.dummy import Pool
class Picasa:
    """Bulk downloader for the albums of one or more Picasa Web accounts.

    For every account id the account feed is fetched from ``ACCOUNT_PREF``
    and scraped with regular expressions for album ids.  Each album is then
    opened through ``ALBUM_PREF`` and its media entries are downloaded
    concurrently by a pool of ``THREADS_COUNT`` threads.

    Files are written to
    ``<path>/<user name> (<account id>)/<album name>/<file name>``.
    """

    # Class-level defaults of the instance attributes (kept for compatibility).
    ids = None       # iterable of account ids to process
    path = None      # destination root; None or '' yields a relative path
    storage = None   # not referenced anywhere in this class
    filePath = None  # folder of the album currently being downloaded

    THREADS_COUNT = 10
    ACCOUNT_PREF = 'https://picasaweb.google.com/data/feed/base/user/'
    ALBUM_PREF = 'https://get.google.com/albumarchive/pwaf/'

    # Scraping patterns, compiled once.  Raw strings keep the backslashes
    # literal so Python does not warn about invalid escape sequences.
    _USER_NAME_RE = re.compile(r'<name>(.*?)</name>')
    _PHOTO_ID_RE = re.compile(r'<id>.*?/\d*/albumid/\d*/photoid/(\d*)')
    _PHOTO_LIST_RE = re.compile(
        r',\[?\[\[".*?",".*?",\d*,\d*,.*?\].*?"\d{19}".*?".*?".*?"\d{9}":\[.*?,\[\[.*?\]',
        re.DOTALL)
    _PHOTO_RE = re.compile(
        r'\[\[".*?","(.*?)",(\d*),(\d*),(.*?)\].*?"\d{19}".*?"(.*?)"',
        re.DOTALL)
    _VIDEO_URL_RE = re.compile(r'url.*?(lh3\.googleusercontent.*?m(\d\d)).*?itag')
    _VIDEO_SIZE_RE = re.compile(r'(\d{1,2})/(\d*)x\d*')

    def __init__(self, ids, path=None):
        """Remember the account ids to process and the destination root.

        Args:
            ids: iterable of account id strings.
            path: destination folder; when falsy, folders are created
                relative to the current working directory.
        """
        self.ids = ids
        self.path = path

    @staticmethod
    def _fetch(url):
        """Return the body of *url* decoded as UTF-8, closing the response."""
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')

    def download(self):
        """Download every album of every account in ``self.ids``.

        An account without albums, or an album without media, is reported
        on stdout and skipped; processing continues with the next one.
        Errors raised while fetching the feeds are not caught here.
        """
        for account_id in self.ids:
            url = self.ACCOUNT_PREF + account_id
            feed = self._fetch(url)
            # The id is user input: escape it before embedding it in a regex.
            id_pattern = re.escape(account_id)
            albumIds = re.findall(
                r'<id>.*?' + id_pattern + r'/albumid/(\d*)', feed)
            if not albumIds:
                print(account_id + ' - No album was found in this account!')
                # Skip only this account; the remaining ids are still processed.
                continue

            # The user name does not depend on the album, so look it up once.
            user_match = self._USER_NAME_RE.search(feed)
            userName = user_match.group(1) if user_match else account_id

            for albumId in albumIds:
                name_match = re.search(
                    albumId + r'.*?' + id_pattern + r'/(.*?)">', feed)
                # Fall back to the numeric id when the name cannot be scraped.
                albumName = name_match.group(1) if name_match else albumId
                self.filePath = os.path.join(
                    self.path if self.path else '',
                    userName + ' (' + account_id + ')',
                    albumName)
                os.makedirs(self.filePath, exist_ok=True)

                albumUrl = url + '/albumid/' + albumId
                html = self._fetch(albumUrl)
                photoId = self._PHOTO_ID_RE.search(html)
                if photoId is None:
                    print(albumUrl + ' - No media was found!')
                    continue
                # The album page is addressed through one of its photos.
                albumByPhotoUrl = (self.ALBUM_PREF + account_id + '/album/'
                                   + albumId + '/photo/' + photoId.group(1))
                self.download_album(albumByPhotoUrl)

    def download_album(self, albumUrl):
        """Download all media entries found on the page at *albumUrl*.

        Entries are handed to ``download_file`` on ``THREADS_COUNT`` worker
        threads; files are written to the current ``self.filePath``.
        """
        html = self._fetch(albumUrl)
        photoList = self._PHOTO_LIST_RE.findall(html)
        if not photoList:
            print(albumUrl + ' - No media was found!')
            return
        pool = Pool(self.THREADS_COUNT)
        try:
            pool.map(self.download_file, photoList)
        finally:
            # Release the worker threads even if one download raised.
            pool.close()
            pool.join()
        print(albumUrl + ' - OK!')

    def download_file(self, photoHtml):
        """Download the single media item described by *photoHtml*.

        *photoHtml* is one raw entry as matched by ``download_album``.  The
        file is stored in ``self.filePath`` under the scraped file name, with
        path separators replaced by ``_`` and ``.jpg`` appended when the name
        has no extension.  When the entry lists video streams, the stream
        whose quality code belongs to the largest listed size is downloaded
        instead of the image.  A ``URLError`` is reported on stdout and not
        re-raised.
        """
        photoObject = self._PHOTO_RE.search(photoHtml)
        if photoObject is None:
            print("Can't parse media entry - skipped!")
            return

        # Groups 2 and 3 are the numbers used for the h/w size suffix.
        fileUrl = (photoObject.group(1) + '=' + 'h' + photoObject.group(2)
                   + '-w' + photoObject.group(3) + '-no')
        fileName = photoObject.group(5).replace('\\', '_').replace('/', '_')
        if '.' not in fileName:
            fileName += '.jpg'

        videoUrls = self._VIDEO_URL_RE.findall(photoHtml)
        if videoUrls:
            # Find the quality code listed next to the biggest size.
            bigSize = 0
            quality = 0
            for quality_code, size in self._VIDEO_SIZE_RE.findall(photoHtml):
                # The size capture may be empty; int('') would raise.
                if size and int(size) > bigSize:
                    bigSize = int(size)
                    quality = int(quality_code)
            for videoUrl, videoQuality in videoUrls:
                if int(videoQuality) == quality:
                    fileUrl = ('https://'
                               + videoUrl.replace('%2F', '/').replace('%3D', '='))

        try:
            # Fetch the selected URL exactly once.
            urllib.request.urlretrieve(
                fileUrl, os.path.join(self.filePath, fileName))
        except urllib.error.URLError as e:
            # e.reason may be an exception object rather than a string.
            print("Can't receive " + fileName + " '" + fileUrl + "' with "
                  + str(e.reason))
if __name__ == '__main__':
    # Script entry point: read the account ids (one or more, mandatory) and
    # the optional destination folder from the command line, then download
    # every album of those accounts.
    cli = argparse.ArgumentParser()
    cli.add_argument('-i', '--ids', nargs='+', required=True, help="List of IDs")
    cli.add_argument('-p', '--path', help="Destination folder")
    options = cli.parse_args()
    Picasa(options.ids, options.path).download()