-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitchloader.py
executable file
·360 lines (317 loc) · 13.2 KB
/
twitchloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
import os
import sys
import pathlib
import datetime
import configargparse
import yaml
from pyfiglet import Figlet
import colorama
from twitch import TwitchClient
import requests
from fuzzywuzzy import process
import youtube_dl
# Release version string of this script.
__version__ = "0.1"
# Home page of the project repository.
__url__ = "https://github.com/Quoorex/twitchloader"
# TODO Add threading or multiprocessing
# (Twitch API has a ratelimit though,
# but the API is only used to gather the links in the beginning anyway)
# TODO Fix rename randomly creating folders with parts of video titles as their name
class Twitchloader:
    """Download Twitch VODs and whole video collections via youtube-dl.

    Settings come from the command line and/or a YAML config file (see
    ``init_parser``). Collections are looked up through the Twitch API
    (requests sent with the ``application/vnd.twitchtv.v5+json`` accept
    header); the actual downloads are delegated to youtube-dl.
    """

    # Built-in youtube-dl options. The two templates are also used as a
    # fallback when user-supplied options omit them. "%(download_dir)s",
    # "%(collection_name)s" and "%(video_index)s" are placeholders filled in
    # by this program (see ``process_outtmpl``), not by youtube-dl.
    _DEFAULT_YDL_OPTIONS = {
        "format": "best",
        "outtmpl": "%(download_dir)s/%(uploader)s/%(collection_name)s/%(video_index)s - %(title)s.%(ext)s",
        "urls_outtmpl": "%(download_dir)s/%(uploader)s/%(title)s.%(ext)s",
    }

    _TRUE_STRINGS = frozenset({"1", "true", "t", "yes", "y", "on"})
    _FALSE_STRINGS = frozenset({"0", "false", "f", "no", "n", "off", ""})

    def __init__(self):
        """Parse the configuration and set up the Twitch API client."""
        self.conf = self.init_parser()
        client_id = self.conf.client_id
        self.client = TwitchClient(client_id=client_id)
        self.video_url_base = "https://www.twitch.tv/videos/"
        self.api_base = "https://api.twitch.tv/kraken/"
        self.headers = {
            "Accept": "application/vnd.twitchtv.v5+json",
            "Client-ID": client_id,
        }

    @staticmethod
    def _parse_bool(value):
        """Convert a command-line/config value to a real boolean.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because every non-empty string is truthy.

        Raises:
            ValueError: if *value* is not a recognised boolean spelling
                (argparse turns this into a usage error).
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in Twitchloader._TRUE_STRINGS:
            return True
        if text in Twitchloader._FALSE_STRINGS:
            return False
        raise ValueError(f"invalid boolean value: {value!r}")

    def init_parser(self):
        """Build the argument parser, store it on ``self.parser`` and parse.

        Returns:
            The namespace produced by parsing the command line together with
            the config file (``config.yaml`` by default).
        """
        self.parser = configargparse.ArgParser(
            config_file_parser_class=configargparse.YAMLConfigFileParser,
            default_config_files=["config.yaml"],
            description="Download VODs and complete video collections from Twitch.tv using youtube-dl.",
        )
        self.parser.add_argument(
            "-c",
            "--config-path",
            is_config_file=True,
            dest="config-path",
            help="path to the config file",
        )
        self.parser.add_argument(
            "-C",
            "--channels",
            dest="channels",
            nargs="+",
            type=yaml.safe_load,
            help="names of the channels to get the collections of",
        )
        self.parser.add_argument(
            "-t",
            "--client-id",
            dest="client_id",
            help="Twitch client ID needed to access the API (get one on https://dev.twitch.tv/)",
        )
        self.parser.add_argument(
            "--collection-ids",
            dest="collection_ids",
            type=yaml.safe_load,
            nargs="+",
            help="ids of the collections to process",
        )
        self.parser.add_argument(
            "--show-collections",
            action="store_true",
            dest="show_collections",
            help="show the collections of the channels",
        )
        self.parser.add_argument(
            "--save-urls",
            action="store_true",
            dest="save_urls",
            help="save the urls of the videos in separated folders instead of downloading them (for manual use with the '-a' youtube-dl option",
        )
        self.parser.add_argument(
            "-o",
            "--output-dir",
            dest="output_dir",
            default="downloads",
            type=str,
            help="Path to where the files will be saved",
        )
        self.parser.add_argument(
            "-y",
            "--ydl-options",
            dest="ydl_options",
            type=yaml.safe_load,
            # Hand out a copy so the class-level defaults are never mutated.
            default=dict(self._DEFAULT_YDL_OPTIONS),
            help="Youtube-DL options (https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312)",
        )
        self.parser.add_argument(
            "-u",
            "--urls",
            dest="urls",
            type=yaml.safe_load,
            nargs="+",
            help="URLs of the videos to download",
        )
        self.parser.add_argument(
            "--rename-existing",
            dest="rename_existing",
            # Not ``type=bool``: that would turn the string "False" into True.
            type=self._parse_bool,
            help="Rename exsting files",
        )
        self.parser.add_argument(
            "--rename-outtmpl",
            dest="rename_outtmpl",
            type=str,
            help="Youtube-DL compatible output template for renaming files",
        )
        self.parser.add_argument(
            "--match-ratio",
            dest="match_ratio",
            type=str,
            help="Percent ratio of how similar the name of an existing file has to be to be considered for the rename",
        )
        return self.parser.parse()

    def print_banner(self):
        """Initialise colorama and print the coloured program banner."""
        colorama.init()
        figlet = Figlet(font="speed")
        print(
            colorama.Fore.MAGENTA
            + colorama.Back.BLACK
            + figlet.renderText("Twitchloader")
        )
        print(colorama.Back.RESET + "Download Twitch videos with ease")
        print(colorama.Style.RESET_ALL)

    def print_figlet(self, font, text):
        """Print *text* as magenta figlet art in *font*, then reset colours."""
        figlet = Figlet(font=font)
        print(colorama.Fore.MAGENTA + figlet.renderText(text))
        print(colorama.Style.RESET_ALL)

    def channel_search(self, query):
        """Return the first channel the Twitch search yields for *query*.

        Raises:
            IndexError: if the search returns no channel.
        """
        results = self.client.search.channels(query, 1)
        if not results:
            raise IndexError(f"No channel found for query '{query}'")
        result = results[0]
        result_name = result["name"]
        print(f"Found channel '{result_name}' for query '{query}'")
        return result

    def get_collections(self, channel_id):
        """Fetch every collection of a channel, following pagination cursors.

        Returns:
            A list with the ``"collections"`` entries of all response pages.
        """
        params = {"limit": 100}
        request_url = self.api_base + f"channels/{channel_id}/collections"
        collections = []
        while True:
            page = requests.get(
                request_url, headers=self.headers, params=params
            ).json()
            collections.extend(page["collections"])
            # A missing or null cursor means there are no further pages.
            cursor = page.get("_cursor")
            if cursor is None:
                return collections
            params["cursor"] = cursor

    def gather_links(self, collections):
        """Resolve the video URLs of each collection.

        Returns:
            A dict mapping collection id to ``[collection, video_urls]``.
        """
        collections_dict = {}
        video_count = 0
        for collection in collections:
            videos = self.client.collections.get(
                collection["_id"], include_all_items=True
            )
            video_urls = [self.video_url_base + video["item_id"] for video in videos]
            collections_dict[collection["_id"]] = [collection, video_urls]
            video_count += collection["items_count"]
        print(f"Found a total of {video_count} videos")
        return collections_dict

    def save_urls(self, collections_dict):
        """Write each collection's video URLs to ``urls.txt`` in its folder.

        The folder is ``<output_dir>/<uploader>/<collection title>`` and is
        created when missing.
        """
        self.print_figlet("standard", "Saving all urls")
        download_dir = self.conf.output_dir
        for collection_item, video_urls in collections_dict.values():
            collection_name = collection_item["title"]
            uploader = collection_item["owner"]["name"]
            # Declare the download path and create it if necessary
            download_path = os.path.abspath(
                f"{download_dir}/{uploader}/{collection_name}/"
            )
            pathlib.Path(download_path).mkdir(parents=True, exist_ok=True)
            urls_file = os.path.join(download_path, "urls.txt")
            with open(urls_file, "w", encoding="utf-8") as f:
                f.writelines(video_url + "\n" for video_url in video_urls)

    def process_outtmpl(self, outtmpl, download_dir, collection_name, video_index):
        """
        Replaces the parts of the outtmpl that are specific to this program and
        that youtube-dl therefore isn't able to handle
        """
        return (
            outtmpl.replace("%(download_dir)s", download_dir)
            .replace("%(collection_name)s", collection_name)
            .replace("%(video_index)s", str(video_index))
        )

    def _copy_ydl_options(self):
        """Return a private copy of the configured youtube-dl options.

        Working on a copy keeps ``self.conf.ydl_options`` pristine, so one
        download mode cannot overwrite the template another mode reads later.
        Missing output templates fall back to the built-in defaults.
        """
        options = dict(self.conf.ydl_options or {})
        for key in ("outtmpl", "urls_outtmpl"):
            options.setdefault(key, self._DEFAULT_YDL_OPTIONS[key])
        return options

    def _download_video(self, ydl_options, video_url):
        """Download one video with youtube-dl; exit with status 1 on Ctrl-C."""
        with youtube_dl.YoutubeDL(ydl_options) as ydl:
            try:
                ydl.download([video_url])
            except KeyboardInterrupt:
                print("\nUser interrupted the program, stopping ...")
                sys.exit(1)

    def rename_existing(self, video_url, download_dir, collection_name, video_index):
        """Rename an already downloaded file to the name the rename template yields.

        The expected file name is built from ``--rename-outtmpl`` and the
        video's metadata; the most similar file name in the target directory
        is renamed to it when its similarity score reaches ``--match-ratio``.

        Returns:
            True if a file was renamed (the download can be skipped),
            False otherwise.
        """
        outtmpl = self.conf.rename_outtmpl
        with youtube_dl.YoutubeDL(self.conf.ydl_options) as ydl:
            info_dict = ydl.extract_info(video_url, download=False)
        # In some cases videos do not have an upload date.
        if info_dict.get("timestamp") is None:
            outtmpl = outtmpl.replace("%(upload_date)s", "")
        filepath = (
            self.process_outtmpl(outtmpl, download_dir, collection_name, video_index)
            % info_dict
        )
        path, filename = os.path.split(filepath)
        # A template without a directory part refers to the working directory.
        path = path or "."
        # Never create directories here: a missing directory simply means
        # there is nothing to rename, and creating it would leave stray
        # folders behind (e.g. when a title contains a path separator).
        if not os.path.isdir(path):
            return False
        match = process.extractOne(filename, os.listdir(path))
        if not match:
            return False
        matched_name, score = match[0], match[1]
        if score < int(self.conf.match_ratio):
            return False
        os.rename(os.path.join(path, matched_name), os.path.join(path, filename))
        return True  # rename took place

    def download_collection(self, collections_dict):
        """
        Downloads a complete collection of videos
        """
        ydl_options = self._copy_ydl_options()
        original_outtmpl = ydl_options["outtmpl"]
        self.print_figlet("standard", "Starting the collection downloads")
        download_dir = self.conf.output_dir
        for collection_item, video_urls in collections_dict.values():
            collection_name = collection_item["title"]
            # Video indices are 1-based positions within the collection.
            for video_index, video_url in enumerate(video_urls, start=1):
                ydl_options["outtmpl"] = self.process_outtmpl(
                    original_outtmpl, download_dir, collection_name, video_index
                )
                if self.conf.rename_existing and self.rename_existing(
                    video_url, download_dir, collection_name, video_index
                ):
                    # File exists and was renamed; continue with the next video
                    continue
                self._download_video(ydl_options, video_url)

    def download(self, video_urls):
        """
        Downloads individual videos from a list of URLs
        """
        ydl_options = self._copy_ydl_options()
        self.print_figlet("standard", "Starting the video downloads")
        download_dir = self.conf.output_dir
        ydl_options["outtmpl"] = ydl_options["urls_outtmpl"].replace(
            "%(download_dir)s", download_dir
        )
        for video_url in video_urls:
            if self.conf.rename_existing and self.rename_existing(
                video_url, download_dir, "", ""
            ):
                # File exists and was renamed; continue with the next video
                continue
            self._download_video(ydl_options, video_url)

    def _check_rename_settings(self):
        """Fail fast when renaming is enabled without its required options."""
        if not self.conf.rename_existing:
            return
        if not self.conf.rename_outtmpl or self.conf.match_ratio is None:
            self.parser.error(
                "--rename-existing requires --rename-outtmpl and --match-ratio"
            )

    def run(self):
        """Execute the actions selected by the parsed configuration.

        Collection ids take precedence over channel names. Single URLs are
        downloaded first; gathered collections are then either saved as URL
        lists (``--save-urls``) or downloaded.
        """
        self.print_banner()
        self._check_rename_settings()
        conf = self.conf
        collections_dict = {}
        if conf.collection_ids is not None:
            self.print_figlet("standard", "Processing collection IDs")
            collections = []
            for c_id in conf.collection_ids:
                # Get additional information about that collection
                request_url = self.api_base + f"collections/{c_id}"
                collection_item = requests.get(request_url, headers=self.headers).json()
                if "error" in collection_item:
                    print(f"Collection with the ID '{c_id}' does not exist")
                    continue
                collections.append(collection_item)
            collections_dict = self.gather_links(collections)
        elif conf.channels:  # Only channel names are supplied
            for channel_name in conf.channels:
                self.print_figlet("standard", f"Processing: {channel_name}")
                channel_query_result = self.channel_search(channel_name)
                channel = self.client.channels.get_by_id(channel_query_result["id"])
                collections = self.get_collections(channel["id"])
                if conf.show_collections:
                    # Only show available collections
                    for collection in collections:
                        print(f"{collection['title']} - {collection['_id']}")
                else:
                    # Accumulate, so every channel's collections get downloaded
                    collections_dict.update(self.gather_links(collections))
            if conf.show_collections:
                # Exit only after all channels were listed. The non-zero
                # status is kept for compatibility with existing callers.
                sys.exit(1)
        # Single URLs are given
        if conf.urls:
            self.download(conf.urls)
        # Collections were gathered
        if conf.save_urls:
            self.save_urls(collections_dict)
        elif collections_dict:
            self.download_collection(collections_dict)
def main():
    """Create a Twitchloader from the command line/config file and run it."""
    Twitchloader().run()


# Only start the downloader when executed as a script, not when imported.
if __name__ == "__main__":
    main()