-
Notifications
You must be signed in to change notification settings - Fork 0
/
client.py
499 lines (435 loc) · 16.8 KB
/
client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
import os
import pandas as pd
from typing import Union, Tuple
import logging
import logging.config
logger = logging.getLogger(__name__)
from .constants import Teams
from .statsapi import Game, Player
from .statcast import Statcast, _DEFAULT_SORT
from .filmroom import FilmRoom
from .compilation import Compilation
from .youtube import YouTube
from .utils import _PURGE_SUBFOLDERS
from .utils import get_video_info
from .analysis.umpire_calls import get_ump_calls
from .analysis.delta_win_exp import get_pitcher_batter_delta_win_exp
from .analysis.pitch_movement import get_pitch_movement
# Registry of dataframe transforms selectable by name.
# `MLBVideoClient.transform_statcast` looks a name up here and calls the
# mapped function with the client's dataframe, storing the returned value
# back as the new dataframe.
_ANALYSIS_DICT = {
"umpire_calls": get_ump_calls,
"pitcher_batter_delta_win_exp": get_pitcher_batter_delta_win_exp,
"pitch_movement": get_pitch_movement,
}
class MLBVideoClient:
    """End-to-end MLB video pipeline driver.

    Holds a Statcast dataframe (``self.df``) and exposes the steps that can be
    applied to it: enrichment (game / player / team info), analysis
    transforms, query / rank / sort, Film Room clip search, compilation
    building, YouTube upload and local media purge. Every step can be run
    automatically from ``__init__`` or invoked manually afterwards.
    """

    def __init__(
        self,
        project_name: str,
        project_path: str,
        statcast_params: dict = None,
        game_info: bool = False,
        player_info: bool = False,
        team_info: bool = False,
        analysis: list = None,
        queries: list = None,
        steps: list = None,
        search_filmroom: bool = False,
        filmroom_params: dict = None,
        build_compilation: bool = False,
        compilation_params: dict = None,
        youtube_upload: bool = False,
        youtube_params: dict = None,
        purge_files: bool = False,
    ):
        """MLB Video Client - handles end-to-end

        If statcast_params are not passed, all downstream params are stored, but not performed
        The client will not fail, but will simply remind you to add params and re-initialize OR
        apply downstream functions manually

        Parameters
        ----------
        project_name : str
            used for storing project files in specific project dir
        project_path : str
            local path to store project files
        statcast_params : dict
            dictionary of params to pass to statcast API
        game_info (bool, optional): bool, default False
            Expand data model to StatsAPI and gather game info
        player_info (bool, optional): bool, default False
            Expand data model to MLB.com and gather player info
        team_info (bool, optional): bool, default False
            Expand team info to include abbreviations, hashtags, etc.
        analysis (list, optional): list, default None
            List of analysis functions to apply, transforming dataframe
            ex. `["umpire_calls","pitch_movement"]`
        queries (list, optional): list, default None
            List of queries to apply to dataframe
            Each row represents string passed to df.query method
        steps (list, optional): list, default None
            Each element `step` in steps is a dict with a `type`
            ("query", "rank" or "sort") and the `params` for that method
        search_filmroom (bool, optional): bool, default False
            Perform search on MLB Film Room site, finding video for each record in dataframe
        filmroom_params (dict, optional): dict, default None (treated as {})
            Params to pass to MLB Film Room class (feed, download(bool), etc.)
        build_compilation (bool, optional): bool, default False
            Gather clips & consolidate in comp file
        compilation_params (dict, optional): dict, default None (treated as {})
            **kwargs for compilation class
        youtube_upload (bool, optional): bool, default False
            Upload compilation to Youtube
        youtube_params (dict, optional): dict, default None (treated as {})
            Parameters for youtube API
            ex. video title, description, tags, playlist to add to, privacy, thumbnail, etc.
        purge_files (bool, optional): bool, default False
            Purge local store of video clips, compilations, etc.
        """
        self.project_name = project_name
        self.local_path = project_path
        self.statcast_params = statcast_params
        self.game_info = game_info
        self.player_info = player_info
        self.team_info = team_info
        self.analysis = analysis
        self.queries = queries
        self.steps = steps
        self.search_filmroom = search_filmroom
        # Each instance keeps its own copy of the param dicts: upload_youtube
        # merges into youtube_params, so sharing a default (or the caller's)
        # dict would leak settings between clients.
        self.filmroom_params = dict(filmroom_params) if filmroom_params else {}
        self.build_compilation = build_compilation
        self.compilation_params = (
            dict(compilation_params) if compilation_params else {}
        )
        self.youtube_upload = youtube_upload
        self.youtube_params = dict(youtube_params) if youtube_params else {}
        self.purge_files = purge_files
        self.missing_videos = []

        if not self.statcast_params:
            logging.warning(
                "No statcast params passed - all downstream functions have been ignored."
            )
            return

        # Pipeline order matters: each stage works on the dataframe left by
        # the previous one.
        self.get_statcast_df()
        if game_info:
            self.add_game_info()
        if player_info:
            self.add_player_info()
        if team_info:
            self.add_team_info()
        if analysis:
            self.transform_statcast(self.analysis)
        if queries:
            self._perform_queries()
        if steps:
            self._perform_steps()
        if self.search_filmroom:
            self._get_filmroom_videos(params=self.filmroom_params)
        if build_compilation:
            self.create_compilation()
        if youtube_upload:
            self.upload_youtube()
        if purge_files:
            self.purge_project_media()

    def get_statcast_df(self, statcast_params: dict = None):
        """Get Statcast DF

        Parameters
        ----------
        statcast_params (dict, optional): dict, default None
            dictionary of new statcast params to set;
            if omitted, the params already stored on the client are used.
            When neither is available nothing is fetched.
        """
        if statcast_params:
            self.statcast_params = statcast_params
        elif not self.statcast_params:
            logging.info("No statcast params -- try again.")
            return
        self.df = Statcast(**self.statcast_params).get_df()

    def purge_project_media(self):
        """Deletes local store of media files (video, data, etc.)

        Removes the files directly inside each purge subfolder of the project
        path. Subfolders that do not exist, and nested directories, are
        skipped instead of aborting the purge.
        """
        for subfolder in _PURGE_SUBFOLDERS:
            del_dir = os.path.join(self.local_path, subfolder)
            if not os.path.isdir(del_dir):
                continue
            for entry in os.listdir(del_dir):
                entry_path = os.path.join(del_dir, entry)
                # os.remove cannot delete directories; only purge plain files.
                if os.path.isfile(entry_path):
                    os.remove(entry_path)
        logging.info("Purged media from project folder..")

    def update_df(self, new_df: pd.DataFrame):
        """Sets DF property within client

        Parameters
        ----------
        new_df : pd.DataFrame
            dataframe that replaces the current one
        """
        self.df = new_df

    def _merge_with_prefix(self, info_df: pd.DataFrame, id_col: str, prefix: str):
        """Left-merge `info_df` onto self.df under a column prefix.

        `id_col` is renamed to `prefix` (the join key already present in
        self.df); every other column becomes `{prefix}_{column}`.
        """
        renamed = info_df.rename(
            columns={
                c: prefix if c == id_col else f"{prefix}_{c}"
                for c in info_df.columns.values
            }
        )
        self.df = self.df.merge(renamed, how="left", on=prefix)

    def add_game_info(self):
        """Add game info from the MLB StatsAPI to statcast dataframe

        Contains attributes for ballpark, umpire, etc.
        All added columns are prefixed with `game_`, except the `game_pk` join key.
        """
        game_list = self.df["game_pk"].unique().tolist()
        logging.info(f"Getting game info for {len(game_list)} game(s)..")
        game_df = Game(game_list).get_df()
        # rename() returns a new frame; the result must be kept for the
        # prefix to take effect.
        game_df = game_df.rename(
            columns={c: f"game_{c}" for c in game_df.columns.values if c != "game_pk"}
        )
        self.df = self.df.merge(game_df, how="left", on="game_pk")
        logging.info("Added game info.")

    def add_player_info(self):
        """Add player info from MLB website to statcast dataframe

        Contains personal info, social media links, etc.
        The same player table is merged twice: once as `batter_*` columns and
        once as `pitcher_*` columns.
        """
        player_list = list(
            set(self.df["batter"].values.tolist() + self.df["pitcher"].values.tolist())
        )
        logging.info(f"Getting player info for {len(player_list)} player(s)..")
        player_df = Player(player_list).get_df()
        for role in ("batter", "pitcher"):
            self._merge_with_prefix(player_df, id_col="id", prefix=role)
        logging.info("Added player info.")

    def add_team_info(self):
        """Add team info from static file to statcast dataframe

        Contains team name abbreviations, hashtags, etc.
        The team table is merged twice: as `home_team_*` and `away_team_*` columns.
        """
        team_df = pd.json_normalize([v for _, v in Teams.items()])
        for side in ("home_team", "away_team"):
            self._merge_with_prefix(team_df, id_col="abbreviation", prefix=side)

    def transform_statcast(self, mod: Union[list, str]):
        """Run each module `(analysis/*)` referenced in class.analysis

        Parameters
        ----------
        mod : Union[list, str]
            list of str or str with module names

        Raises
        ------
        ValueError
            If a name is not a registered analysis
        """
        if isinstance(mod, str):
            mod = [mod]
        for md in mod:
            transform = _ANALYSIS_DICT.get(md)
            if transform is None:
                raise ValueError(
                    f"Unknown analysis '{md}' - expected one of {sorted(_ANALYSIS_DICT)}"
                )
            self.df = transform(self.df)
            logging.info(f"Transformed DF: {md}")

    def _perform_filmroom_search(self, pitch: pd.Series, params: dict) -> Tuple:
        """Performs a filmroom search for given pitch

        Parameters
        ----------
        pitch : pd.Series
            row of data from self.df
        params : dict
            self.filmroom_params

        Returns
        -------
        Tuple
            Information about clip if found, `(None, None)` otherwise
        """
        # Deliberately best-effort: one failed search must not abort the
        # search for the remaining pitches.
        try:
            clip = FilmRoom(pitch=pitch, local_path=self.local_path, **params)
            return clip.get_file_info()
        except Exception as e:
            logging.warning(f"FilmRoom search failed: {e}\n\n")
            return (None, None)

    def _get_filmroom_videos(self, params: dict = None):
        """Iterates over members of self.df & performs filmroom search for all

        Adds `video_file_name` and `video_file_path` columns to self.df.

        Parameters
        ----------
        params (dict, optional): dict, default {"download": True, "feed": "Optimal"}
            self.filmroom params
        """
        if params is None:
            params = {"download": True, "feed": "Optimal"}
        self.search_filmroom = True
        logging.info(f"Starting FilmRoom search for {len(self.df)} pitch(es)..")
        if self.df.empty:
            # Nothing to search: a row-wise apply on an empty frame produces
            # no (name, path) pairs to expand, so create the columns directly.
            self.df["video_file_name"] = None
            self.df["video_file_path"] = None
            return
        self.df[["video_file_name", "video_file_path"]] = self.df.apply(
            lambda x: self._perform_filmroom_search(x, params),
            axis=1,
            result_type="expand",
        )
        # Disabled upstream: per-clip video metadata lookup.
        # self.df[
        #     [
        #         "video_duration",
        #         "video_width",
        #         "video_height",
        #         "video_fps",
        #         "video_filesize",
        #     ]
        # ] = self.df.apply(
        #     lambda x: get_video_info(x["video_file_path"])
        #     if not pd.isnull(x["video_file_path"])
        #     else (None, None, None, None, None),
        #     axis=1,
        #     result_type="expand",
        # )

    @staticmethod
    def _normalize_sort_args(fields, ascending):
        """Return `(fields, ascending)` as two lists for `sort_values`.

        A single column name becomes a one-element list and a single bool is
        repeated for every field. Anything else raises the same
        "Mismatch in Parameter Count" exception as before.
        """
        if isinstance(fields, str):
            fields = [fields]
        if not isinstance(fields, list):
            raise Exception("Mismatch in Parameter Count")
        if isinstance(ascending, bool):
            ascending = [ascending] * len(fields)
        if not isinstance(ascending, list):
            raise Exception("Mismatch in Parameter Count")
        return fields, ascending

    def sort_df(self, fields: Union[list, str], ascending: Union[list, bool]):
        """Sort dataframe based on multiple fields

        Parameters
        ----------
        fields : Union[list, str]
            list of cols to sort by
        ascending : Union[list, bool]
            list of boolean, or a single boolean applied to every field

        Raises
        ------
        Exception
            If fields / ascending are not of the supported types.
            A length mismatch between two lists is reported by pandas.
        """
        fields, ascending = self._normalize_sort_args(fields, ascending)
        self.df = self.df.sort_values(by=fields, ascending=ascending).reset_index(
            drop=True
        )
        logging.info(f"Sorted df: {fields}")

    def query_df(self, query: str):
        """Applies df.query method & resets index

        Parameters
        ----------
        query : str
            Query string to pass to query function
        """
        self.df = self.df.query(query).reset_index(drop=True)
        logging.info(f"Applied query to DF: {query}")

    def rank_df(
        self,
        name: str,
        fields: Union[list, str],
        ascending: Union[list, bool],
        group_by: Union[list, str] = None,
        keep_sort: bool = False,
    ):
        """Rank members of dataframe, multi-column, add field repr

        The dataframe is sorted by `fields`, then each row receives its
        1-based position (within its group when `group_by` is given).

        Parameters
        ----------
        name : str
            Col name to add for rank value
        fields : Union[list, str]
            List of columns to rank by
        ascending : Union[list, bool]
            List of boolean, or a single boolean applied to every field
        group_by : Union[list, str]
            List of columns to groupby
        keep_sort (bool, optional): bool, default False
            Restore the default sort order after ranking

        Raises
        ------
        Exception
            If fields / ascending are not of the supported types.
            A length mismatch between two lists is reported by pandas.
        """
        if isinstance(group_by, str):
            group_by = [group_by]
        fields, ascending = self._normalize_sort_args(fields, ascending)
        self.df = self.df.sort_values(by=fields, ascending=ascending)
        # A running sum over a column of ones yields the 1-based rank.
        self.df[name] = 1
        if group_by:
            self.df[name] = self.df.groupby(group_by)[name].cumsum()
        else:
            self.df[name] = self.df[name].cumsum()
        if keep_sort:
            self._reset_df_sort()
        self.df = self.df.reset_index(drop=True)
        logging.info(f"Added rank field: {name} to DF")

    def _reset_df_sort(self):
        """Reset df sort order

        ["game_date", "game_pk", "at_bat_number", "pitch_number"]
        """
        self.df = self.df.sort_values(by=_DEFAULT_SORT, ascending=True)

    def get_df(self) -> pd.DataFrame:
        """Get DataFrame

        Returns
        -------
        pd.DataFrame
            the client's current dataframe
        """
        return self.df

    def _perform_queries(self):
        """Run all pre-defined queries"""
        for query in self.queries:
            self.query_df(query)

    def _perform_steps(self):
        """Run all pre-defined steps

        Each step is a dict with a `type` ("query", "rank" or "sort") and the
        keyword `params` for the matching method. Steps of any other type are
        skipped with a warning.
        """
        handlers = {
            "query": self.query_df,
            "rank": self.rank_df,
            "sort": self.sort_df,
        }
        for step in self.steps:
            handler = handlers.get(step.get("type"))
            if handler is None:
                logging.warning(f"Skipping step with unknown type: {step.get('type')}")
                continue
            handler(**(step.get("params") or {}))
        self.df = self.df.reset_index(drop=True)

    def create_compilation(self):
        """Init Compilation class, generate file

        Rows without a `video_file_name` are reported and left out of the
        compilation. The resulting file is stored in `self.comp_file`.
        """
        self.build_compilation = True
        no_clip = self.df["video_file_name"].isnull()
        # Callout any missing clip(s)
        missing_clips = self.df[no_clip]
        if len(missing_clips) > 0:
            logging.warning(
                f"Missing clips for following:\n{missing_clips.pitch_id.values.tolist()}"
            )
        comp = Compilation(
            df=self.df[~no_clip],
            project_title=self.project_name,
            project_path=self.local_path,
            **self.compilation_params,
        )
        self.comp_file = comp.comp_file

    def upload_youtube(self, youtube_params: dict = None):
        """Upload compilation to YouTube

        Parameters
        ----------
        youtube_params (dict, optional): dict, default None
            dictionary of youtube parameters; merged over any params already
            stored on the client (passed values win)

        Raises
        ------
        Exception
            If no compilation has been generated yet
        Exception
            If no youtube params are available (neither passed nor stored)
        """
        # comp_file only exists once create_compilation has run.
        if not getattr(self, "comp_file", None):
            raise Exception("No compilation generated..")
        if not youtube_params and not self.youtube_params:
            raise Exception("Must pass a valid params dict for YT Upload..")
        if youtube_params:
            self.youtube_params = {**(self.youtube_params or {}), **youtube_params}
        self.yt_client = YouTube(file_path=self.comp_file, params=self.youtube_params)