From c6b9bf0595565923729fee070650499d6d8b913f Mon Sep 17 00:00:00 2001 From: Joseph Armstrong <70788681+armstjc@users.noreply.github.com> Date: Mon, 16 Sep 2024 18:28:06 -0400 Subject: [PATCH] 0.2.4 The "Speedy" Update - Refactored `cfbd_json_py.games.get_cfbd_player_game_stats()`, `cfbd_json_py.plays.get_cfbd_pbp_play_types()`, and `cfbd_json_py.players.get_cfbd_player_season_stats()` to use a significantly faster process to parse player stats. - Changed `print()` statements into `logging.info()` statements for `cfbd_json_py.games.get_cfbd_player_advanced_game_stats()` - For `cfbd_json_py.metrics.get_cfbd_predicted_ppa_from_down_distance()`, a `logging.warn()` call is now a `logging.warning()` call due to a pending deprecation of `logging.warn()`. - Fixed an issue found in `cfbd_json_py.players.get_cfbd_pbp_stats()` where the function would warn the user about an issue that the user should not have triggered. - Removed `tqdm` integration with `cfbd_json_py.plays.get_cfbd_pbp_play_types()`, `cfbd_json_py.rankings.get_cfbd_poll_rankings()`. - Updated the package version to `0.2.4`. 
--- .vscode/settings.json | 5 + CHANGELOG.md | 9 + README.md | 2 +- cfbd_json_py/betting.py | 2 +- cfbd_json_py/drives.py | 2 +- cfbd_json_py/games.py | 2035 +-- cfbd_json_py/metrics.py | 4 +- cfbd_json_py/players.py | 645 +- cfbd_json_py/plays.py | 43 +- cfbd_json_py/rankings.py | 6 +- cfbd_json_py/recruiting.py | 24 +- cfbd_json_py/stats.py | 7 +- cfbd_json_py/utls.py | 2 +- cfbd_json_py/venues.py | 6 +- docs/cfbd_json_py.html | 2 +- docs/cfbd_json_py/_early_access.html | 2 +- docs/cfbd_json_py/betting.html | 532 +- docs/cfbd_json_py/coaches.html | 2 +- docs/cfbd_json_py/conferences.html | 2 +- docs/cfbd_json_py/draft.html | 2 +- docs/cfbd_json_py/drives.html | 6 +- docs/cfbd_json_py/games.html | 16797 ++++++++++--------------- docs/cfbd_json_py/metrics.html | 6 +- docs/cfbd_json_py/players.html | 6216 ++++----- docs/cfbd_json_py/plays.html | 2886 ++--- docs/cfbd_json_py/rankings.html | 8 +- docs/cfbd_json_py/ratings.html | 2 +- docs/cfbd_json_py/recruiting.html | 2746 ++-- docs/cfbd_json_py/stats.html | 5044 ++++---- docs/cfbd_json_py/teams.html | 2 +- docs/cfbd_json_py/utls.html | 9 +- docs/cfbd_json_py/venues.html | 670 +- docs/index.html | 2 +- docs/search.js | 10 +- pyproject.toml | 2 +- 35 files changed, 16452 insertions(+), 21288 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..62152ec --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "cSpell.words": [ + "Opportunies" + ] +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a78b762..78fda8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # CHANGELOG: cfbd_json_py + +## 0.2.4 The "Speedy" Update. +- Refactored `cfbd_json_py.games.get_cfbd_player_game_stats()`, `cfbd_json_py.plays.get_cfbd_pbp_play_types()`, and `cfbd_json_py.players.get_cfbd_player_season_stats()` to use a significantly faster process to parse player stats. 
+- Changed `print()` statements into `logging.info()` statements for `cfbd_json_py.games.get_cfbd_player_advanced_game_stats()` +- For `cfbd_json_py.metrics.get_cfbd_predicted_ppa_from_down_distance()`, a `logging.warn()` call is now a `logging.warning()` call due to a pending deprecation of `logging.warn()`. +- Fixed an issue found in `cfbd_json_py.players.get_cfbd_pbp_stats()` where the function would warn the user about an issue that the user should not have triggered. +- Removed `tqdm` integration with `cfbd_json_py.plays.get_cfbd_pbp_play_types()`, `cfbd_json_py.rankings.get_cfbd_poll_rankings()`. +- Updated the package version to `0.2.4`. + ## 0.2.3 The "Hotfix" Update. - Fixed an issue raised in #51 where the `[player_id]` column would be entirely blank in `cfbd_json_py.players.get_cfbd_player_season_stats()`. - Updated the package version to `0.2.3`. diff --git a/README.md b/README.md index d0604d7..7fa7c5e 100644 --- a/README.md +++ b/README.md @@ -47,4 +47,4 @@ You can view the status of the project [by clicking on this link](https://github If you find an issue/bug while using this python package, [please feel free to raise an issue](https://github.com/armstjc/cfbd-json-py/issues), we're only human, and there will inevitably be issues found! 
## Docs -For the cfbd-json-py docs, they can be found at [armstjc.github.io/cfbd-json-py/](armstjc.github.io/cfbd-json-py/) \ No newline at end of file +For the cfbd-json-py docs, they can be found at [armstjc.github.io/cfbd-json-py/](https://armstjc.github.io/cfbd-json-py/cfbd_json_py.html) \ No newline at end of file diff --git a/cfbd_json_py/betting.py b/cfbd_json_py/betting.py index 2642cbd..0859daf 100644 --- a/cfbd_json_py/betting.py +++ b/cfbd_json_py/betting.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 08/28/2024 11:00 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: betting.py # Purpose: Houses functions pertaining to betting data within the CFBD API. diff --git a/cfbd_json_py/drives.py b/cfbd_json_py/drives.py index 311c24e..df2e474 100644 --- a/cfbd_json_py/drives.py +++ b/cfbd_json_py/drives.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 08/28/2024 11:00 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: drives.py # Purpose: Houses functions pertaining to CFB drive data within the CFBD API. diff --git a/cfbd_json_py/games.py b/cfbd_json_py/games.py index dda2793..e4bfaae 100644 --- a/cfbd_json_py/games.py +++ b/cfbd_json_py/games.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: games.py # Purpose: Houses functions pertaining to CFB game data within the CFBD API. 
@@ -8,6 +8,7 @@ import logging from datetime import datetime +import numpy as np import pandas as pd import requests from tqdm import tqdm @@ -1894,10 +1895,13 @@ def get_cfbd_player_game_stats( """ - rebuilt_json = {} now = datetime.now() + + rebuilt_json = {} + rebuilt_json_list = [] + cfb_games_df = pd.DataFrame() - row_df = pd.DataFrame() + # row_df = pd.DataFrame() url = "https://api.collegefootballdata.com/games/players" stat_columns = [ "season", @@ -2142,1568 +2146,30 @@ def get_cfbd_player_game_stats( team_conference = team["conference"] home_away = team["homeAway"] - for s_category in team["categories"]: - if s_category["name"] == "passing": - for stat in s_category["types"]: - if stat["name"] == "C/ATT": # passing_C/ATT - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = i["stat"] - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "passing_C/ATT" - ] = player_stat - - elif stat["name"] == "YDS": # passing_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "passing_YDS" - ] = player_stat - - elif 
stat["name"] == "AVG": # passing_AVG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "passing_AVG" - ] = player_stat - - elif stat["name"] == "TD": # passing_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "passing_TD" - ] = player_stat - - elif stat["name"] == "INT": # passing_INT - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "passing_INT" - ] = player_stat - - elif stat["name"] == "QBR": # passing_QBR - for i in stat["athletes"]: 
- player_id = int(i["id"]) - player_name = i["name"] - try: - player_stat = float(i["stat"]) - except: # noqa: E722 - player_stat = None - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "passing_QBR" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - # passing_df = pd.DataFrame(s_category['types']) - elif s_category["name"] == "rushing": - for stat in s_category["types"]: - if stat["name"] == "CAR": # rushing_CAR - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "rushing_CAR" - ] = player_stat - - elif stat["name"] == "YDS": # rushing_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - 
rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "rushing_YDS" - ] = player_stat - - elif stat["name"] == "AVG": # rushing_AVG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "rushing_AVG" - ] = player_stat - - elif stat["name"] == "TD": # rushing_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "rushing_TD" - ] = player_stat - - elif stat["name"] == "LONG": # rushing_LONG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - 
rebuilt_json[player_id][ - "rushing_LONG" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "receiving": - for stat in s_category["types"]: - if stat["name"] == "REC": # receiving_REC - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "receiving_REC" - ] = player_stat - - elif stat["name"] == "YDS": # receiving_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "receiving_YDS" - ] = player_stat - - elif stat["name"] == "AVG": # receiving_AVG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - 
rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "receiving_AVG" - ] = player_stat - - elif stat["name"] == "TD": # receiving_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "receiving_TD" - ] = player_stat - - elif stat["name"] == "LONG": # receiving_LONG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "receiving_LONG" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "fumbles": - for stat in s_category["types"]: - if stat["name"] == "FUM": # fumbles_FUM - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - 
"team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "fumbles_FUM" - ] = player_stat - - elif stat["name"] == "LOST": # fumbles_LOST - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "fumbles_LOST" - ] = player_stat - - elif stat["name"] == "REC": # fumbles_REC - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "fumbles_REC" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "defensive": - for stat in s_category["types"]: - if stat["name"] == "TOT": # defensive_TOT - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = 
game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_TOT" - ] = player_stat - - elif stat["name"] == "SOLO": # defensive_SOLO - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_SOLO" - ] = player_stat - - elif stat["name"] == "TFL": # defensive_TFL - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_TFL" - ] = player_stat - - elif stat["name"] == "QB HUR": # defensive_QB HUR - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - 
"team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_QB HUR" - ] = player_stat - - elif stat["name"] == "SACKS": # defensive_SACKS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_SACKS" - ] = player_stat - - elif stat["name"] == "PD": # defensive_PD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_PD" - ] = player_stat - - elif stat["name"] == "TD": # defensive_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - 
rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "defensive_TD" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "interceptions": - for stat in s_category["types"]: - if stat["name"] == "INT": # interceptions_INT - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "interceptions_INT" - ] = player_stat - - elif stat["name"] == "YDS": # interceptions_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "interceptions_YDS" - ] = player_stat - - elif stat["name"] == "TD": # interceptions_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - 
rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "interceptions_TD" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "punting": - for stat in s_category["types"]: - if stat["name"] == "NO": # punting_NO - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "punting_NO" - ] = player_stat - - elif stat["name"] == "YDS": # punting_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "punting_YDS" - ] = player_stat - - elif stat["name"] == "AVG": # punting_AVG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = 
i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "punting_AVG" - ] = player_stat - - elif stat["name"] == "TB": # punting_TB - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "punting_TB" - ] = player_stat - - elif stat["name"] == "In 20": # punting_In 20 - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "punting_In 20" - ] = player_stat - - elif stat["name"] == "LONG": # punting_LONG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if 
rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "punting_LONG" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "kicking": - for stat in s_category["types"]: - if stat["name"] == "FG": # kicking_FG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = i["stat"] - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kicking_FG" - ] = player_stat - - elif stat["name"] == "TOT": # kicking_FG, special case - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = i["stat"] - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kicking_FG" - ] = player_stat - - elif stat["name"] == "PCT": # kicking_PCT - for i in stat["athletes"]: - 
player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kicking_PCT" - ] = player_stat - - elif stat["name"] == "LONG": # kicking_LONG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kicking_LONG" - ] = player_stat - - elif stat["name"] == "XP": # kicking_XP - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = i["stat"] - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kicking_XP" - ] = player_stat - - elif stat["name"] == "PTS": # kicking_PTS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = 
int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kicking_PTS" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - elif s_category["name"] == "kickReturns": - for stat in s_category["types"]: - if stat["name"] == "NO": # kickReturns_NO - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kickReturns_NO" - ] = player_stat - - elif stat["name"] == "YDS": # kickReturns_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kickReturns_YDS" - ] = player_stat - - elif stat["name"] == "AVG": # 
kickReturns_AVG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kickReturns_AVG" - ] = player_stat - - elif stat["name"] == "TD": # kickReturns_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kickReturns_TD" - ] = player_stat - - elif stat["name"] == "LONG": # kickReturns_LONG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "kickReturns_LONG" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) 
- - elif s_category["name"] == "puntReturns": - for stat in s_category["types"]: - if stat["name"] == "NO": # puntReturns_NO - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "puntReturns_NO" - ] = player_stat - - elif stat["name"] == "YDS": # puntReturns_YDS - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "puntReturns_YDS" - ] = player_stat - - elif stat["name"] == "AVG": # puntReturns_AVG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = float(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - 
"puntReturns_AVG" - ] = player_stat - - elif stat["name"] == "TD": # puntReturns_TD - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "puntReturns_TD" - ] = player_stat - - elif stat["name"] == "LONG": # puntReturns_LONG - for i in stat["athletes"]: - player_id = int(i["id"]) - player_name = i["name"] - player_stat = int(i["stat"]) - - if rebuilt_json.get(player_id) is None: - rebuilt_json[player_id] = {} - - rebuilt_json[player_id]["game_id"] = game_id - rebuilt_json[player_id][ - "team_name" - ] = team_name - rebuilt_json[player_id][ - "team_conference" - ] = team_conference - rebuilt_json[player_id][ - "player_id" - ] = player_id - rebuilt_json[player_id][ - "player_name" - ] = player_name - rebuilt_json[player_id][ - "home_away" - ] = home_away - rebuilt_json[player_id][ - "puntReturns_LONG" - ] = player_stat - - else: - raise IndexError( - f"Unhandled stat: \t{stat['name']}" - ) - - else: - raise IndexError( - f"Unhandled stat category: \t{s_category['name']}" - ) - - for key, value in tqdm(rebuilt_json.items()): - row_df = pd.json_normalize(value) - cfb_games_df = pd.concat([cfb_games_df, row_df], ignore_index=True) - del row_df + for stat_category in team["categories"]: + stat_category = stat_category["name"] + for s_type in stat_category["types"]: + stat_name = s_type["name"] + for player in s_type["athletes"]: + p_id = player["id"] + p_name = player["name"] + full_stat_name = f"{stat_category}_{stat_name}" + stat_value = player["stat"] + + if 
rebuilt_json.get(p_id) is None: + rebuilt_json[p_id] = {} + rebuilt_json[p_id]["player_id"] = p_id + rebuilt_json[p_id]["game_id"] = game_id + rebuilt_json[p_id]["team_name"] = team_name + rebuilt_json[p_id]["team_conference"] = team_conference + rebuilt_json[p_id]["home_away"] = home_away + rebuilt_json[p_id]["player_name"] = p_name + rebuilt_json[p_id][full_stat_name] = stat_value + + for _, value in rebuilt_json.items(): + rebuilt_json_list.append(value) + cfb_games_df = pd.DataFrame(rebuilt_json_list) + cfb_games_df["season"] = season cfb_games_df[["passing_COMP", "passing_ATT"]] = cfb_games_df[ "passing_C/ATT" @@ -3714,225 +2180,258 @@ def get_cfbd_player_game_stats( ].str.split( "/", expand=True ) - cfb_games_df[["kicking_XPM", "kicking_XPA"]] = cfb_games_df[ + + cfb_games_df[["kicking_XP", "kicking_XPM"]] = cfb_games_df[ "kicking_XP" ].str.split( "/", expand=True ) - cfb_games_df = cfb_games_df.fillna(0) + cfb_games_df = cfb_games_df.reindex( + columns=stat_columns + ) + cfb_games_df = cfb_games_df.replace(np.nan, 0) cfb_games_df = cfb_games_df.astype( { - "passing_COMP": "int", - "passing_ATT": "int", - "kicking_FGM": "int", - "kicking_FGA": "int", - "kicking_XPM": "int", - "kicking_XPA": "int", + "season": "uint16", + "game_id": "int64", + "team_name": "str", + "team_conference": "str", + "player_id": "int64", + "player_name": "str", + "home_away": "str", + + "passing_COMP": "uint16", + "passing_ATT": "uint16", + "passing_YDS": "int16", + "passing_TD": "uint16", + "passing_INT": "uint16", + "passing_AVG": "float16", + + "rushing_CAR": "uint16", + "rushing_YDS": "int16", + "rushing_AVG": "float16", + "rushing_TD": "uint16", + "rushing_LONG": "int16", + + "receiving_REC": "uint16", + "receiving_YDS": "int16", + "receiving_AVG": "float16", + "receiving_TD": "uint16", + "receiving_LONG": "int16", + + "fumbles_FUM": "uint8", + "fumbles_LOST": "uint8", + "fumbles_REC": "uint8", + + "defensive_TOT": "uint16", + "defensive_SOLO": "uint16", + "defensive_TFL": 
"float16", + "defensive_QB HUR": "uint16", + "defensive_SACKS": "float16", + "defensive_PD": "uint16", + "defensive_TD": "uint8", + + "interceptions_INT": "uint8", + "interceptions_YDS": "int16", + "interceptions_TD": "uint8", + + "punting_NO": "uint16", + "punting_YDS": "int16", + "punting_AVG": "float16", + "punting_TB": "uint8", + "punting_In 20": "uint8", + "punting_LONG": "int8", + + "kicking_FGM": "uint16", + "kicking_FGA": "uint16", + "kicking_PCT": "float16", + "kicking_LONG": "uint8", + "kicking_XPM": "uint16", + "kicking_XPA": "uint16", + "kicking_PTS": "uint16", + + "kickReturns_NO": "uint16", + "kickReturns_YDS": "int16", + "kickReturns_AVG": "float16", + "kickReturns_TD": "uint8", + "kickReturns_LONG": "int8", + + "puntReturns_NO": "uint16", + "puntReturns_YDS": "int16", + "puntReturns_AVG": "float16", + "puntReturns_TD": "uint8", + "puntReturns_LONG": "int8", } ) - # print(cfb_games_df.columns) - cfb_games_df["season"] = season - - if filter_by_stat_category is False: - cfb_games_df = cfb_games_df.reindex(columns=stat_columns) - - elif filter_by_stat_category is True and stat_category == "passing": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # PASS - "passing_C/ATT", - "passing_COMP", - "passing_ATT", - "passing_YDS", - "passing_AVG", - "passing_TD", - "passing_INT", - "passing_QBR", - ] - ] + if filter_by_stat_category is True and stat_category == "passing": + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # PASS + "passing_C/ATT", + "passing_COMP", + "passing_ATT", + "passing_YDS", + "passing_AVG", + "passing_TD", + "passing_INT", + "passing_QBR", + ]] elif filter_by_stat_category is True and stat_category == "rushing": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - 
"home_away", - # RUSH - "rushing_CAR", - "rushing_YDS", - "rushing_AVG", - "rushing_TD", - "rushing_LONG", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # RUSH + "rushing_CAR", + "rushing_YDS", + "rushing_AVG", + "rushing_TD", + "rushing_LONG", + ]] elif filter_by_stat_category is True and stat_category == "receiving": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # REC - "receiving_REC", - "receiving_YDS", - "receiving_AVG", - "receiving_TD", - "receiving_LONG", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # REC + "receiving_REC", + "receiving_YDS", + "receiving_AVG", + "receiving_TD", + "receiving_LONG", + ]] elif filter_by_stat_category is True and stat_category == "fumbles": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # FUM - "fumbles_FUM", - "fumbles_LOST", - "fumbles_REC", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # FUM + "fumbles_FUM", + "fumbles_LOST", + "fumbles_REC", + ]] elif filter_by_stat_category is True and stat_category == "defensive": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # DEFENSE - "defensive_TOT", - "defensive_SOLO", - "defensive_TFL", - "defensive_QB HUR", - "defensive_SACKS", - "defensive_PD", - "defensive_TD", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # DEFENSE + "defensive_TOT", + "defensive_SOLO", + 
"defensive_TFL", + "defensive_QB HUR", + "defensive_SACKS", + "defensive_PD", + "defensive_TD", + ]] elif filter_by_stat_category is True and stat_category == "interceptions": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # INT - "interceptions_INT", - "interceptions_YDS", - "interceptions_TD", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # INT + "interceptions_INT", + "interceptions_YDS", + "interceptions_TD", + ]] elif filter_by_stat_category is True and stat_category == "punting": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # PUNT - "punting_NO", - "punting_YDS", - "punting_AVG", - "punting_TB", - "punting_In 20", - "punting_LONG", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # PUNT + "punting_NO", + "punting_YDS", + "punting_AVG", + "punting_TB", + "punting_In 20", + "punting_LONG", + ]] elif filter_by_stat_category is True and stat_category == "kicking": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # KICK - "kicking_FG", - "kicking_FGM", - "kicking_FGA", - "kicking_PCT", - "kicking_LONG", - "kicking_XP", - "kicking_XPM", - "kicking_XPA", - "kicking_PTS", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # KICK + "kicking_FG", + "kicking_FGM", + "kicking_FGA", + "kicking_PCT", + "kicking_LONG", + "kicking_XP", + "kicking_XPM", + "kicking_XPA", + "kicking_PTS", + ]] elif filter_by_stat_category is True and stat_category == "kickReturns": - cfb_games_df = 
cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # KR - "kickReturns_NO", - "kickReturns_YDS", - "kickReturns_AVG", - "kickReturns_TD", - "kickReturns_LONG", - ] - ] - + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # KR + "kickReturns_NO", + "kickReturns_YDS", + "kickReturns_AVG", + "kickReturns_TD", + "kickReturns_LONG", + ]] elif filter_by_stat_category is True and stat_category == "puntReturns": - cfb_games_df = cfb_games_df[ - [ - "season", - "game_id", - "team_name", - "team_conference", - "player_id", - "player_name", - "home_away", - # KR - "puntReturns_NO", - "puntReturns_YDS", - "puntReturns_AVG", - "puntReturns_TD", - "puntReturns_LONG", - ] - ] + cfb_games_df = cfb_games_df[[ + "season", + "game_id", + "team_name", + "team_conference", + "player_id", + "player_name", + "home_away", + # KR + "puntReturns_NO", + "puntReturns_YDS", + "puntReturns_AVG", + "puntReturns_TD", + "puntReturns_LONG", + ]] return cfb_games_df @@ -4132,8 +2631,8 @@ def get_cfbd_player_advanced_game_stats( game_excitement_score = json_data["gameInfo"]["excitement"] # Parsing Usage - print("Parsing player usage data.") - for player in tqdm(json_data["players"]["usage"]): + logging.info("Parsing player usage data.") + for player in json_data["players"]["usage"]: row_df = pd.DataFrame({"game_id": game_id}, index=[0]) row_df["player_name"] = player["player"] row_df["team"] = player["team"] @@ -4151,8 +2650,8 @@ def get_cfbd_player_advanced_game_stats( del row_df # Parsing PPA - print("Parsing player PPA data.") - for player in tqdm(json_data["players"]["ppa"]): + logging.info("Parsing player PPA data.") + for player in json_data["players"]["ppa"]: row_df = pd.DataFrame({"game_id": game_id}, index=[0]) row_df["player_name"] = player["player"] row_df["team"] = player["team"] diff --git a/cfbd_json_py/metrics.py 
b/cfbd_json_py/metrics.py index 97866d2..cd4eafb 100644 --- a/cfbd_json_py/metrics.py +++ b/cfbd_json_py/metrics.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: metrics.py # Purpose: Houses functions pertaining to various CFB @@ -197,7 +197,7 @@ def get_cfbd_predicted_ppa_from_down_distance( # but we have to convey to the person calling this # function that setting `down = 5` # is not something they should be doing. - logging.warn( + logging.warning( 'There is a very limited number of "5th down" situations ' + "in American Football history. " + "Do not expect anything back when calling this function, " diff --git a/cfbd_json_py/players.py b/cfbd_json_py/players.py index 3b63058..3d2df8f 100644 --- a/cfbd_json_py/players.py +++ b/cfbd_json_py/players.py @@ -1,16 +1,16 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: players.py # Purpose: Houses functions pertaining to CFB player data within the CFBD API. 
############################################################################### -import logging +# import logging from datetime import datetime import pandas as pd import requests -from tqdm import tqdm +# from tqdm import tqdm # from cfbd_json_py.games import get_cfbd_player_game_stats from cfbd_json_py.utls import get_cfbd_api_token @@ -782,7 +782,6 @@ def get_cfbd_player_usage( if player_id is not None: url += f"&playerId={player_id}" - # print() if exclude_garbage_time is not None: url += f"&excludeGarbageTime={gt_str}" @@ -1518,6 +1517,8 @@ def get_cfbd_player_season_stats( """ rebuilt_json = {} + rebuilt_json_list = [] + stat_columns = [ "season", "team_name", @@ -1749,452 +1750,109 @@ def get_cfbd_player_season_stats( if return_as_dict is True: return json_data - for player in tqdm(json_data): - player_id = int(player["playerId"]) + for player in json_data: + player_id = player["playerId"] player_name = player["player"] team_name = player["team"] team_conference = player["conference"] - s_category = player["category"] - s_type = player["statType"] - s_num = player["stat"] if rebuilt_json.get(player_id) is None: rebuilt_json[player_id] = {} - if s_category == "passing": - if s_type == "COMPLETIONS": - rebuilt_json[player_id]["player_id"] = player_name - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["passing_COMP"] = s_num - - elif s_type == "ATT": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["passing_ATT"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["passing_YDS"] = s_num - - elif 
s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["passing_TD"] = s_num - - elif s_type == "INT": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["passing_INT"] = s_num - # we can calculate these two later - elif s_type == "PCT": - pass - - elif s_type == "YPA": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "rushing": - if s_type == "CAR": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["rushing_CAR"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["rushing_YDS"] = s_num - - elif s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["rushing_TD"] = s_num - - elif s_type == "LONG": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["rushing_LONG"] = s_num - # we can calculate this later - elif s_type == "YPC": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "receiving": - if s_type == "REC": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference 
- rebuilt_json[player_id]["receiving_REC"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["receiving_YDS"] = s_num - - elif s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["receiving_TD"] = s_num - - elif s_type == "LONG": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["receiving_LONG"] = s_num - # we can calculate this later - elif s_type == "YPR": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "fumbles": - if s_type == "FUM": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["fumbles_FUM"] = s_num - - elif s_type == "LOST": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["fumbles_LOST"] = s_num - - elif s_type == "REC": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["fumbles_LOST"] = s_num - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "defensive": - if s_type == "TOT": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - 
rebuilt_json[player_id]["defensive_TOT"] = s_num - - elif s_type == "SOLO": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["defensive_SOLO"] = s_num - - elif s_type == "TFL": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["defensive_TFL"] = s_num - - elif s_type == "QB HUR": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["defensive_QB HUR"] = s_num - - elif s_type == "SACKS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["defensive_SACKS"] = s_num - - elif s_type == "PD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["defensive_PD"] = s_num - - elif s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["defensive_TD"] = s_num - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "interceptions": - if s_type == "INT": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["interceptions_INT"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - 
rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["interceptions_YDS"] = s_num - - elif s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["interceptions_TD"] = s_num - - elif s_type == "AVG": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "punting": - if s_type == "NO": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["punting_NO"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["punting_YDS"] = s_num - - elif s_type == "TB": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["punting_TB"] = s_num - - elif s_type == "In 20": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["punting_In 20"] = s_num - - elif s_type == "LONG": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["punting_LONG"] = s_num - - elif s_type == "YPP": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "kicking": - if s_type == "FGM": - rebuilt_json[player_id]["player_name"] = player_name - 
rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kicking_FGM"] = s_num - - elif s_type == "FGA": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kicking_FGA"] = s_num - - elif s_type == "LONG": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kicking_LONG"] = s_num - - elif s_type == "XPM": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kicking_XPM"] = s_num - - elif s_type == "XPA": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kicking_XPA"] = s_num - - elif s_type == "PTS": - pass - - elif s_type == "PCT": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "kickReturns": - if s_type == "NO": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kickReturns_NO"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kickReturns_YDS"] = s_num - - elif s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - 
rebuilt_json[player_id]["kickReturns_TD"] = s_num - - elif s_type == "LONG": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["kickReturns_LONG"] = s_num - # we can calculate this later - elif s_type == "AVG": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - elif s_category == "puntReturns": - if s_type == "NO": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["puntReturns_NO"] = s_num - - elif s_type == "YDS": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["puntReturns_YDS"] = s_num - - elif s_type == "TD": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["puntReturns_TD"] = s_num - - elif s_type == "LONG": - rebuilt_json[player_id]["player_name"] = player_name - rebuilt_json[player_id]["team_name"] = team_name - rebuilt_json[player_id]["team_conference"] = team_conference - rebuilt_json[player_id]["puntReturns_LONG"] = s_num - # we can calculate this later - elif s_type == "AVG": - pass - - else: - raise ValueError(f"Unhandled stat type: {s_type}") - - else: - raise ValueError(f"Unhandled stat category: {s_category}") - - del player_id, player_name, team_name, \ - team_conference, s_category, s_type, s_num - - for key, value in tqdm(rebuilt_json.items()): - row_df = pd.json_normalize(value) - row_df["player_id"] = key - final_df = pd.concat([final_df, row_df], ignore_index=True) - del row_df + stat_category = player["category"] + stat_type = player["statType"] + 
stat_name = f"{stat_category}_{stat_type}" - final_df = final_df.fillna(0) + stat_value = player["stat"] - final_df["season"] = season + rebuilt_json[player_id]["player_id"] = player_id + rebuilt_json[player_id]["player_name"] = player_name + rebuilt_json[player_id]["team_name"] = team_name + rebuilt_json[player_id]["team_conference"] = team_conference + rebuilt_json[player_id][stat_name] = stat_value - if filter_by_stat_category is False: - final_df = final_df.reindex(columns=stat_columns) - final_df = final_df.astype( - { - "passing_COMP": "int", - "passing_ATT": "int", - "rushing_CAR": "int", - "rushing_YDS": "int", - "receiving_REC": "int", - "receiving_YDS": "int", - "punting_NO": "int", - "punting_YDS": "int", - "kicking_FGM": "int", - "kicking_FGA": "int", - "kicking_XPM": "int", - "kicking_XPA": "int", - "kickReturns_NO": "int", - "kickReturns_YDS": "int", - "puntReturns_NO": "int", - "puntReturns_YDS": "int", - } - ) + for _, value in rebuilt_json.items(): + rebuilt_json_list.append(value) - final_df.loc[final_df["passing_ATT"] > 0, "passing_COMP%"] = ( - final_df["passing_COMP"] / final_df["passing_ATT"] - ) - final_df["passing_COMP%"] = final_df["passing_COMP%"].round(3) - - final_df.loc[final_df["rushing_CAR"] > 0, "rushing_AVG"] = ( - final_df["rushing_YDS"] / final_df["rushing_CAR"] - ) - final_df["rushing_AVG"] = final_df["rushing_AVG"].round(3) + final_df = pd.DataFrame(rebuilt_json_list) + final_df["season"] = season + # print(final_df.columns) - final_df.loc[final_df["receiving_REC"] > 0, "receiving_AVG"] = ( - final_df["receiving_YDS"] / final_df["receiving_REC"] - ) - final_df["receiving_AVG"] = final_df["receiving_AVG"].round(3) + final_df = final_df.rename( + columns={ + "passing_COMPLETIONS": "passing_COMP", + "passing_YPA": "passing_AVG", + "passing_PCT": "passing_COMP%", + "rushing_YPC": "rushing_AVG", + "punting_YPP": "punting_AVG", + "kicking_PCT": "kicking_FG%", + "receiving_YPR": "receiving_AVG", + } + ) + final_df = 
final_df.reindex(columns=stat_columns) + final_df = final_df.fillna(0) + final_df = final_df.astype( + { + "passing_COMP": "int", + "passing_ATT": "int", + "rushing_CAR": "int", + "rushing_YDS": "int", + "receiving_REC": "int", + "receiving_YDS": "int", + "punting_NO": "int", + "punting_YDS": "int", + "kicking_FGM": "int", + "kicking_FGA": "int", + "kicking_XPM": "int", + "kicking_XPA": "int", + "kickReturns_NO": "int", + "kickReturns_YDS": "int", + "puntReturns_NO": "int", + "puntReturns_YDS": "int", + }, + # errors="ignore" + ) - final_df.loc[final_df["punting_NO"] > 0, "punting_AVG"] = ( - final_df["punting_YDS"] / final_df["punting_NO"] - ) - final_df["punting_AVG"] = final_df["punting_AVG"].round(3) + final_df.loc[final_df["passing_ATT"] > 0, "passing_COMP%"] = ( + final_df["passing_COMP"] / final_df["passing_ATT"] + ) + final_df["passing_COMP%"] = final_df["passing_COMP%"].round(3) - final_df.loc[final_df["kicking_FGA"] > 0, "kicking_FG%"] = ( - final_df["kicking_FGM"] / final_df["kicking_FGA"] - ) - final_df["kicking_FG%"] = final_df["kicking_FG%"].round(5) + final_df.loc[final_df["rushing_CAR"] > 0, "rushing_AVG"] = ( + final_df["rushing_YDS"] / final_df["rushing_CAR"] + ) + final_df["rushing_AVG"] = final_df["rushing_AVG"].round(3) - final_df.loc[final_df["kicking_XPA"] > 0, "kicking_XP%"] = ( - final_df["kicking_XPM"] / final_df["kicking_XPA"] - ) - final_df["kicking_XP%"] = final_df["kicking_XP%"].round(5) + final_df.loc[final_df["receiving_REC"] > 0, "receiving_AVG"] = ( + final_df["receiving_YDS"] / final_df["receiving_REC"] + ) + final_df["receiving_AVG"] = final_df["receiving_AVG"].round(3) - final_df.loc[final_df["kickReturns_NO"] > 0, "kickReturns_AVG"] = ( - final_df["kickReturns_YDS"] / final_df["kickReturns_NO"] - ) - final_df["kickReturns_AVG"] = final_df["kickReturns_AVG"].round(3) + final_df.loc[final_df["punting_NO"] > 0, "punting_AVG"] = ( + final_df["punting_YDS"] / final_df["punting_NO"] + ) + final_df["punting_AVG"] = 
final_df["punting_AVG"].round(3) - final_df.loc[final_df["puntReturns_NO"] > 0, "puntReturns_AVG"] = ( - final_df["puntReturns_YDS"] / final_df["puntReturns_NO"] - ) - final_df["puntReturns_AVG"] = final_df["puntReturns_AVG"].round(3) + final_df.loc[final_df["kicking_FGA"] > 0, "kicking_FG%"] = ( + final_df["kicking_FGM"] / final_df["kicking_FGA"] + ) + final_df["kicking_FG%"] = final_df["kicking_FG%"].round(5) - elif filter_by_stat_category is True and stat_category == "passing": - try: - final_df = final_df.astype( - { - "passing_COMP": "int", - "passing_ATT": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat [passing_COMP]" - + " and [passing_ATT] into integers. " - + f"Full Exception: {e}" - ) + final_df.loc[final_df["kicking_XPA"] > 0, "kicking_XP%"] = ( + final_df["kicking_XPM"] / final_df["kicking_XPA"] + ) + final_df["kicking_XP%"] = final_df["kicking_XP%"].round(5) - final_df.loc[final_df["passing_ATT"] >= 1, "passing_COMP%"] = ( - final_df["passing_COMP"] / final_df["passing_ATT"] - ) + final_df.loc[final_df["kickReturns_NO"] > 0, "kickReturns_AVG"] = ( + final_df["kickReturns_YDS"] / final_df["kickReturns_NO"] + ) + final_df["kickReturns_AVG"] = final_df["kickReturns_AVG"].round(3) - final_df["passing_COMP%"] = final_df["passing_COMP%"].round(3) + final_df.loc[final_df["puntReturns_NO"] > 0, "puntReturns_AVG"] = ( + final_df["puntReturns_YDS"] / final_df["puntReturns_NO"] + ) + final_df["puntReturns_AVG"] = final_df["puntReturns_AVG"].round(3) + if filter_by_stat_category is True and stat_category == "passing": final_df = final_df[ [ @@ -2211,26 +1869,7 @@ def get_cfbd_player_season_stats( "passing_INT", ] ] - elif filter_by_stat_category is True and stat_category == "rushing": - try: - final_df = final_df.astype( - { - "rushing_CAR": "int", - "rushing_YDS": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat [rushing_CAR] " - + "and [rushing_YDS] into integers. 
" - + f"Full Exception: {e}" - ) - - final_df.loc[final_df["rushing_CAR"] >= 1, "rushing_AVG"] = ( - final_df["rushing_YDS"] / final_df["rushing_CAR"] - ) - final_df["rushing_AVG"] = final_df["rushing_AVG"].round(3) final_df = final_df[ [ @@ -2247,26 +1886,7 @@ def get_cfbd_player_season_stats( "rushing_LONG", ] ] - elif filter_by_stat_category is True and stat_category == "receiving": - try: - final_df = final_df.astype( - { - "receiving_REC": "int", - "receiving_YDS": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat [receiving_REC] " - + "and [receiving_YDS] into integers. " - + f"Full Exception: {e}" - ) - - final_df.loc[final_df["receiving_REC"] > 0, "receiving_AVG"] = ( - final_df["receiving_YDS"] / final_df["receiving_REC"] - ) - final_df["receiving_AVG"] = final_df["receiving_AVG"].round(3) final_df = final_df[ [ @@ -2283,7 +1903,6 @@ def get_cfbd_player_season_stats( "receiving_LONG", ] ] - elif filter_by_stat_category is True and stat_category == "fumbles": final_df = final_df[ [ @@ -2298,7 +1917,6 @@ def get_cfbd_player_season_stats( "fumbles_REC", ] ] - elif filter_by_stat_category is True and stat_category == "defensive": final_df = final_df[ [ @@ -2317,7 +1935,6 @@ def get_cfbd_player_season_stats( "defensive_TD", ] ] - elif filter_by_stat_category is True and stat_category == "interceptions": final_df = final_df[ [ @@ -2332,26 +1949,7 @@ def get_cfbd_player_season_stats( "interceptions_TD", ] ] - elif filter_by_stat_category is True and stat_category == "punting": - try: - final_df = final_df.astype( - { - "punting_NO": "int", - "punting_YDS": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat [punting_YDS] " - + "and [punting_NO] into integers. 
" - + f"Full Exception: {e}" - ) - - final_df.loc[final_df["punting_NO"] > 0, "punting_AVG"] = ( - final_df["punting_YDS"] / final_df["punting_NO"] - ) - final_df["punting_AVG"] = final_df["punting_AVG"].round(3) final_df = final_df[ [ @@ -2369,36 +1967,7 @@ def get_cfbd_player_season_stats( "punting_LONG", ] ] - elif filter_by_stat_category is True and stat_category == "kicking": - try: - final_df = final_df.astype( - { - "kicking_FGM": "int", - "kicking_FGA": "int", - "kicking_XPM": "int", - "kicking_XPA": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat the following columns into integers.:" - + "\n-[kicking_FGM]" - + "\n-[kicking_FGA]" - + "\n-[kicking_XPM]" - + "\n-[kicking_XPA]" - + f"\nFull Exception: {e}" - ) - - final_df.loc[final_df["kicking_FGA"] > 0, "kicking_FG%"] = ( - final_df["kicking_FGM"] / final_df["kicking_FGA"] - ) - final_df["kicking_FG%"] = final_df["kicking_FG%"].round(5) - - final_df.loc[final_df["kicking_XPA"] > 0, "kicking_XP%"] = ( - final_df["kicking_XPM"] / final_df["kicking_XPA"] - ) - final_df["kicking_XP%"] = final_df["kicking_XP%"].round(5) final_df = final_df[ [ @@ -2416,26 +1985,7 @@ def get_cfbd_player_season_stats( "kicking_XPA" "kicking_XP%", ] ] - elif filter_by_stat_category is True and stat_category == "kickReturns": - try: - final_df = final_df.astype( - { - "kickReturns_NO": "int", - "kickReturns_YDS": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat [passing_COMP] " - + "and [kickReturns_YDS] into integers. 
" - + f"Full Exception: {e}" - ) - - final_df.loc[final_df["kickReturns_NO"] > 0, "kickReturns_AVG"] = ( - final_df["kickReturns_YDS"] / final_df["kickReturns_NO"] - ) - final_df["kickReturns_AVG"] = final_df["kickReturns_AVG"].round(3) final_df = final_df[ [ @@ -2452,26 +2002,7 @@ def get_cfbd_player_season_stats( "kickReturns_LONG", ] ] - elif filter_by_stat_category is True and stat_category == "puntReturns": - try: - final_df = final_df.astype( - { - "puntReturns_NO": "int", - "puntReturns_YDS": "int", - } - ) - except Exception as e: - logging.warning( - "Could not reformat [passing_COMP] " - + "and [puntReturns_YDS] into integers." - + f"Full Exception: {e}" - ) - - final_df.loc[final_df["puntReturns_NO"] > 0, "puntReturns_AVG"] = ( - final_df["puntReturns_YDS"] / final_df["puntReturns_NO"] - ) - final_df["puntReturns_AVG"] = final_df["puntReturns_AVG"].round(3) final_df = final_df[ [ diff --git a/cfbd_json_py/plays.py b/cfbd_json_py/plays.py index f5c8658..76f7119 100644 --- a/cfbd_json_py/plays.py +++ b/cfbd_json_py/plays.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: plays.py # Purpose: Houses functions pertaining to CFB play data within the CFBD API. @@ -10,7 +10,7 @@ import pandas as pd import requests -from tqdm import tqdm +# from tqdm import tqdm from cfbd_json_py.utls import get_cfbd_api_token @@ -634,7 +634,9 @@ def get_cfbd_pbp_data( def get_cfbd_pbp_play_types( - api_key: str = None, api_key_dir: str = None, return_as_dict: bool = False + api_key: str = None, + api_key_dir: str = None, + return_as_dict: bool = False ): """ Allows you to get CFBD PBP play types from the CFBD API. 
@@ -680,6 +682,7 @@ def get_cfbd_pbp_play_types( """ # now = datetime.now() plays_df = pd.DataFrame() + plays_df_arr = [] row_df = pd.DataFrame() url = "https://api.collegefootballdata.com/play/types" @@ -727,16 +730,17 @@ def get_cfbd_pbp_play_types( if return_as_dict is True: return json_data - for p in tqdm(json_data): + for p in json_data: p_id = p["id"] row_df = pd.DataFrame({"play_type_id": p_id}, index=[0]) row_df["play_type_text"] = p["text"] row_df["play_type_abv"] = p["abbreviation"] - plays_df = pd.concat([plays_df, row_df], ignore_index=True) - + # plays_df = pd.concat([plays_df, row_df], ignore_index=True) + plays_df_arr.append(row_df) del row_df del p_id + plays_df = pd.concat(plays_df_arr, ignore_index=True) return plays_df @@ -756,6 +760,7 @@ def get_cfbd_pbp_stats( """ Allows you to get stats for various players from CFB play-by-play (PBP) data within the CFBD API. + Parameters ---------- @@ -1096,13 +1101,16 @@ def get_cfbd_pbp_stats( ) if season is None and game_id is None: - logging.warn( + logging.warning( "This endpoint only returns the top 1,000 results. " + "Not setting a value for `season` or `game_id` " + "is not a recommended practice." ) - elif season is not None and game_id is None: - logging.warn( + elif ( + (season is not None) and + (game_id is not None) + ): + logging.warning( "Setting a value for both `season` and `game_id` " + "may not yeld the results you want. " + "If you just want PBP stats for a valid game ID, " @@ -1215,7 +1223,9 @@ def get_cfbd_pbp_stats( def get_cfbd_pbp_stat_types( - api_key: str = None, api_key_dir: str = None, return_as_dict: bool = False + api_key: str = None, + api_key_dir: str = None, + return_as_dict: bool = False ): """ Allows you to get CFBD PBP stat types from the CFBD API. 
@@ -1261,6 +1271,7 @@ def get_cfbd_pbp_stat_types( """ # now = datetime.now() plays_df = pd.DataFrame() + plays_df_arr = [] row_df = pd.DataFrame() url = "https://api.collegefootballdata.com/play/types" @@ -1308,15 +1319,21 @@ def get_cfbd_pbp_stat_types( if return_as_dict is True: return json_data - for p in tqdm(json_data): + for p in json_data: p_id = p["id"] row_df = pd.DataFrame({"stat_type_id": p_id}, index=[0]) - row_df["stat_type_text"] = p["name"] - plays_df = pd.concat([plays_df, row_df], ignore_index=True) + row_df["stat_type_abv"] = p["abbreviation"] + try: + row_df["stat_type_text"] = p["name"] + except KeyError: + row_df["stat_type_text"] = p["text"] + # plays_df = pd.concat([plays_df, row_df], ignore_index=True) + plays_df_arr.append(row_df) del row_df del p_id + plays_df = pd.concat(plays_df_arr, ignore_index=True) return plays_df diff --git a/cfbd_json_py/rankings.py b/cfbd_json_py/rankings.py index f063805..e195526 100644 --- a/cfbd_json_py/rankings.py +++ b/cfbd_json_py/rankings.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: rankings.py # Purpose: Houses functions pertaining to CFB poll data within the CFBD API. 
@@ -9,7 +9,7 @@ import pandas as pd import requests -from tqdm import tqdm +# from tqdm import tqdm from cfbd_json_py.utls import get_cfbd_api_token @@ -284,7 +284,7 @@ def get_cfbd_poll_rankings( if return_as_dict is True: return json_data - for week in tqdm(json_data): + for week in json_data: w_season = week["season"] w_season_type = week["seasonType"] w_week = week["week"] diff --git a/cfbd_json_py/recruiting.py b/cfbd_json_py/recruiting.py index 152efc7..51dadb4 100644 --- a/cfbd_json_py/recruiting.py +++ b/cfbd_json_py/recruiting.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: recruiting.py # Purpose: Houses functions pertaining to CFB recruiting data @@ -291,7 +291,7 @@ def get_cfbd_player_recruit_ratings( """ - now = datetime.now() + # now = datetime.now() recruit_df = pd.DataFrame() # row_df = pd.DataFrame() url = "https://api.collegefootballdata.com/recruiting/players" @@ -315,9 +315,12 @@ def get_cfbd_player_recruit_ratings( else: real_api_key = "Bearer " + real_api_key - if season > (now.year + 1): - raise ValueError(f"`season` cannot be greater than {season}.") - elif season < 1869: + # if season > (now.year + 1): + # raise ValueError(f"`season` cannot be greater than {season}.") + # elif season < 1869: + # raise ValueError("`season` cannot be less than 1869.") + + if season < 1869: raise ValueError("`season` cannot be less than 1869.") if ( @@ -570,7 +573,7 @@ def get_cfbd_team_recruiting_ratings( a dictionary object with CFB Poll data. 
""" - now = datetime.now() + # now = datetime.now() recruit_df = pd.DataFrame() # row_df = pd.DataFrame() url = "https://api.collegefootballdata.com/recruiting/teams" @@ -594,9 +597,12 @@ def get_cfbd_team_recruiting_ratings( else: real_api_key = "Bearer " + real_api_key - if season is not None and season > (now.year + 1): - raise ValueError(f"`season` cannot be greater than {season}.") - elif season is not None and season < 1869: + # if season > (now.year + 1): + # raise ValueError(f"`season` cannot be greater than {season}.") + # elif season < 1869: + # raise ValueError("`season` cannot be less than 1869.") + + if season < 1869: raise ValueError("`season` cannot be less than 1869.") if season is None and team is None: diff --git a/cfbd_json_py/stats.py b/cfbd_json_py/stats.py index acc0240..814a423 100644 --- a/cfbd_json_py/stats.py +++ b/cfbd_json_py/stats.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 PM EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: stats.py # Purpose: Houses functions pertaining to CFB team/player @@ -611,7 +611,6 @@ def get_cfbd_team_season_stats( for key, value in tqdm(rebuilt_json.items()): row_df = pd.DataFrame(value, index=[0]) final_df = pd.concat([final_df, row_df], ignore_index=True) - # print() final_df = final_df[stat_columns] return final_df @@ -962,10 +961,6 @@ def get_cfbd_advanced_team_season_stats( if return_as_dict is True: return json_data - # final_df = pd.json_normalize(json_data) - - # print(final_df.columns) - for team in tqdm(json_data): t_season = team["season"] t_team = team["team"] diff --git a/cfbd_json_py/utls.py b/cfbd_json_py/utls.py index e4aaf5c..879ed77 100644 --- a/cfbd_json_py/utls.py +++ b/cfbd_json_py/utls.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 08/28/2024 02:10 PM EDT # Author: Joseph Armstrong 
(armstrongjoseph08@gmail.com) # File Name: utls.py # Purpose: Houses utility functions for this python package. diff --git a/cfbd_json_py/venues.py b/cfbd_json_py/venues.py index 21a44c8..0557056 100644 --- a/cfbd_json_py/venues.py +++ b/cfbd_json_py/venues.py @@ -1,5 +1,5 @@ # Creation Date: 08/30/2023 01:13 EDT -# Last Updated Date: 08/13/2024 02:10 PM EDT +# Last Updated Date: 09/16/2024 06:10 PM EDT # Author: Joseph Armstrong (armstrongjoseph08@gmail.com) # File Name: venues.py # Purpose: Houses functions pertaining to @@ -13,7 +13,9 @@ def get_cfbd_venues( - api_key: str = None, api_key_dir: str = None, return_as_dict: bool = False + api_key: str = None, + api_key_dir: str = None, + return_as_dict: bool = False ): """ Allows a user to get CFB venue/stadium information from the CFBD API. diff --git a/docs/cfbd_json_py.html b/docs/cfbd_json_py.html index 26a1ea8..abc3657 100644 --- a/docs/cfbd_json_py.html +++ b/docs/cfbd_json_py.html @@ -1937,4 +1937,4 @@