-
Notifications
You must be signed in to change notification settings - Fork 1
/
get_player_stats.py
103 lines (84 loc) · 2.81 KB
/
get_player_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import json
import logging
import os
import pandas as pd
from tqdm import tqdm
from utls import (
get_json_from_url,
convert_numeric,
get_latest_season_year,
team_ids_list,
)
# Configure the root logger at WARNING: with this level the logging.info()
# progress messages emitted in this file are suppressed unless the level is
# lowered (e.g. to logging.INFO).
logging.basicConfig(level=logging.WARNING)
def get_ids_from_pa(team_id: int, pa: int, find_batter: bool = True) -> list[str]:
    """Return the IDs of a team's players with at least ``pa`` plate appearances.

    The player list for the latest season is requested from the spaia.jp
    ``batter_list`` endpoint, or ``pitcher_list`` when ``find_batter`` is false.

    Args:
        team_id: Team identifier sent as the ``team`` query parameter.
        pa: Minimum number of plate appearances (inclusive) a player needs.
        find_batter: Query the batter list when true, the pitcher list otherwise.

    Returns:
        The ``PlayerCD`` value of every qualifying player, in response order.
        Entries whose data cannot be interpreted are logged and skipped.
    """
    logging.info("Get list of ids")
    player_type = "batter" if find_batter else "pitcher"
    latest_season = get_latest_season_year()
    url = f"https://spaia.jp/baseball/npb/api/{player_type}_list?team={team_id}&year={latest_season}"
    players_json = get_json_from_url(url)

    id_list = []
    for player in players_json:
        # NOTE(review): the same "PlateAppearance" filter is applied when the
        # pitcher list is requested; confirm that payload carries this field.
        player_pa = player.get("PlateAppearance")
        try:
            # Falsy values (missing key, empty string, 0) are skipped without
            # attempting the integer conversion.
            if player_pa and int(player_pa) >= pa:
                id_list.append(player["PlayerCD"])
        except (KeyError, TypeError, ValueError):
            # Best effort: one malformed entry must not abort the whole team,
            # but only data errors are absorbed (not KeyboardInterrupt etc.).
            logging.warning(
                "Skipping player %s: unusable PlateAppearance/PlayerCD (%r)",
                player.get("Name"),
                player_pa,
            )
    return id_list
def get_player_stats(id_list: list[str], save_path: str) -> dict:
    """Collect per-season hitting stats for each player and optionally save them.

    For every player id the ``hitting_stats_by_year`` endpoint on spaia.jp is
    queried and the selected metrics of each returned season are gathered.

    Args:
        id_list: Player ids to look up.
        save_path: Destination JSON file. When empty, nothing is written.
            Missing parent directories are created.

    Returns:
        A dict keyed by player id. Each value has the form
        ``{"name": <Name of the first row>, "stats": {<Year>: {<metric>: value}}}``.
        Players for which the endpoint returns no rows are omitted.
    """
    logging.info("Get player stats")
    metrics = [
        "TeamCD",
        "BattingAverage",
        "Game",
        "PlateAppearance",
        "AtBat",
        "Run",
        "Hit",
        "Double",
        "Triple",
        "Homerun",
        "Base",
        "RunsBattingIn",
        "StrikeOut",
        "BaseOnBall",
        "HitByPitch",
        "SacrificeHit",
        "SacrificeFly",
        "StolenBase",
        "CaughtStealing",
        "DoublePlay",
        "Error",
        "Slugging",
        "OnBase",
        "Ops",
    ]
    organized_data = {}
    for player_id in id_list:
        url = f"https://spaia.jp/baseball/npb/api/hitting_stats_by_year?player_id={player_id}"
        json_data = get_json_from_url(url)
        if not json_data:
            # An empty response has no first row to read the name from; skip
            # the player instead of failing on json_data[0].
            logging.warning("No hitting stats returned for player %s", player_id)
            continue

        player_dict = {"name": json_data[0]["Name"], "stats": {}}
        for seasonal_stat in json_data:
            stats = {}
            for metric in metrics:
                val = seasonal_stat[metric]
                if "ID" in metric or "CD" in metric:
                    # Identifier-like fields (here: TeamCD) are kept verbatim.
                    stats[metric] = val
                elif val == "-":
                    # "-" is the placeholder for an unavailable value.
                    stats[metric] = None
                else:
                    stats[metric] = convert_numeric(val)
            # If two rows share a Year, the later one replaces the earlier.
            player_dict["stats"][seasonal_stat["Year"]] = stats
        organized_data[player_id] = player_dict
    logging.info("Data extracted")

    if save_path:  # save if save_path is non-empty
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            # open() does not create directories; make sure the target exists.
            os.makedirs(parent_dir, exist_ok=True)
        with open(save_path, "w", encoding="utf-8") as json_file:
            json.dump(organized_data, json_file, indent=4)
    return organized_data
if __name__ == "__main__":
    # Script entry point: for every team, select the players with at least
    # 100 plate appearances and write their stats to player_stats/<team>.json.
    # The output directory is created up front so the first write cannot fail
    # on a fresh checkout.
    os.makedirs("player_stats", exist_ok=True)
    team_ids = team_ids_list()
    for team_name, team_id in tqdm(team_ids.items()):
        logging.info("===== %s ======", team_name)
        player_ids = get_ids_from_pa(team_id, 100)
        get_player_stats(player_ids, f"player_stats/{team_name}.json")