-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfree agents.R
134 lines (115 loc) · 6.84 KB
/
free agents.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
library(tidyverse)
library(rvest)
library(janitor)
library(polite)
library(lubridate)
library(openxlsx)
realgm_bow=bow("https://basketball.realgm.com/",user_agent = "Sumitro Datta",force=TRUE)
print(realgm_bow)
salary_cap_hist<-scrape(nod(realgm_bow,path="nba/info/salary_cap")) %>%
html_nodes(xpath='//*[contains(concat( " ", @class, " " ), concat( " ", "compact", " " ))]') %>%
.[[1]] %>% html_table(fill=TRUE) %>%
#add correct column names (ended up as first row)
row_to_names(1) %>% clean_names() %>% select(3:4) %>%
#only take year and cap number, parse cap into a number (has dollar sign and commas originally)
rename(season=luxury_tax,cap=bae) %>%
mutate(season=as.numeric(str_sub(season,start=-4))) %>%
mutate(cap=parse_number(cap))
write_csv(salary_cap_hist,"Data/Salary Cap History.csv")
spotrac_bow=bow("https://www.spotrac.com/",user_agent = "Sumitro Datta",force=TRUE)
print(spotrac_bow)
fa_current_yr<-scrape(nod(spotrac_bow,path="nba/free-agents/2023")) %>% html_nodes("table") %>% .[[1]] %>% html_table()
fa_with_options=scrape(nod(spotrac_bow,path="nba/option/2023")) %>% html_nodes("table") %>% .[[1]] %>% html_table()
intermed_fa_current_yr=fa_current_yr %>%
#filter out coaches
filter(`Pos.` != "COA") %>%
rename(Player=1) %>%
mutate(Player=str_replace_all(Player,"[\r\n]" , "")) %>%
#remove repeat of player's last name
separate(Player,into=c('to_discard','Player'),sep='\\s{2,100}',extra="merge") %>% mutate(Player=str_trim(Player)) %>%
select(-c(to_discard,Age,"Pos.")) %>% arrange(Player)
cleaned_fa_current_yr=left_join(intermed_fa_current_yr,fa_with_options) %>%
#options tracked more up to date, so first element in coalesce with Type column
mutate(Type=str_trim(word(Type)),type=coalesce(Option,Type)) %>%
select(player=Player,type,experience=Exp,sal_2023="Cap Hit") %>% mutate(season=2023) %>%
#add salary amounts for options
mutate(contract_yrs=NA,
first_year_percent_of_cap=ifelse(str_detect(type,"Player|Club"),parse_number(sal_2023),NA)) %>%
select(-sal_2023) %>%
#change names to match basketball-reference data
mutate(player=case_when(str_detect(player,"AJ Green")~"A.J. Green",
str_detect(player,'Bruce Brown Jr.')~'Bruce Brown',
str_detect(player,'Boban')~'Boban Marjanović',
str_detect(player,'Bogdan Bogdan')~'Bogdan Bogdanović',
str_detect(player,"Dario")~"Dario Šarić",
str_detect(player,'Goran')~'Goran Dragić',
str_detect(player,"Ishmael")~"Ish Smith",
str_detect(player,"Ishmail")~"Ish Wainright",
str_detect(player,"J.D. Davison")~"JD Davison",
str_detect(player,"Juancho")~"Juancho Hernangómez",
str_detect(player,"Kristaps")~"Kristaps Porziņģis",
str_detect(player,"Moussa")~"Moussa Diabaté",
str_detect(player,"Vucevic")~"Nikola Vučević",
str_detect(player,"PJ Wash")~"P.J. Washington",
str_detect(player,"RJ Hamp")~"R.J. Hampton",
str_detect(player,"Sviatoslav")~"Svi Mykhailiuk",
str_detect(player,"Theo")~"Théo Maledon",
str_detect(player,"Tillman")~"Xavier Tillman Sr.",
str_detect(player,"Willy Hernan")~"Willy Hernangómez",
TRUE~player))
write_csv(cleaned_fa_current_yr,"Data/Free Agents 2023.csv")
#use pro-sports-transactions to get full picture of free agents (players waived during season)
#start from 2011-2012 season (first season under new CBA)
pro_sports_transact_bow=bow("https://www.prosportstransactions.com/",user_agent = "Sumitro Datta",force=TRUE)
print(pro_sports_transact_bow)
get_pro_sports_transact<-function(begin_date,end_date){
session=nod(pro_sports_transact_bow,path=paste0(
"basketball/Search/SearchResults.php?BeginDate=",begin_date,"&EndDate=",end_date,"&PlayerMovementChkBx=yes&Submit=Search"))
pages_string=scrape(session) %>%
html_nodes(xpath='//*[contains(concat( " ", @class, " " ), concat( " ", "bodyCopy", " " ))]') %>%
.[[3]] %>% html_text()
number_pages=as.numeric(str_sub(pages_string,as.numeric(stringi::stri_locate_last_regex(pages_string,"\\s")[,1])))-1
transactions=tibble()
sapply(0:number_pages*25,function(x){
new_session=nod(pro_sports_transact_bow,path=paste0(
"basketball/Search/SearchResults.php?BeginDate=",begin_date,"&EndDate=",end_date,"&PlayerMovementChkBx=yes&Submit=Search&start=",
x))
new_transact_df=scrape(new_session) %>%
html_nodes(xpath='//*[contains(concat( " ", @class, " " ), concat( " ", "center", " " ))]') %>%
html_table() %>% .[[1]] %>% row_to_names(1) %>% clean_names()
transactions<<-bind_rows(transactions,new_transact_df)
print(x)
})
return(transactions)
}
transactions=get_pro_sports_transact(begin_date="2011-12-25",end_date="2021-10-18")
x2022_transact=get_pro_sports_transact(begin_date="2021-10-19",end_date="2022-10-17") #as of Jan 6 2023
clean_transacts<-function(transact_df){
cleaned_df=transact_df %>% mutate(date=ymd(date)) %>% na_if("") %>%
#combine two cols into one player column
mutate(player=coalesce(acquired,relinquished),.before="notes") %>%
select(-c(acquired,relinquished)) %>%
#remove front office & coaching staff
filter(str_detect(
notes,"coach|GM|general manager|basketball operations|owner|fired|hired|promote|demote|resigned|renamed",negate = TRUE)) %>%
filter(str_detect(notes,"trade|NBA draft|first round pick",negate = TRUE)) %>%
mutate(player=str_replace(player,"• ",""))
return(cleaned_df)
}
cleaned_transact=clean_transacts(transactions) %>%
#season corresponds from first day of games to day before first day of games of next season
mutate(season=case_when(
date %within% interval(ymd("2020-12-22"),ymd("2021-10-18"))~2021,
date %within% interval(ymd("2019-10-22"),ymd("2020-12-21"))~2020,
date %within% interval(ymd("2018-10-16"),ymd("2019-10-21"))~2019,
date %within% interval(ymd("2017-10-17"),ymd("2018-10-15"))~2018,
date %within% interval(ymd("2016-10-25"),ymd("2017-10-16"))~2017,
date %within% interval(ymd("2015-10-27"),ymd("2016-10-24"))~2016,
date %within% interval(ymd("2014-10-28"),ymd("2015-10-26"))~2015,
date %within% interval(ymd("2013-10-29"),ymd("2014-10-27"))~2014,
date %within% interval(ymd("2012-10-30"),ymd("2013-10-28"))~2013,
date %within% interval(ymd("2011-12-25"),ymd("2012-10-29"))~2012))
cleaned_2022_transact=clean_transacts(x2022_transact) %>% mutate(season=2022)
#include option years, partial guaranteed years in counting contract years
write.xlsx(cleaned_transact,"Data/Transactions 2012-2021.xlsx")
write.xlsx(cleaned_2022_transact,"Data/Transactions 2022.xlsx")