diff --git a/tweets/1711818907449766233/Career WAR by Debut Team.Rmd b/tweets/1711818907449766233/Career WAR by Debut Team.Rmd new file mode 100644 index 0000000..f930c6b --- /dev/null +++ b/tweets/1711818907449766233/Career WAR by Debut Team.Rmd @@ -0,0 +1,134 @@ +# Setup +```{r setup, message = FALSE} +library(jsonlite) +library(dplyr) +library(Lahman) +library(baseballr) +library(ggplot2) +library(ggtext) + +player_ids.df <- playerid_lookup(last_name = "", first_name = "") +``` + +# Each MLB Player's 1st Team (1871 - 2022) +```{r} +# Debut teams +debut.teams.df <- + union(Lahman::Batting %>% + filter(stint == 1) %>% + select(playerID, yearID, teamID), + Lahman::Pitching %>% + filter(stint == 1) %>% + select(playerID, yearID, teamID) + ) %>% + group_by(playerID) %>% + arrange(yearID) %>% + filter(row_number() == 1) %>% + left_join(Lahman::Teams %>% select(yearID, teamID, franchID), + by = c("yearID" = "yearID", "teamID" = "teamID")) %>% + left_join(Lahman::People %>% select(playerID, bbrefID), by = "playerID") %>% + left_join(player_ids.df %>% select(bbref_id, fangraphs_id), + by = c("bbrefID" = "bbref_id")) %>% + ungroup() %>% + select(fangraphs_id, franchID) +debut.teams.df %>% head(10) +``` + +# Career fWAR for Each Player +## Batting +```{r} +# Batter Seasons +base.url <- "https://www.fangraphs.com/api/leaders/major-league/data" +query.params <- list(age = "", pos = "all", stats = "bat", lg = "al%2Cnl", + qual = "0", season = "2022", season1 = "1871", + startdate = "", enddate = "", month = "0", hand = "", + team = "0", pageitems = "2000000000", pagenum = "1", + ind = "0", rost = "0", players = "", type = "8", + sortdir = "default", sortstat = "WAR") +sep = "?" 
+url <- base.url +for (param in names(query.params)) { + url <- paste(url, sep, param, "=", query.params[[param]], sep = "") + sep = "&" +} +batter.seasons.df <- fromJSON(url)$data %>% + select(Name, WAR) %>% + mutate(Type = "Batter") +batter.seasons.df %>% head(10) +``` + +## Pitching +```{r} +# Pitcher Seasons +query.params$stats <- "pit" + +sep = "?" +url <- base.url +for (param in names(query.params)) { + url <- paste(url, sep, param, "=", query.params[[param]], sep = "") + sep = "&" +} +pitcher.seasons.df <- fromJSON(url)$data %>% + select(Name, WAR) %>% + mutate(Type = "Pitcher") +pitcher.seasons.df %>% head(10) +``` + +## Combine Batting & Pitching +```{r} +war.df <- union(batter.seasons.df, pitcher.seasons.df) %>% + mutate(fangraphs_id = as.integer(gsub(".*playerid=(.*)&.*", "\\1", Name)), + Name = gsub(".*>(.*)<.*", "\\1", Name)) %>% + group_by(fangraphs_id) %>% + summarise(Name = first(Name), WAR = sum(WAR)) %>% + full_join(debut.teams.df, by = "fangraphs_id") %>% + rename(debut_franchise = franchID) +war.df %>% head(10) +``` + +## Data Quality Check +```{r} +cat(scales::percent(sum(is.na(war.df$debut_franchise)) / + length(war.df$debut_franchise), accuracy = 0.1), + "of player IDs were not mapped successfully") +war.df %>% filter(is.na(debut_franchise)) %>% arrange(desc(WAR)) +``` + +# Aggregate fWAR by Franchise +```{r} +team.war.df <- war.df %>% + group_by(debut_franchise) %>% + summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>% + arrange(WAR) %>% + tail(31) %>% + mutate( + `WAR/player` = WAR / players, + logo = ifelse( + debut_franchise != "LOU", + paste("", sep = ""), + "" + ) + ) +team.war.df$logo <- factor(team.war.df$logo, levels = team.war.df$logo) +team.war.df %>% head(10) +``` + +# Graph +```{r} +team.war.sum.plot <- ggplot(team.war.df, aes(x = reorder(logo, WAR), y = WAR)) + + geom_col(fill = "#69b3a2") + + geom_text(aes(label = round(WAR, digits = 0)), color = "white", size = 6, + hjust = 1.1) + + coord_flip() + + 
scale_x_discrete(name = NULL, labels = team.war.df$logo) + + ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") + + theme(plot.title = element_text(size = 22), + axis.text.x = element_text(size = 14), + axis.title.x = element_text(size = 16, face = "bold"), + axis.text.y = element_markdown()) + +ggsave("~/Desktop/peteb206.github.io/tweets/1/team_war_sum.png", + plot = team.war.sum.plot, width = 15, height = 12) +team.war.sum.plot +``` \ No newline at end of file diff --git a/tweets/1711818907449766233/Career-WAR-by-Debut-Team.html b/tweets/1711818907449766233/Career-WAR-by-Debut-Team.html new file mode 100644 index 0000000..9bd9068 --- /dev/null +++ b/tweets/1711818907449766233/Career-WAR-by-Debut-Team.html @@ -0,0 +1,635 @@ + + + + +
+ + + + + + + + + + +library(jsonlite)
+library(dplyr)
+library(Lahman)
+library(baseballr)
+library(ggplot2)
+library(ggtext)
+
+# Call playerid_lookup() with empty first/last name filters. Presumably this
+# returns the whole player ID register rather than a single player (TODO
+# confirm). The debut-team chunk below reads its bbref_id and fangraphs_id
+# columns to translate Lahman players into FanGraphs IDs.
+player_ids.df <- playerid_lookup(last_name = "", first_name = "")
+# Debut teams
+# One row per player: the franchise of the first team he appeared for, keyed by
+# FanGraphs ID so it can be joined to the fWAR totals later.
+debut.teams.df <-
+  union(
+    Lahman::Batting %>%
+      filter(stint == 1) %>%
+      select(playerID, yearID, teamID),
+    Lahman::Pitching %>%
+      filter(stint == 1) %>%
+      select(playerID, yearID, teamID)
+  ) %>%
+  # Earliest season per player; the first stint of that season is the debut.
+  group_by(playerID) %>%
+  arrange(yearID) %>%
+  filter(row_number() == 1) %>%
+  # Season-specific team code -> franchise code.
+  left_join(Lahman::Teams %>% select(yearID, teamID, franchID),
+            by = c("yearID" = "yearID", "teamID" = "teamID")) %>%
+  left_join(Lahman::People %>% select(playerID, bbrefID), by = "playerID") %>%
+  # dplyr joins treat NA keys as equal by default, so a player with a missing
+  # bbrefID would be matched to every lookup row with a missing bbref_id and
+  # be duplicated under unrelated FanGraphs IDs. Never match on NA.
+  left_join(player_ids.df %>% select(bbref_id, fangraphs_id),
+            by = c("bbrefID" = "bbref_id"),
+            na_matches = "never") %>%
+  ungroup() %>%
+  select(fangraphs_id, franchID)
+debut.teams.df %>% head(10)
+## # A tibble: 10 × 2
+## fangraphs_id franchID
+## <int> <fct>
+## 1 1000017 TRO
+## 2 1000070 ROK
+## 3 1000164 CFC
+## 4 1000168 OLY
+## 5 1000272 ROK
+## 6 1000310 KEK
+## 7 1000543 ROK
+## 8 1000573 BNA
+## 9 1000591 KEK
+## 10 1000608 BNA
+# Batter Seasons
+# Download the FanGraphs batting leaderboard (season1 = 1871 through
+# season = 2022, qual = 0) and keep the Name and WAR columns.
+base.url <- "https://www.fangraphs.com/api/leaders/major-league/data"
+query.params <- list(age = "", pos = "all", stats = "bat", lg = "al%2Cnl",
+                     qual = "0", season = "2022", season1 = "1871",
+                     startdate = "", enddate = "", month = "0", hand = "",
+                     team = "0", pageitems = "2000000000", pagenum = "1",
+                     ind = "0", rost = "0", players = "", type = "8",
+                     sortdir = "default", sortstat = "WAR")
+# Build "key=value" pairs joined by "&" in one vectorised step. Values are
+# pasted verbatim because they are already URL-encoded (e.g. "al%2Cnl").
+url <- paste0(
+  base.url, "?",
+  paste0(names(query.params), "=", unlist(query.params), collapse = "&")
+)
+batter.seasons.df <- fromJSON(url)$data %>%
+  select(Name, WAR) %>%
+  mutate(Type = "Batter")
+batter.seasons.df %>% head(10)
+## Name
+## 1 <a href="statss.aspx?playerid=1011327&position=OF">Babe Ruth</a>
+## 2 <a href="statss.aspx?playerid=1109&position=OF">Barry Bonds</a>
+## 3 <a href="statss.aspx?playerid=1008315&position=OF">Willie Mays</a>
+## 4 <a href="statss.aspx?playerid=1002378&position=OF">Ty Cobb</a>
+## 5 <a href="statss.aspx?playerid=1013485&position=SS">Honus Wagner</a>
+## 6 <a href="statss.aspx?playerid=1000001&position=OF">Hank Aaron</a>
+## 7 <a href="statss.aspx?playerid=1012309&position=OF">Tris Speaker</a>
+## 8 <a href="statss.aspx?playerid=1014040&position=OF">Ted Williams</a>
+## 9 <a href="statss.aspx?playerid=1006030&position=2B">Rogers Hornsby</a>
+## 10 <a href="statss.aspx?playerid=1009405&position=1B/OF">Stan Musial</a>
+## WAR Type
+## 1 167.0179 Batter
+## 2 164.4482 Batter
+## 3 149.8535 Batter
+## 4 149.0767 Batter
+## 5 138.0926 Batter
+## 6 136.3079 Batter
+## 7 130.1979 Batter
+## 8 129.7669 Batter
+## 9 129.1392 Batter
+## 10 126.3664 Batter
+# Pitcher Seasons
+# Reuse the batting query, switching only the stats group to pitching.
+query.params$stats <- "pit"
+
+# Build "key=value" pairs joined by "&" in one vectorised step. Values are
+# pasted verbatim because they are already URL-encoded (e.g. "al%2Cnl").
+url <- paste0(
+  base.url, "?",
+  paste0(names(query.params), "=", unlist(query.params), collapse = "&")
+)
+pitcher.seasons.df <- fromJSON(url)$data %>%
+  select(Name, WAR) %>%
+  mutate(Type = "Pitcher")
+pitcher.seasons.df %>% head(10)
+## Name
+## 1 <a href="statss.aspx?playerid=815&position=P">Roger Clemens</a>
+## 2 <a href="statss.aspx?playerid=1014369&position=P">Cy Young</a>
+## 3 <a href="statss.aspx?playerid=104&position=P">Greg Maddux</a>
+## 4 <a href="statss.aspx?playerid=1006511&position=P">Walter Johnson</a>
+## 5 <a href="statss.aspx?playerid=60&position=P">Randy Johnson</a>
+## 6 <a href="statss.aspx?playerid=1011348&position=P">Nolan Ryan</a>
+## 7 <a href="statss.aspx?playerid=1001098&position=P">Bert Blyleven</a>
+## 8 <a href="statss.aspx?playerid=1010210&position=P">Gaylord Perry</a>
+## 9 <a href="statss.aspx?playerid=1001964&position=P">Steve Carlton</a>
+## 10 <a href="statss.aspx?playerid=1000128&position=P">Pete Alexander</a>
+## WAR Type
+## 1 133.68920 Pitcher
+## 2 131.46872 Pitcher
+## 3 116.66175 Pitcher
+## 4 116.41905 Pitcher
+## 5 110.45433 Pitcher
+## 6 106.74577 Pitcher
+## 7 102.91322 Pitcher
+## 8 100.06021 Pitcher
+## 9 96.45129 Pitcher
+## 10 95.74027 Pitcher
+# Stack batting and pitching WAR, total it per FanGraphs player ID, and attach
+# each player's debut franchise.
+# bind_rows() is used instead of union() because union() is a set operation
+# that would silently drop any rows that happen to be identical.
+war.df <- bind_rows(batter.seasons.df, pitcher.seasons.df) %>%
+  mutate(
+    # Name arrives as an HTML link such as
+    # <a href="statss.aspx?playerid=123&position=OF">Player</a>.
+    # Capture digits only, so a later "&" in the string cannot leak into the ID.
+    fangraphs_id = as.integer(sub(".*playerid=([0-9]+).*", "\\1", Name)),
+    # Keep just the link text (the player's display name).
+    Name = gsub(".*>(.*)<.*", "\\1", Name)
+  ) %>%
+  group_by(fangraphs_id) %>%
+  summarise(Name = first(Name), WAR = sum(WAR)) %>%
+  # Full join: keeps players with WAR but no debut team, and vice versa.
+  full_join(debut.teams.df, by = "fangraphs_id") %>%
+  rename(debut_franchise = franchID)
+war.df %>% head(10)
+## # A tibble: 10 × 4
+## fangraphs_id Name WAR debut_franchise
+## <int> <chr> <dbl> <fct>
+## 1 1 Alfredo Amezaga 3.22 ANA
+## 2 2 Garret Anderson 23.9 ANA
+## 3 3 Kevin Appier 50.4 KCR
+## 4 4 Larry Barnes -0.567 ANA
+## 5 5 Scott Patterson 0.0469 NYY
+## 6 6 Jamie Burke 0.491 ANA
+## 7 7 Mickey Callaway 0.843 TBD
+## 8 8 Dennis Cook 5.31 SFG
+## 9 10 David Eckstein 16.7 ANA
+## 10 11 Darin Erstad 28.5 ANA
+# Report the share of rows in war.df that have no debut franchise, then list
+# those rows from highest to lowest WAR.
+cat(
+  scales::percent(
+    with(war.df, sum(is.na(debut_franchise)) / length(debut_franchise)),
+    accuracy = 0.1
+  ),
+  "of player IDs were not mapped successfully"
+)
+war.df %>%
+  filter(is.na(debut_franchise)) %>%
+  arrange(desc(WAR))
+## # A tibble: 21 × 4
+## fangraphs_id Name WAR debut_franchise
+## <int> <chr> <dbl> <fct>
+## 1 1010104 "George Pearce" 3.73 <NA>
+## 2 1000777 "Ed Begley" 0.555 <NA>
+## 3 1010739 "Rip Reagan" 0.271 <NA>
+## 4 1010882 "Bill Rhodes" 0.161 <NA>
+## 5 1000511 "Bill Banks" 0.0872 <NA>
+## 6 1009235 "Sparrow Morton" 0.0719 <NA>
+## 7 1008462 "Jim McDonald" 0.0179 <NA>
+## 8 1005394 "Ben Harrison" -0.0173 <NA>
+## 9 1010128 "Monte Peffer" -0.0240 <NA>
+## 10 1002444 " Collins" -0.0451 <NA>
+## # ℹ 11 more rows
+# Total career WAR and player count per debut franchise, keeping the 31
+# franchises with the most WAR (sorted ascending) plus an HTML logo tag used
+# as the axis label in the plot.
+team.war.df <- war.df %>%
+  # Drop players with no debut franchise explicitly. Otherwise they form an NA
+  # group whose `!= "LOU"` test is NA, which would yield an NA logo if that
+  # group ever ranked inside the top 31.
+  filter(!is.na(debut_franchise)) %>%
+  group_by(debut_franchise) %>%
+  summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>%
+  arrange(WAR) %>%
+  tail(31) %>%
+  mutate(
+    `WAR/player` = WAR / players,
+    # LOU gets a separately hosted logo; every other franchise uses the
+    # franchise-code image on the ssref CDN.
+    logo = ifelse(
+      debut_franchise != "LOU",
+      paste0("<img src=\"https://cdn.ssref.net/req/202310031/tlogo/br/",
+             debut_franchise, ".png\" height=\"25\"/>"),
+      "<img src=\"https://upload.wikimedia.org/wikipedia/en/4/4a/LouisvilleColonelsLogo.PNG\" height=\"25\"/>"
+    )
+  )
+# Freeze the factor levels in the current (ascending WAR) row order.
+team.war.df$logo <- factor(team.war.df$logo, levels = team.war.df$logo)
+team.war.df %>% head(10)
+## # A tibble: 10 × 5
+## debut_franchise WAR players `WAR/player` logo
+## <fct> <dbl> <int> <dbl> <fct>
+## 1 COL 630. 217 2.90 "<img src=\"https://cdn.ssref.net…
+## 2 ARI 636. 199 3.19 "<img src=\"https://cdn.ssref.net…
+## 3 LOU 660. 150 4.40 "<img src=\"https://upload.wikime…
+## 4 TBD 679. 172 3.95 "<img src=\"https://cdn.ssref.net…
+## 5 FLA 937. 254 3.69 "<img src=\"https://cdn.ssref.net…
+## 6 SDP 1340. 408 3.28 "<img src=\"https://cdn.ssref.net…
+## 7 KCR 1445. 372 3.88 "<img src=\"https://cdn.ssref.net…
+## 8 MIL 1459. 317 4.60 "<img src=\"https://cdn.ssref.net…
+## 9 TOR 1571. 324 4.85 "<img src=\"https://cdn.ssref.net…
+## 10 SEA 1572. 336 4.68 "<img src=\"https://cdn.ssref.net…
+# Horizontal bar chart of total WAR per debut franchise. Bars are ordered by
+# WAR, the rounded total is printed inside each bar, and the axis labels are
+# the logo <img> tags rendered through ggtext::element_markdown().
+team.war.sum.plot <-
+  ggplot(team.war.df, aes(x = reorder(logo, WAR), y = WAR)) +
+  geom_col(fill = "#69b3a2") +
+  geom_text(
+    aes(label = round(WAR, digits = 0)),
+    hjust = 1.1,
+    size = 6,
+    color = "white"
+  ) +
+  scale_x_discrete(name = NULL, labels = team.war.df$logo) +
+  coord_flip() +
+  ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") +
+  theme(
+    axis.text.y = element_markdown(),
+    axis.text.x = element_text(size = 14),
+    axis.title.x = element_text(size = 16, face = "bold"),
+    plot.title = element_text(size = 22)
+  )
+
+# Write the figure to disk, then print it into the notebook.
+ggsave(
+  "~/Desktop/peteb206.github.io/tweets/1/team_war_sum.png",
+  plot = team.war.sum.plot,
+  width = 15,
+  height = 12
+)
+team.war.sum.plot
+
+