diff --git a/tweets/1711818907449766233/Career WAR by Debut Team.Rmd b/tweets/1711818907449766233/Career WAR by Debut Team.Rmd new file mode 100644 index 0000000..f930c6b --- /dev/null +++ b/tweets/1711818907449766233/Career WAR by Debut Team.Rmd @@ -0,0 +1,134 @@ +# Setup +```{r setup, message = FALSE} +library(jsonlite) +library(dplyr) +library(Lahman) +library(baseballr) +library(ggplot2) +library(ggtext) + +player_ids.df <- playerid_lookup(last_name = "", first_name = "") +``` + +# Each MLB Player's 1st Team (1871 - 2022) +```{r} +# Debut teams +debut.teams.df <- + union(Lahman::Batting %>% + filter(stint == 1) %>% + select(playerID, yearID, teamID), + Lahman::Pitching %>% + filter(stint == 1) %>% + select(playerID, yearID, teamID) + ) %>% + group_by(playerID) %>% + arrange(yearID) %>% + filter(row_number() == 1) %>% + left_join(Lahman::Teams %>% select(yearID, teamID, franchID), + by = c("yearID" = "yearID", "teamID" = "teamID")) %>% + left_join(Lahman::People %>% select(playerID, bbrefID), by = "playerID") %>% + left_join(player_ids.df %>% select(bbref_id, fangraphs_id), + by = c("bbrefID" = "bbref_id")) %>% + ungroup() %>% + select(fangraphs_id, franchID) +debut.teams.df %>% head(10) +``` + +# Career fWAR for Each Player +## Batting +```{r} +# Batter Seasons +base.url <- "https://www.fangraphs.com/api/leaders/major-league/data" +query.params <- list(age = "", pos = "all", stats = "bat", lg = "al%2Cnl", + qual = "0", season = "2022", season1 = "1871", + startdate = "", enddate = "", month = "0", hand = "", + team = "0", pageitems = "2000000000", pagenum = "1", + ind = "0", rost = "0", players = "", type = "8", + sortdir = "default", sortstat = "WAR") +sep = "?" 
+url <- base.url +for (param in names(query.params)) { + url <- paste(url, sep, param, "=", query.params[[param]], sep = "") + sep = "&" +} +batter.seasons.df <- fromJSON(url)$data %>% + select(Name, WAR) %>% + mutate(Type = "Batter") +batter.seasons.df %>% head(10) +``` + +## Pitching +```{r} +# Pitcher Seasons +query.params$stats <- "pit" + +sep = "?" +url <- base.url +for (param in names(query.params)) { + url <- paste(url, sep, param, "=", query.params[[param]], sep = "") + sep = "&" +} +pitcher.seasons.df <- fromJSON(url)$data %>% + select(Name, WAR) %>% + mutate(Type = "Pitcher") +pitcher.seasons.df %>% head(10) +``` + +## Combine Batting & Pitching +```{r} +war.df <- union(batter.seasons.df, pitcher.seasons.df) %>% + mutate(fangraphs_id = as.integer(gsub(".*playerid=(.*)&.*", "\\1", Name)), + Name = gsub(".*>(.*)<.*", "\\1", Name)) %>% + group_by(fangraphs_id) %>% + summarise(Name = first(Name), WAR = sum(WAR)) %>% + full_join(debut.teams.df, by = "fangraphs_id") %>% + rename(debut_franchise = franchID) +war.df %>% head(10) +``` + +## Data Quality Check +```{r} +cat(scales::percent(sum(is.na(war.df$debut_franchise)) / + length(war.df$debut_franchise), accuracy = 0.1), + "of player IDs were not mapped successfully") +war.df %>% filter(is.na(debut_franchise)) %>% arrange(desc(WAR)) +``` + +# Aggregate fWAR by Franchise +```{r} +team.war.df <- war.df %>% + group_by(debut_franchise) %>% + summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>% + arrange(WAR) %>% + tail(31) %>% + mutate( + `WAR/player` = WAR / players, + logo = ifelse( + debut_franchise != "LOU", + paste("", sep = ""), + "" + ) + ) +team.war.df$logo <- factor(team.war.df$logo, levels = team.war.df$logo) +team.war.df %>% head(10) +``` + +# Graph +```{r} +team.war.sum.plot <- ggplot(team.war.df, aes(x = reorder(logo, WAR), y = WAR)) + + geom_col(fill = "#69b3a2") + + geom_text(aes(label = round(WAR, digits = 0)), color = "white", size = 6, + hjust = 1.1) + + coord_flip() + + 
scale_x_discrete(name = NULL, labels = team.war.df$logo) + + ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") + + theme(plot.title = element_text(size = 22), + axis.text.x = element_text(size = 14), + axis.title.x = element_text(size = 16, face = "bold"), + axis.text.y = element_markdown()) + +ggsave("~/Desktop/peteb206.github.io/tweets/1/team_war_sum.png", + plot = team.war.sum.plot, width = 15, height = 12) +team.war.sum.plot +``` \ No newline at end of file diff --git a/tweets/1711818907449766233/Career-WAR-by-Debut-Team.html b/tweets/1711818907449766233/Career-WAR-by-Debut-Team.html new file mode 100644 index 0000000..9bd9068 --- /dev/null +++ b/tweets/1711818907449766233/Career-WAR-by-Debut-Team.html @@ -0,0 +1,635 @@ + + + + + + + + + + + + + + + +Career WAR by Debut Team + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Setup

+
library(jsonlite)  # fromJSON() for the FanGraphs API responses
+library(dplyr)  # pipe and data-frame verbs used in every chunk
+library(Lahman)  # Batting, Pitching, Teams and People tables
+library(baseballr)  # playerid_lookup()
+library(ggplot2)  # plotting and ggsave()
+library(ggtext)  # element_markdown() for the logo axis labels
+# NOTE(review): empty name filters presumably return the whole ID table; confirm.
+player_ids.df <- playerid_lookup(last_name = "", first_name = "")
+
+
+

Each MLB Player’s 1st Team (1871 - 2022)

+
# Debut franchise per player: take the earliest stint-1 season found in either
# the batting or the pitching table, then translate the team to its franchise
# and the Lahman player ID to a FanGraphs ID (via the Baseball-Reference ID).
+debut.teams.df <- Lahman::Batting %>%
+  filter(stint == 1) %>%
+  select(playerID, yearID, teamID) %>%
+  union(
+    Lahman::Pitching %>%
+      filter(stint == 1) %>%
+      select(playerID, yearID, teamID)
+  ) %>%
+  group_by(playerID) %>%
+  arrange(yearID) %>%
+  # Keep the first (earliest) row of each player; row order is left untouched.
+  filter(row_number() == 1) %>%
+  ungroup() %>%
+  left_join(
+    Lahman::Teams %>% select(yearID, teamID, franchID),
+    by = c("yearID", "teamID")
+  ) %>%
+  left_join(
+    Lahman::People %>% select(playerID, bbrefID),
+    by = "playerID"
+  ) %>%
+  left_join(
+    player_ids.df %>% select(bbref_id, fangraphs_id),
+    by = c("bbrefID" = "bbref_id")
+  ) %>%
+  select(fangraphs_id, franchID)
+debut.teams.df %>% head(10)
+
## # A tibble: 10 × 2
+##    fangraphs_id franchID
+##           <int> <fct>   
+##  1      1000017 TRO     
+##  2      1000070 ROK     
+##  3      1000164 CFC     
+##  4      1000168 OLY     
+##  5      1000272 ROK     
+##  6      1000310 KEK     
+##  7      1000543 ROK     
+##  8      1000573 BNA     
+##  9      1000591 KEK     
+## 10      1000608 BNA
+
+
+

Career fWAR for Each Player

+
+

Batting

+
# Career batting WAR per player from the FanGraphs leaderboard API
+base.url <- "https://www.fangraphs.com/api/leaders/major-league/data"
+query.params <- list(
+  age = "", pos = "all", stats = "bat", lg = "al%2Cnl", qual = "0",
+  season = "2022", season1 = "1871", startdate = "", enddate = "",
+  month = "0", hand = "", team = "0", pageitems = "2000000000",
+  pagenum = "1", ind = "0", rost = "0", players = "", type = "8",
+  sortdir = "default", sortstat = "WAR"
+)
+# Build "?key=value&key=value..." in list order and append it to the base URL.
+url <- paste0(
+  base.url, "?",
+  paste0(names(query.params), "=",
+         unlist(query.params, use.names = FALSE), collapse = "&")
+)
+# Only the player anchor (Name) and WAR are kept; Type tags the source table.
+batter.seasons.df <- fromJSON(url)$data %>%
+  select(Name, WAR) %>%
+  mutate(Type = "Batter")
+batter.seasons.df %>% head(10)
+
##                                                                     Name
+## 1       <a href="statss.aspx?playerid=1011327&position=OF">Babe Ruth</a>
+## 2        <a href="statss.aspx?playerid=1109&position=OF">Barry Bonds</a>
+## 3     <a href="statss.aspx?playerid=1008315&position=OF">Willie Mays</a>
+## 4         <a href="statss.aspx?playerid=1002378&position=OF">Ty Cobb</a>
+## 5    <a href="statss.aspx?playerid=1013485&position=SS">Honus Wagner</a>
+## 6      <a href="statss.aspx?playerid=1000001&position=OF">Hank Aaron</a>
+## 7    <a href="statss.aspx?playerid=1012309&position=OF">Tris Speaker</a>
+## 8    <a href="statss.aspx?playerid=1014040&position=OF">Ted Williams</a>
+## 9  <a href="statss.aspx?playerid=1006030&position=2B">Rogers Hornsby</a>
+## 10 <a href="statss.aspx?playerid=1009405&position=1B/OF">Stan Musial</a>
+##         WAR   Type
+## 1  167.0179 Batter
+## 2  164.4482 Batter
+## 3  149.8535 Batter
+## 4  149.0767 Batter
+## 5  138.0926 Batter
+## 6  136.3079 Batter
+## 7  130.1979 Batter
+## 8  129.7669 Batter
+## 9  129.1392 Batter
+## 10 126.3664 Batter
+
+
+

Pitching

+
# Career pitching WAR per player: same request as for batters, with only the
# stats parameter switched to pitching.
+query.params[["stats"]] <- "pit"
+
+# Build "?key=value&key=value..." in list order and append it to the base URL.
+url <- paste0(
+  base.url, "?",
+  paste0(names(query.params), "=",
+         unlist(query.params, use.names = FALSE), collapse = "&")
+)
+pitcher.seasons.df <- fromJSON(url)$data %>%
+  select(Name, WAR) %>%
+  mutate(Type = "Pitcher")
+pitcher.seasons.df %>% head(10)
+
##                                                                    Name
+## 1       <a href="statss.aspx?playerid=815&position=P">Roger Clemens</a>
+## 2        <a href="statss.aspx?playerid=1014369&position=P">Cy Young</a>
+## 3         <a href="statss.aspx?playerid=104&position=P">Greg Maddux</a>
+## 4  <a href="statss.aspx?playerid=1006511&position=P">Walter Johnson</a>
+## 5        <a href="statss.aspx?playerid=60&position=P">Randy Johnson</a>
+## 6      <a href="statss.aspx?playerid=1011348&position=P">Nolan Ryan</a>
+## 7   <a href="statss.aspx?playerid=1001098&position=P">Bert Blyleven</a>
+## 8   <a href="statss.aspx?playerid=1010210&position=P">Gaylord Perry</a>
+## 9   <a href="statss.aspx?playerid=1001964&position=P">Steve Carlton</a>
+## 10 <a href="statss.aspx?playerid=1000128&position=P">Pete Alexander</a>
+##          WAR    Type
+## 1  133.68920 Pitcher
+## 2  131.46872 Pitcher
+## 3  116.66175 Pitcher
+## 4  116.41905 Pitcher
+## 5  110.45433 Pitcher
+## 6  106.74577 Pitcher
+## 7  102.91322 Pitcher
+## 8  100.06021 Pitcher
+## 9   96.45129 Pitcher
+## 10  95.74027 Pitcher
+
+
+

Combine Batting & Pitching

+
# One row per FanGraphs player ID: batting WAR plus pitching WAR, joined to the
# player's debut franchise.
+war.df <- union(batter.seasons.df, pitcher.seasons.df) %>%
+  mutate(
+    # Name arrives as an HTML anchor (<a href="...playerid=123&...">Name</a>).
+    # Capture just the digits after "playerid=" so the extraction does not
+    # depend on another query parameter (and its "&") following the ID.
+    fangraphs_id = as.integer(sub(".*playerid=([0-9]+).*", "\\1", Name)),
+    # Replace the anchor with its visible text (the player's name).
+    Name = gsub(".*>(.*)<.*", "\\1", Name)
+  ) %>%
+  group_by(fangraphs_id) %>%
+  summarise(Name = first(Name), WAR = sum(WAR)) %>%
+  # full_join keeps unmatched rows from both sides: players without a debut
+  # team get an NA franchise, debut rows without WAR get NA Name and WAR.
+  full_join(debut.teams.df, by = "fangraphs_id") %>%
+  rename(debut_franchise = franchID)
+war.df %>% head(10)
+
## # A tibble: 10 × 4
+##    fangraphs_id Name                WAR debut_franchise
+##           <int> <chr>             <dbl> <fct>          
+##  1            1 Alfredo Amezaga  3.22   ANA            
+##  2            2 Garret Anderson 23.9    ANA            
+##  3            3 Kevin Appier    50.4    KCR            
+##  4            4 Larry Barnes    -0.567  ANA            
+##  5            5 Scott Patterson  0.0469 NYY            
+##  6            6 Jamie Burke      0.491  ANA            
+##  7            7 Mickey Callaway  0.843  TBD            
+##  8            8 Dennis Cook      5.31   SFG            
+##  9           10 David Eckstein  16.7    ANA            
+## 10           11 Darin Erstad    28.5    ANA
+
+
+

Data Quality Check

+
cat(
+  # Share of rows in war.df that have no debut franchise attached
+  scales::percent(mean(is.na(war.df$debut_franchise)), accuracy = 0.1),
+  "of player IDs were not mapped successfully"
+)
+
## 0.1% of player IDs were not mapped successfully
+
war.df %>%
+  filter(is.na(debut_franchise)) %>%  # rows with no debut franchise
+  arrange(desc(WAR))  # highest career WAR first
+
## # A tibble: 21 × 4
+##    fangraphs_id Name                 WAR debut_franchise
+##           <int> <chr>              <dbl> <fct>          
+##  1      1010104 "George Pearce"   3.73   <NA>           
+##  2      1000777 "Ed Begley"       0.555  <NA>           
+##  3      1010739 "Rip Reagan"      0.271  <NA>           
+##  4      1010882 "Bill Rhodes"     0.161  <NA>           
+##  5      1000511 "Bill Banks"      0.0872 <NA>           
+##  6      1009235 "Sparrow Morton"  0.0719 <NA>           
+##  7      1008462 "Jim McDonald"    0.0179 <NA>           
+##  8      1005394 "Ben Harrison"   -0.0173 <NA>           
+##  9      1010128 "Monte Peffer"   -0.0240 <NA>           
+## 10      1002444 " Collins"       -0.0451 <NA>           
+## # ℹ 11 more rows
+
+
+
+

Aggregate fWAR by Franchise

+
# Total career WAR and player count per debut franchise, limited to the 31
# franchises with the highest totals (sorted ascending), plus an HTML <img> tag
# per franchise that is later used as the axis label.
+team.war.df <- war.df %>%
+  group_by(debut_franchise) %>%
+  summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>%
+  arrange(WAR) %>%
+  tail(31) %>%
+  mutate(
+    `WAR/player` = WAR / players,
+    # "LOU" gets its own logo URL; every other franchise uses the shared
+    # logo URL pattern keyed by the franchise code.
+    logo = ifelse(
+      debut_franchise == "LOU",
+      "<img src=\"https://upload.wikimedia.org/wikipedia/en/4/4a/LouisvilleColonelsLogo.PNG\" height=\"25\"/>",
+      paste0("<img src=\"https://cdn.ssref.net/req/202310031/tlogo/br/",
+             debut_franchise, ".png\" height=\"25\"/>")
+    )
+  ) %>%
+  # Freeze the current (ascending WAR) row order as the factor level order.
+  mutate(logo = factor(logo, levels = logo))
+team.war.df %>% head(10)
+
## # A tibble: 10 × 5
+##    debut_franchise   WAR players `WAR/player` logo                              
+##    <fct>           <dbl>   <int>        <dbl> <fct>                             
+##  1 COL              630.     217         2.90 "<img src=\"https://cdn.ssref.net…
+##  2 ARI              636.     199         3.19 "<img src=\"https://cdn.ssref.net…
+##  3 LOU              660.     150         4.40 "<img src=\"https://upload.wikime…
+##  4 TBD              679.     172         3.95 "<img src=\"https://cdn.ssref.net…
+##  5 FLA              937.     254         3.69 "<img src=\"https://cdn.ssref.net…
+##  6 SDP             1340.     408         3.28 "<img src=\"https://cdn.ssref.net…
+##  7 KCR             1445.     372         3.88 "<img src=\"https://cdn.ssref.net…
+##  8 MIL             1459.     317         4.60 "<img src=\"https://cdn.ssref.net…
+##  9 TOR             1571.     324         4.85 "<img src=\"https://cdn.ssref.net…
+## 10 SEA             1572.     336         4.68 "<img src=\"https://cdn.ssref.net…
+
+
+

Graph

+
# Horizontal bar chart of total career WAR per debut franchise, with the
# franchise logos (HTML <img> tags rendered by ggtext) as axis labels.
+team.war.sum.plot <- ggplot(team.war.df, aes(x = reorder(logo, WAR), y = WAR)) +
+  geom_col(fill = "#69b3a2") +
+  # Rounded WAR printed in white just inside the end of each bar
+  geom_text(aes(label = round(WAR, digits = 0)), color = "white", size = 6,
+            hjust = 1.1) +
+  coord_flip() +
+  # Labels are matched by position, so this relies on team.war.df already
+  # being sorted by ascending WAR, the same order reorder() produces.
+  scale_x_discrete(name = NULL, labels = team.war.df$logo) +
+  ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") +
+  theme(plot.title = element_text(size = 22),
+        axis.text.x = element_text(size = 14),
+        axis.title.x = element_text(size = 16, face = "bold"),
+        # element_markdown() turns the <img> tags into actual images
+        axis.text.y = element_markdown())
+
+# Save next to this document instead of to a hard-coded, user-specific
+# absolute path; knitr evaluates chunks in the document's directory by default.
+ggsave("team_war_sum.png",
+       plot = team.war.sum.plot, width = 15, height = 12)
+team.war.sum.plot
+

+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/tweets/1711818907449766233/rsconnect/documents/Career WAR by Debut Team.Rmd/rpubs.com/rpubs/Document.dcf b/tweets/1711818907449766233/rsconnect/documents/Career WAR by Debut Team.Rmd/rpubs.com/rpubs/Document.dcf new file mode 100644 index 0000000..399d552 --- /dev/null +++ b/tweets/1711818907449766233/rsconnect/documents/Career WAR by Debut Team.Rmd/rpubs.com/rpubs/Document.dcf @@ -0,0 +1,11 @@ +name: Document +title: +username: +account: rpubs +server: rpubs.com +hostUrl: rpubs.com +appId: https://api.rpubs.com/api/v1/document/1096686/2873aea40f2441ca9f415a694b728be4 +bundleId: https://api.rpubs.com/api/v1/document/1096686/2873aea40f2441ca9f415a694b728be4 +url: http://rpubs.com/publish/claim/1096686/cbd69df8f04047b28f18f6bb79887a1f +when: 1696963013.60974 +lastSyncTime: 1696963013.60975 diff --git a/tweets/1711818907449766233/team_war_sum.png b/tweets/1711818907449766233/team_war_sum.png new file mode 100644 index 0000000..c9cb388 Binary files /dev/null and b/tweets/1711818907449766233/team_war_sum.png differ