Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
peteb206 committed Oct 10, 2023
1 parent d401e93 commit 2a9dbe3
Show file tree
Hide file tree
Showing 4 changed files with 780 additions and 0 deletions.
134 changes: 134 additions & 0 deletions tweets/1711818907449766233/Career WAR by Debut Team.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Setup
```{r setup, message = FALSE}
library(jsonlite)
library(dplyr)
library(Lahman)
library(baseballr)
library(ggplot2)
library(ggtext)
# Player ID lookup table; its bbref_id and fangraphs_id columns are used later
# in this document to link Lahman players to their FanGraphs IDs.
# NOTE(review): the empty name patterns are presumably meant to match every
# player in the register -- confirm against the playerid_lookup documentation.
player_ids.df <- playerid_lookup(last_name = "", first_name = "")
```

# Each MLB Player's 1st Team (1871 - 2022)
```{r}
# Debut teams
# One row per player: the franchise of the team in the player's earliest
# first-stint Batting or Pitching record, keyed by FanGraphs ID.
debut.teams.df <-
  union(
    Lahman::Batting %>%
      filter(stint == 1) %>%
      select(playerID, yearID, teamID),
    Lahman::Pitching %>%
      filter(stint == 1) %>%
      select(playerID, yearID, teamID)
  ) %>%
  # Sort the whole table by season so the first row of each player's group is
  # the debut season.
  arrange(yearID) %>%
  group_by(playerID) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  left_join(Lahman::Teams %>% select(yearID, teamID, franchID),
            by = c("yearID", "teamID")) %>%
  left_join(Lahman::People %>% select(playerID, bbrefID), by = "playerID") %>%
  # na_matches = "never": by default dplyr joins NA keys to NA keys, so a player
  # with a missing bbrefID would be paired with every lookup row that has a
  # missing bbref_id and inherit unrelated FanGraphs IDs.
  left_join(player_ids.df %>% select(bbref_id, fangraphs_id),
            by = c("bbrefID" = "bbref_id"), na_matches = "never") %>%
  select(fangraphs_id, franchID)
debut.teams.df %>% head(10)
```

# Career fWAR for Each Player
## Batting
```{r}
# Batter Seasons
# Query the FanGraphs leaderboard endpoint for batting WAR (1871-2022) and keep
# only the player name markup and WAR, tagged as "Batter".
base.url <- "https://www.fangraphs.com/api/leaders/major-league/data"
# Values are pasted into the URL verbatim, so anything needing escaping must
# already be percent-encoded here (e.g. the comma in lg).
query.params <- list(age = "", pos = "all", stats = "bat", lg = "al%2Cnl",
                     qual = "0", season = "2022", season1 = "1871",
                     startdate = "", enddate = "", month = "0", hand = "",
                     team = "0", pageitems = "2000000000", pagenum = "1",
                     ind = "0", rost = "0", players = "", type = "8",
                     sortdir = "default", sortstat = "WAR")
# Build "<base>?k1=v1&k2=v2..." in one vectorised step
url <- paste0(
  base.url, "?",
  paste0(names(query.params), "=", unlist(query.params), collapse = "&")
)
batter.seasons.df <- fromJSON(url)$data %>%
  select(Name, WAR) %>%
  mutate(Type = "Batter")
batter.seasons.df %>% head(10)
```

## Pitching
```{r}
# Pitcher Seasons
# Reuse the batting query parameters, switching only the stat group, and fetch
# pitching WAR from the same endpoint, tagged as "Pitcher".
query.params$stats <- "pit"
# Build "<base>?k1=v1&k2=v2..." in one vectorised step
url <- paste0(
  base.url, "?",
  paste0(names(query.params), "=", unlist(query.params), collapse = "&")
)
pitcher.seasons.df <- fromJSON(url)$data %>%
  select(Name, WAR) %>%
  mutate(Type = "Pitcher")
pitcher.seasons.df %>% head(10)
```

## Combine Batting & Pitching
```{r}
# Stack batting and pitching WAR, total it per FanGraphs player ID, and attach
# each player's debut franchise.
# bind_rows() rather than union(): union() is a set operation and would
# silently drop any rows that happen to be exact duplicates.
war.df <- bind_rows(batter.seasons.df, pitcher.seasons.df) %>%
  # Name holds link markup: the ID is the run of digits after "playerid=" and
  # the display name is the text between the tags. Capturing digits only keeps
  # the ID clean even if further "&"-separated parameters follow it.
  mutate(fangraphs_id = as.integer(gsub(".*playerid=([0-9]+).*", "\\1", Name)),
         Name = gsub(".*>(.*)<.*", "\\1", Name)) %>%
  group_by(fangraphs_id) %>%
  summarise(Name = first(Name), WAR = sum(WAR)) %>%
  # na_matches = "never": a missing ID on one side must not be paired with
  # every missing ID on the other side, which would duplicate WAR totals.
  full_join(debut.teams.df, by = "fangraphs_id", na_matches = "never") %>%
  rename(debut_franchise = franchID)
war.df %>% head(10)
```

## Data Quality Check
```{r}
# Share of rows in war.df that have no debut franchise attached
cat(
  scales::percent(mean(is.na(war.df$debut_franchise)), accuracy = 0.1),
  "of player IDs were not mapped successfully"
)
# List the unmapped rows, highest WAR first
arrange(filter(war.df, is.na(debut_franchise)), desc(WAR))
```

# Aggregate fWAR by Franchise
```{r}
# Total career fWAR credited to each debut franchise, keeping the 31 franchises
# with the highest totals, ordered from lowest to highest WAR.
# NOTE(review): 31 presumably covers the 30 current franchises plus Louisville
# (LOU), which gets a special-case logo below -- confirm.
team.war.df <- war.df %>%
  # Rows without a resolved debut franchise are not a franchise; drop them so
  # an NA bucket cannot take one of the ranking slots (and render an NA logo).
  filter(!is.na(debut_franchise)) %>%
  group_by(debut_franchise) %>%
  summarise(WAR = sum(WAR, na.rm = TRUE), players = n()) %>%
  arrange(WAR) %>%
  tail(31) %>%
  mutate(
    `WAR/player` = WAR / players,
    # Axis labels are HTML <img> tags rendered by ggtext::element_markdown()
    logo = ifelse(
      debut_franchise != "LOU",
      paste0("<img src=\"https://cdn.ssref.net/req/202310031/tlogo/br/",
             debut_franchise, ".png\" height=\"25\"/>"),
      "<img src=\"https://upload.wikimedia.org/wikipedia/en/4/4a/LouisvilleColonelsLogo.PNG\" height=\"25\"/>"
    )
  )
# Freeze the current (ascending-WAR) row order as the factor level order
team.war.df$logo <- factor(team.war.df$logo, levels = team.war.df$logo)
team.war.df %>% head(10)
```

# Graph
```{r}
# Horizontal bar chart of total WAR per debut franchise, with team logos as the
# category labels and the rounded WAR printed inside each bar.
team.war.sum.plot <- ggplot(
  data = team.war.df,
  mapping = aes(x = reorder(logo, WAR), y = WAR)
) +
  geom_col(fill = "#69b3a2") +
  geom_text(
    mapping = aes(label = round(WAR, digits = 0)),
    hjust = 1.1,
    size = 6,
    color = "white"
  ) +
  coord_flip() +
  scale_x_discrete(name = NULL, labels = team.war.df$logo) +
  ggtitle("Career WAR by Players who Debuted with Each Franchise (1871-2022)") +
  theme(
    # Render the <img> tag labels as images
    axis.text.y = element_markdown(),
    axis.text.x = element_text(size = 14),
    axis.title.x = element_text(face = "bold", size = 16),
    plot.title = element_text(size = 22)
  )
# Write the figure to disk, then print it into the rendered document
ggsave(
  filename = "~/Desktop/peteb206.github.io/tweets/1/team_war_sum.png",
  plot = team.war.sum.plot,
  width = 15,
  height = 12
)
team.war.sum.plot
```
635 changes: 635 additions & 0 deletions tweets/1711818907449766233/Career-WAR-by-Debut-Team.html

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: Document
title:
username:
account: rpubs
server: rpubs.com
hostUrl: rpubs.com
appId: https://api.rpubs.com/api/v1/document/1096686/2873aea40f2441ca9f415a694b728be4
bundleId: https://api.rpubs.com/api/v1/document/1096686/2873aea40f2441ca9f415a694b728be4
url: http://rpubs.com/publish/claim/1096686/cbd69df8f04047b28f18f6bb79887a1f
when: 1696963013.60974
lastSyncTime: 1696963013.60975
Binary file added tweets/1711818907449766233/team_war_sum.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 2a9dbe3

Please sign in to comment.