You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
library(tidyverse)
library(tidymodels)
# inference example -----
# Bootstrap the mean of `mpg` in `mtcars`: draw many resamples (with
# replacement), compute the mean of each one, then look at how those
# means are distributed.
#
# NOTE: the `bootstrap_samples` assignment used to be fused onto the end of
# the section comment above, which turned it into comment text; the object
# was never created and the next statement failed.

# Print the raw data (the original line appears to have done this).
mtcars

# 1000 resamples of 10 rows each, drawn with replacement.
bootstrap_samples <- rep_sample_n(
  mtcars,
  size = 10,
  reps = 1000,
  replace = TRUE
)

# One point estimate (mean mpg) per resample.
bootstrap_sample_estimates <- bootstrap_samples %>%
  group_by(replicate) %>% # technically the data is already grouped, but i'm putting this here to be extra explicit
  summarize(avg_mpg = mean(mpg, na.rm = TRUE))

# bootstrap distribution
ggplot(bootstrap_sample_estimates, aes(x = avg_mpg)) +
  geom_histogram()

# Mean of the bootstrap distribution.
bootstrap_sample_estimates %>%
  pull(avg_mpg) %>%
  mean()
## Linear regression
library(GGally)
# Pairwise plots of every variable in mtcars.
ggpairs(mtcars)

# linear regression example -----
## very similar line of thinking with the classification models
## https://github.com/UBC-DSCI/dsci-100-student/issues/33
#
# NOTE: several assignments below used to be fused onto the end of comment
# lines (`mt_cars`, `car_split`, `lm_fit`, `lm_test_results`), so they were
# never executed. Each statement is now back on its own line.

# Recode the `am` column as a factor.
mt_cars <- mtcars %>%
  mutate(am = factor(am)) # why do you need this?

# 75/25 train/test split, stratified on `am`.
car_split <- initial_split(mt_cars, prop = 0.75, strata = am)
car_train <- training(car_split)
car_test <- testing(car_split)

# Model specification: linear regression using the "lm" engine.
lm_spec <- linear_reg() %>%
  set_engine("lm") %>%
  set_mode("regression")

# Recipe: predict mpg from am, hp and wt, declared on the training set only.
car_recipe <- recipe(mpg ~ am + hp + wt, data = car_train)
# do we __have to__ center scale?

# Bundle recipe + model into a workflow and fit it on the training data.
lm_fit <- workflow() %>%
  add_recipe(car_recipe) %>%
  add_model(lm_spec) %>%
  fit(data = car_train)
lm_fit

## evaluate the linear model on test data
lm_test_results <- lm_fit %>%
  predict(car_test) %>%
  bind_cols(car_test) %>%
  metrics(truth = mpg, estimate = .pred)
lm_test_results

## where are all the places the code can go wrong?
## training on the wrong dataset, testing the wrong dataset
Inference
Linear regression
Clustering
The text was updated successfully, but these errors were encountered: