Francisco Yira Albornoz November 19th 2018
- 21.2 For loops
- 21.3 For loop variations
- 21.4 For loops vs. functionals
- 21.5 The map functions
- 21.9 Other patterns of for loops
-
Write for loops to:
-
Compute the median of every column in
mtcars
# Median of each mtcars column, written into a preallocated double vector.
median_mtcars <- numeric(ncol(mtcars))
for (col_idx in seq_len(ncol(mtcars))) {
  median_mtcars[col_idx] <- median(mtcars[[col_idx]])
}
median_mtcars
## [1] 19.200 6.000 196.300 123.000 3.695 3.325 17.710 0.000 0.000
## [10] 4.000 2.000
- Determine the type of each column in
nycflights13::flights
# Storage type of each flights column, written into a preallocated
# character vector.
typecol_flights <- character(ncol(nycflights13::flights))
for (col_idx in seq_len(ncol(nycflights13::flights))) {
  typecol_flights[col_idx] <- typeof(nycflights13::flights[[col_idx]])
}
typecol_flights
## [1] "integer" "integer" "integer" "integer" "integer" "double"
## [7] "integer" "integer" "double" "character" "integer" "character"
## [13] "character" "character" "double" "double" "double" "double"
## [19] "double"
- Compute the number of unique values in each column of
iris
# Number of distinct values in each iris column.
n_unique_val_iris <- integer(ncol(iris))
for (col_idx in seq_len(ncol(iris))) {
  n_unique_val_iris[col_idx] <- length(unique(iris[[col_idx]]))
}
n_unique_val_iris
## [1] 35 23 43 22 3
- Generate 10 random normals for each of μ = − 10, 0, 10, and 100
# Ten normal draws per requested mean; a list is used because each
# result is a vector rather than a single value.
means <- c(-10, 0, 10, 100)
random_normals <- rep(list(NULL), length(means))
for (mean_idx in seq_along(means)) {
  random_normals[[mean_idx]] <- rnorm(n = 10, mean = means[mean_idx])
}
random_normals
## [[1]]
## [1] -9.976628 -10.280885 -9.877374 -9.566374 -8.989570 -9.878482
## [7] -10.016595 -9.110575 -11.950144 -8.749682
##
## [[2]]
## [1] 0.4931259 1.6412130 -1.0545904 -0.2589900 -0.3901266 0.1553609
## [7] 0.9281104 0.9369941 -0.5295036 1.0132690
##
## [[3]]
## [1] 9.031743 10.041307 9.406935 10.410370 9.326198 9.751394 10.509925
## [8] 9.818359 8.827326 10.280023
##
## [[4]]
## [1] 100.34042 101.72786 100.85984 99.54684 100.41539 100.10072 100.73846
## [8] 100.81508 100.63034 101.06028
- Eliminate the for loop in each of the following examples by taking advantage of an existing function that works with vectors
# Exercise prompt: builds one string by appending each letter in turn,
# so `out` is re-created on every iteration.
out <- ""
for (x in letters) {
out <- stringr::str_c(out, x)
}
out
## [1] "abcdefghijklmnopqrstuvwxyz"
Alternative without for loop:
# `collapse = ""` joins all elements of the vector into a single string.
stringr::str_c(letters, collapse = "")
## [1] "abcdefghijklmnopqrstuvwxyz"
# Exercise prompt: hand-rolled sample standard deviation.
x <- sample(100)
# NOTE: this binds the name `sd` to a number in the current environment.
sd <- 0
# Accumulate squared deviations; mean(x) is recomputed on every iteration.
for (i in seq_along(x)) {
sd <- sd + (x[i] - mean(x)) ^ 2
}
# Divide by n - 1 and take the square root.
sd <- sqrt(sd / (length(x) - 1))
Alternative without for loop:
# Still calls the stats function even though `sd` was assigned a number
# above: when evaluating a call, R skips bindings that are not functions.
sd(x)
## [1] 29.01149
# Exercise prompt: running total of x built with an explicit loop.
x <- runif(100)
out <- vector("numeric", length(x))
out[1] <- x[1]
# Each element depends on the previous one, hence the loop starts at 2.
for (i in 2:length(x)) {
out[i] <- out[i - 1] + x[i]
}
out
# Vectorised alternative to the loop above.
cumsum(x)
-
Combine your function writing and for loop skills:
-
Write a for loop that
prints()
the lyrics to the children’s song “Alice the camel”.
# Lyrics data: the four vectors are aligned by position, one entry per verse.
names_song <- c(
  "Alice", "Ruby", "Sally", "Felix",
  "Lilly", "Andy", "Larry", "Sammy"
)
animals_song <- c(
  "Camel", "Rabbit", "Sloth", "Fox",
  "Ladybug", "Ant", "Lizard", "Spider"
)
numbers_song <- c(
  "one", "two", "two", "four",
  "five", "six", "seven", "eight"
)
what_they_have <- c(
  "hump", "ears", "toes", "legs",
  "spots", "legs", "stripes", "legs"
)

for (verse in seq_along(names_song)) {
  verse_line <- stringr::str_c(
    names_song[[verse]], " the ", animals_song[[verse]],
    " has ", numbers_song[[verse]], " ", what_they_have[[verse]], "."
  )
  # Every verse repeats its line four times before the refrain.
  writeLines(rep(verse_line, times = 4))
  writeLines(stringr::str_c("Go ", names_song[[verse]], " go!\n"))
}
## Alice the Camel has one hump.
## Alice the Camel has one hump.
## Alice the Camel has one hump.
## Alice the Camel has one hump.
## Go Alice go!
##
## Ruby the Rabbit has two ears.
## Ruby the Rabbit has two ears.
## Ruby the Rabbit has two ears.
## Ruby the Rabbit has two ears.
## Go Ruby go!
##
## Sally the Sloth has two toes.
## Sally the Sloth has two toes.
## Sally the Sloth has two toes.
## Sally the Sloth has two toes.
## Go Sally go!
##
## Felix the Fox has four legs.
## Felix the Fox has four legs.
## Felix the Fox has four legs.
## Felix the Fox has four legs.
## Go Felix go!
##
## Lilly the Ladybug has five spots.
## Lilly the Ladybug has five spots.
## Lilly the Ladybug has five spots.
## Lilly the Ladybug has five spots.
## Go Lilly go!
##
## Andy the Ant has six legs.
## Andy the Ant has six legs.
## Andy the Ant has six legs.
## Andy the Ant has six legs.
## Go Andy go!
##
## Larry the Lizard has seven stripes.
## Larry the Lizard has seven stripes.
## Larry the Lizard has seven stripes.
## Larry the Lizard has seven stripes.
## Go Larry go!
##
## Sammy the Spider has eight legs.
## Sammy the Spider has eight legs.
## Sammy the Spider has eight legs.
## Sammy the Spider has eight legs.
## Go Sammy go!
- Convert the nursery rhyme “ten in the bed” to a function. Generalise it to any number of people in any sleeping structure.
library(stringr)
library(english)
## Warning: package 'english' was built under R version 4.1.3
# Print the "ten in the bed" rhyme for any head count and sleeping structure.
#
# Arguments:
#   n_people            Number of people at the start. Coerced to integer
#                       (fractions are truncated); must be a single value
#                       of at least one.
#   sleeping_structure  Name of the thing they sleep in, e.g. "bed".
#
# Writes one strophe per person to stdout and returns NULL invisibly.
# Stops with an error when n_people is not a single value, cannot be
# coerced to an integer, or is smaller than one.
people_fell_song <- function(n_people = 10, sleeping_structure = "bed") {
  # Coerce before comparing, so NA or non-numeric input reaches the
  # "valid integer" error instead of crashing inside `if`.
  if (length(n_people) != 1L) {
    stop("n_people should be a valid integer", call. = FALSE)
  }
  n_people <- suppressWarnings(as.integer(n_people))
  if (is.na(n_people)) {
    stop("n_people should be a valid integer", call. = FALSE)
  }
  if (n_people < 1L) {
    stop("n_people should be greater than zero", call. = FALSE)
  }

  for (remaining in n_people:1L) {
    # Spell the head count out as a word ("five", "four", ...).
    people_in <- as.character(as.english(remaining))

    # The first strophe opens differently from the rest.
    if (remaining == n_people) {
      writeLines("Here we go!")
    } else {
      writeLines(str_c(str_to_title(people_in), "!"))
    }

    # Lines that appear in every strophe; the verb agrees with the count.
    verb <- if (remaining == 1L) "was" else "were"
    writeLines(
      str_c("There ", verb, " ", people_in, " in the ", sleeping_structure)
    )
    writeLines("and the little one said,")

    # The last strophe ends the song instead of rolling over.
    if (remaining > 1L) {
      writeLines('"Roll over, roll over."')
      writeLines("So they all rolled over and one fell out.\n")
    } else {
      writeLines('"Good night!"')
    }
  }
  invisible(NULL)
}
people_fell_song(5, "sofa")
## Here we go!
## There were five in the sofa
## and the little one said,
## "Roll over, roll over."
## So they all rolled over and one fell out.
##
## Four!
## There were four in the sofa
## and the little one said,
## "Roll over, roll over."
## So they all rolled over and one fell out.
##
## Three!
## There were three in the sofa
## and the little one said,
## "Roll over, roll over."
## So they all rolled over and one fell out.
##
## Two!
## There were two in the sofa
## and the little one said,
## "Roll over, roll over."
## So they all rolled over and one fell out.
##
## One!
## There was one in the sofa
## and the little one said,
## "Good night!"
- Convert the song “99 bottles of beer on the wall” to a function. Generalise to any number of any vessel containing any liquid on any surface.
library(stringr)
# Print the "99 bottles of beer" song for any count, vessel, liquid and
# surface.
#
# Arguments:
#   n_vessels        Starting number of vessels. Coerced to integer
#                    (fractions are truncated); must be a single value of
#                    at least one.
#   vessel           Plural name of the vessel, e.g. "bottles".
#   liquid           What the vessels contain, e.g. "beer".
#   surface          Where the vessels stand, e.g. "wall".
#   vessel_singular  Name used when exactly one vessel is meant. Defaults
#                    to `vessel`, which keeps the original wording
#                    ("1 bottles"); pass e.g. "bottle" for correct grammar.
#
# Writes the verses to stdout and returns NULL invisibly. Stops with an
# error when n_vessels is not a single value, cannot be coerced to an
# integer, or is smaller than one.
song_bottles_beer <- function(n_vessels = 99, vessel = "bottles",
                              liquid = "beer", surface = "wall",
                              vessel_singular = vessel) {
  # Coerce before comparing, so NA or non-numeric input reaches the
  # "valid integer" error instead of crashing inside `if`.
  if (length(n_vessels) != 1L) {
    stop("n_vessels should be a valid integer", call. = FALSE)
  }
  n_vessels <- suppressWarnings(as.integer(n_vessels))
  if (is.na(n_vessels)) {
    stop("n_vessels should be a valid integer", call. = FALSE)
  }
  if (n_vessels < 1L) {
    stop("n_vessels should be greater than zero", call. = FALSE)
  }

  # Fragments shared by several lines of every verse.
  contents <- paste0(" of ", liquid)
  on_surface <- paste0(contents, " on the ", surface)
  vessel_word <- function(k) if (k == 1L) vessel_singular else vessel

  for (vessels_left in n_vessels:1L) {
    writeLines(paste0(
      vessels_left, " ", vessel_word(vessels_left), on_surface, ", ",
      vessels_left, " ", vessel_word(vessels_left), contents, "."
    ))
    if (vessels_left > 1L) {
      writeLines(paste0(
        "Take one down and pass it around, ", vessels_left - 1L, " ",
        vessel_word(vessels_left - 1L), on_surface, ".\n"
      ))
    } else {
      writeLines(paste0(
        "Take one down and pass it around, no more ", vessel, on_surface,
        ".\n"
      ))
    }
  }

  # Closing verse, sung once nothing is left.
  writeLines(paste0(
    "No more ", vessel, on_surface, ", no more ", vessel, contents, "."
  ))
  writeLines(paste0(
    "Go to the store and buy some more, ", n_vessels, " ",
    vessel_word(n_vessels), on_surface, "."
  ))
  invisible(NULL)
}
song_bottles_beer(5, vessel = "glasses", liquid = "water", surface = "table")
## 5 glasses of water on the table, 5 glasses of water.
## Take one down and pass it around, 4 glasses of water on the table.
##
## 4 glasses of water on the table, 4 glasses of water.
## Take one down and pass it around, 3 glasses of water on the table.
##
## 3 glasses of water on the table, 3 glasses of water.
## Take one down and pass it around, 2 glasses of water on the table.
##
## 2 glasses of water on the table, 2 glasses of water.
## Take one down and pass it around, 1 glasses of water on the table.
##
## 1 glasses of water on the table, 1 glasses of water.
## Take one down and pass it around, no more glasses of water on the table.
##
## No more glasses of water on the table, no more glasses of water.
## Go to the store and buy some more, 5 glasses of water on the table.
- It’s common to see for loops that don’t preallocate the output and instead increase the length of a vector at each step:
# Exercise prompt (anti-pattern): `output` starts empty and is re-created
# by c() on every iteration instead of being preallocated.
output <- vector("integer", 0)
for (i in seq_along(x)) {
output <- c(output, lengths(x[[i]]))
}
output
How does this affect performance? Design and execute an experiment.
library(microbenchmark)
# Benchmark subject: collects lengths(x[[i]]) for every element of `x` by
# growing the result with c() on each iteration (no preallocation).
func_wo_preallocate <- function(x) {
  grown <- integer(0)
  for (idx in seq_along(x)) {
    grown <- c(grown, lengths(x[[idx]]))
  }
  grown
}
# Benchmark subject: same result as the growing version, but the integer
# result vector is allocated once up front and filled in place.
func_with_preallocate <- function(x) {
  filled <- integer(length(x))
  for (idx in seq_along(x)) {
    filled[[idx]] <- lengths(x[[idx]])
  }
  filled
}
# Both implementations are timed on the same 1000-element integer vector.
test_vec <- 1:1000
# 1000 timed evaluations per expression; the expressions are left
# unnamed, so the `expr` column shows the call text itself.
microbenchmark(func_wo_preallocate(test_vec),
func_with_preallocate(test_vec),
times = 1000L)
## # A tibble: 2,000 x 2
## expr time
## <fct> <dbl>
## 1 func_with_preallocate(test_vec) 3376600
## 2 func_with_preallocate(test_vec) 402900
## 3 func_wo_preallocate(test_vec) 3551500
## 4 func_with_preallocate(test_vec) 455200
## 5 func_with_preallocate(test_vec) 491000
## 6 func_with_preallocate(test_vec) 431700
## 7 func_wo_preallocate(test_vec) 1040000
## 8 func_with_preallocate(test_vec) 399100
## 9 func_wo_preallocate(test_vec) 1258400
## 10 func_wo_preallocate(test_vec) 1674400
## # ... with 1,990 more rows
- Imagine you have a directory full of CSV files that you want to read
in. You have their paths in a vector,
files <- dir("data/", pattern = "\\.csv$", full.names = TRUE)
, and now want to read each one with read_csv()
. Write the for loop that will load them into a single data frame.
# Read every CSV into its own slot of a preallocated list, then stack
# the pieces once at the end instead of binding inside the loop.
output <- vector(mode = "list", length = length(files))
for (file_idx in seq_along(files)) {
  output[[file_idx]] <- read_csv(files[file_idx])
}
my_big_df <- dplyr::bind_rows(output)
- What happens if you use
for (nm in names(x))
and x
has no names? What if only some of the elements are named? What if the names are not unique?
# Five elements but only four names: the fifth name is left missing,
# and "a" is deliberately duplicated.
my_vec <- 1:5
names(my_vec) <- c("a", "a", "b", "c")
# `[[` with a duplicated name returns the first match each time; the
# element without a usable name triggers the error shown below.
for (nm in names(my_vec)) {
print(my_vec[[nm]])
}
## [1] 1
## [1] 1
## [1] 3
## [1] 4
## Error in my_vec[[nm]]: subscript out of bounds
If x has no names, names(x) is NULL, so the loop body never runs. If the names are not unique, the subscripting returns the first element with that name each time the duplicated name appears. If only some of the elements are named, subsetting by a missing name throws an error.
- Write a function that prints the mean of each numeric column in a
data frame, along with its name. For example,
show_mean(iris)
would print:
show_mean(iris)
#> Sepal.Length: 5.84
#> Sepal.Width:  3.06
#> Petal.Length: 3.76
#> Petal.Width:  1.20
(Extra challenge: what function did I use to make sure that the numbers lined up nicely, even though the variable names had different lengths?)
# Print the mean of every numeric column of a data frame, one
# "<name>: <mean>" line per column.
#
# Arguments:
#   x  A data frame (or tibble). Non-numeric columns are skipped.
#
# Labels are padded to the width of the longest "<name>:" so the numbers
# line up, and means are shown with two decimals. Returns `x` invisibly;
# prints nothing when `x` has no numeric columns.
show_mean <- function(x) {
  numeric_cols <- x[vapply(x, is.numeric, logical(1))]
  if (length(numeric_cols) == 0L) {
    return(invisible(x))
  }

  labels <- paste0(names(numeric_cols), ":")
  # flag = "-" left-justifies, i.e. pads on the right of each label.
  labels <- formatC(labels, width = max(nchar(labels)), flag = "-")
  col_means <- vapply(numeric_cols, mean, numeric(1))

  writeLines(paste(labels, sprintf("%.2f", col_means)))
  invisible(x)
}
show_mean(mtcars)
## mpg:  20.09
## cyl:  6.19
## disp: 230.72
## hp:   146.69
## drat: 3.60
## wt:   3.22
## qsec: 17.85
## vs:   0.44
## am:   0.41
## gear: 3.69
## carb: 2.81
- What does this code do? How does it work?
# Exercise prompt. Named list of per-column transformations: each
# element's name is the mtcars column its function is applied to.
trans <- list(
# Rescales disp by a constant (presumably cubic inches to litres; confirm).
disp = function(x) x * 0.0163871,
# Recodes am as a factor labelled "auto" / "manual".
am = function(x) {
factor(x, labels = c("auto", "manual"))
}
)
# Look each column up by name and overwrite it with its transformed version.
for (var in names(trans)) {
mtcars[[var]] <- trans[[var]](mtcars[[var]])
}
trans
is a list where each element is a function, designed to do a
certain transformation over a specific column in mtcars
. Also, each
function is named after the column over which it is supposed to do the
transformation.
The for
cycle selects the corresponding columns in mtcars
using the
functions names, and then replaces them with the modified versions,
which are obtained by applying the corresponding function from the list.
- Read the documentation for
apply()
. In the 2d case, what two for loops does it generalise?
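In the 2d case it generalises a for loop over the rows of a matrix (MARGIN = 1) and a for loop over its columns (MARGIN = 2), applying the function to each row or column in turn.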
- Adapt
col_summary()
so that it only applies to numeric columns. You might want to start with an is_numeric()
function that returns a logical vector that has a TRUE corresponding to each numeric column.
# Flag the numeric columns of a data frame.
#
# Arguments:
#   df  A data frame (or any list of columns).
#
# Returns an unnamed logical vector with one entry per column: TRUE for
# integer and double columns, FALSE otherwise. is.numeric() is used
# rather than comparing class() to "numeric", which misses integer
# columns and yields more than one value for multi-class columns.
is_col_numeric <- function(df) {
  out <- vector("logical", length(df))
  for (i in seq_along(df)) {
    out[[i]] <- is.numeric(df[[i]])
  }
  out
}
# Apply `fun` to every numeric column of `df` and return the results as
# an unnamed double vector, in column order.
col_summary <- function(df, fun) {
  numeric_cols <- df[is_col_numeric(df)]
  n_cols <- length(numeric_cols)
  summaries <- double(n_cols)
  for (k in seq_len(n_cols)) {
    summaries[k] <- fun(numeric_cols[[k]])
  }
  summaries
}
col_summary(iris, mean)
## [1] 5.843333 3.057333 3.758000 1.199333
-
Write code that uses one of the map functions to:
-
Compute the mean of every column in
mtcars
.
# One mean per column, returned as a double vector named after the columns.
map_dbl(mtcars, mean)
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
- Determine the type of each column in
nycflights13::flights
# The exercise asks about nycflights13::flights, not mtcars.
map_chr(nycflights13::flights, typeof)
##           year          month            day       dep_time sched_dep_time
##      "integer"      "integer"      "integer"      "integer"      "integer"
##      dep_delay       arr_time sched_arr_time      arr_delay        carrier
##       "double"      "integer"      "integer"       "double"    "character"
##         flight        tailnum         origin           dest       air_time
##      "integer"    "character"    "character"    "character"       "double"
##       distance           hour         minute      time_hour
##       "double"       "double"       "double"       "double"
- Compute the number of unique values in each column of
iris
# length() returns an integer count, so map_int() is the matching variant.
map_int(iris, ~length(unique(.)))
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 35 23 43 22 3
- Generate 10 random normals for each of μ = − 10, 0, 10, and 100
# n is fixed by name, so each element of `means` fills rnorm()'s next
# argument, `mean`.
means <- c(-10, 0, 10, 100)
map(means, rnorm, n = 10)
## [[1]]
## [1] -10.858736 -9.793184 -10.223092 -9.057800 -10.205051 -7.976423
## [7] -10.664724 -9.251421 -7.369755 -9.666718
##
## [[2]]
## [1] -0.30214014 1.26317698 -0.31486994 -1.50665354 -0.06776626 0.45249957
## [7] 0.77038067 -0.90672827 0.48784884 0.82800872
##
## [[3]]
## [1] 11.000797 9.875195 9.202104 7.984341 9.764668 10.294582 9.869245
## [8] 9.224266 9.032104 9.513981
##
## [[4]]
## [1] 99.00828 100.47705 100.54046 98.20056 99.78811 99.44104 97.80572
## [8] 100.26525 100.45618 99.86985
- How can you create a single vector that for each column in a data frame indicates whether or not it’s a factor?
# One TRUE/FALSE per column, named after the columns.
map_lgl(mtcars, is.factor)
## mpg cyl disp hp drat wt qsec vs am gear carb
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
- What happens when you use the map functions on vectors that aren’t
lists? What does
map(1:5, runif)
do? Why?
# Each integer in 1:5 becomes runif()'s first argument (n), so element k
# of the result holds k uniform draws.
map(1:5, runif)
## [[1]]
## [1] 0.1748115
##
## [[2]]
## [1] 0.705458761 0.002346674
##
## [[3]]
## [1] 0.4691681 0.8412935 0.2413714
##
## [[4]]
## [1] 0.71314932 0.06280471 0.55132120 0.79630701
##
## [[5]]
## [1] 0.8952573 0.3166994 0.4248266 0.1931561 0.6947861
It iterates over each element of the vector, regardless of whether it is
an atomic vector or a list. In this case, it passes each element of
1:5
to the function runif
as the first argument (n
).
- What does
map(-2:2, rnorm, n = 5)
do? Why? What does map_dbl(-2:2, rnorm, n = 5)
do? Why?
# n = 5 is forwarded to rnorm() by name; each element of -2:2 then fills
# the next argument, mean.
map(-2:2, rnorm, n = 5)
## [[1]]
## [1] -1.140069 -3.528580 -2.989716 -1.835792 -2.735785
##
## [[2]]
## [1] -1.14310491 -0.80160919 -0.17416975 -0.07329977 -1.25868572
##
## [[3]]
## [1] 1.2460069 0.8118463 -1.5559002 -1.3585788 -0.7287338
##
## [[4]]
## [1] 1.3607947 0.5690289 0.8809755 1.0549564 2.3954550
##
## [[5]]
## [1] 1.145589 2.537922 1.339379 2.186574 2.697248
It passes each element of -2:2
to rnorm()
as the second argument
(mean
), since the first argument (n
) is specified through ...
.
Therefore, it creates a list of five vectors of length five, generated
from normal distributions with means going from -2
to 2
.
# Errors: map_dbl() needs a single double per element, but each rnorm()
# call here returns five values.
map_dbl(-2:2, rnorm, n = 5)
## Error in `stop_bad_type()`:
## ! Result 1 must be a single double, not a double vector of length 5
This code tries to do the same, but it fails since map_dbl()
tries to
coerce the output to an atomic numeric vector, but that can not be done
directly because each element of the result has length more than 1.
- Rewrite
map(x, function(df) lm(mpg ~ wt, data = df))
to eliminate the anonymous function.
# Formula shorthand: `.` stands for the current element of x, which is
# passed to lm() as `data`.
map(x, ~lm(mpg ~ wt, data = .))
- Implement your own version of
every()
using a for loop. Compare it with purrr::every()
. What does purrr’s version do that your version doesn’t?
x <- list(1:5, letters, list(10))
# Hand-written every(): TRUE when `fun` holds for all elements of `x`.
# The predicate is evaluated on every element (no early exit) and the
# collected results are reduced with all().
my_every <- function(x, fun) {
  n_items <- length(x)
  verdicts <- logical(n_items)
  for (idx in seq_len(n_items)) {
    verdicts[[idx]] <- fun(x[[idx]])
  }
  all(verdicts)
}
x %>% my_every(is_vector)
## [1] TRUE
The purrr version allows passing additional arguments to the predicate
function using ...
.
- Create an enhanced
col_summary()
that applies a summary function to every numeric column in a data frame
# Flag the numeric columns of a data frame: a named logical vector with
# one entry per column. is.numeric() also accepts integer columns and
# always yields a single value, unlike comparing class() to "numeric".
is_col_numeric <- function(df) {
  map_lgl(df, is.numeric)
}
# Apply `fun` to each numeric column of `df`; the result is a double
# vector named after those columns.
col_summary <- function(df, fun) {
  keep <- is_col_numeric(df)
  map_dbl(df[keep], fun)
}
col_summary(iris, mean)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
- A possible base R equivalent of
col_summary()
is:
# Exercise prompt: base R attempt at col_summary(). Left as given; the
# sapply() calls are what the exercise is about.
col_sum3 <- function(df, f) {
# For a data frame with no columns sapply() returns an empty list rather
# than logical(0), which then breaks the subsetting on the next line.
is_num <- sapply(df, is.numeric)
df_num <- df[, is_num]
# sapply() picks its return type from the results, so the output is not
# guaranteed to be a numeric vector.
sapply(df_num, f)
}
But it has a number of bugs as illustrated with the following inputs:
# Test input: two integer columns and one character column.
df <- tibble(
x = 1:3,
y = 3:1,
z = c("a", "b", "c")
)
# OK
col_sum3(df, mean)
## x y
## 2 2
# Has problems: doesn't always return a numeric vector
col_sum3(df[1:2], mean)
## x y
## 2 2
col_sum3(df[1], mean)
## x
## 2
# Zero-column input: fails inside col_sum3() at the column subsetting step.
col_sum3(df[0], mean)
## Error in `vectbl_as_col_location()`:
## ! Must subset columns with a valid subscript vector.
## x Subscript `is_num` has the wrong type `list`.
## i It must be logical, numeric, or character.
In the case where df
is subsetted by the empty vector numeric(0)
,
sapply(df, is.numeric)
returns a list instead of a logical, causing
the subsequent subsetting operation (df[, is_num]
) to fail (since we
can not use a list to do subsetting, only a logical vector).
Also, even if the subsetting operation succeeded in this case, the output
wouldn’t be a numeric vector, because sapply(df_num, f)
returns a list
when the result has length zero.
We can avoid both errors by replacing sapply
with map_lgl
and
map_dbl
in each call, respectively.