-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bandits #35
base: master
Are you sure you want to change the base?
Bandits #35
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -691,3 +691,247 @@ SecretarySampler <- R6::R6Class("SecretarySampler", public = list( | |
} | ||
) | ||
) | ||
#' Create an abstract streamer for the multi-armed bandit problem. | ||
#' | ||
#' @description \code{MultiArmedBandit} | ||
#' | ||
#' @docType class | ||
#' | ||
#' | ||
#' @export | ||
#' @format An \code{\link{R6Class}} generator object | ||
MultiArmedBandit <- R6::R6Class( | ||
"MultiArmedBandit", | ||
public = list( | ||
#' @description Creates a new \code{MultiArmedBandit} streamer object. | ||
#' | ||
#' @param K the number of arms | ||
#' @param T number of rounds / draws | ||
#' | ||
#' @return The new \code{MultiArmedBandit} (invisibly) | ||
initialize = function(K, T, N = NULL) { | ||
private$K <- K | ||
private$T <- T | ||
arm_info <- list() | ||
for (k in seq(K)) { | ||
arm_info[[k]] <- list(mean = CMA$new()) | ||
} | ||
private$arm_info <- arm_info | ||
invisible(self) | ||
}, | ||
#' @description Update the \code{MultiArmedBandit} streamer object. | ||
#' | ||
#' @param x a single draw from an arm | ||
#' @param k index of the chosen arm | ||
#' | ||
#' | ||
#' @return The updated code{MultiArmedBandit} (invisibly) | ||
update = function(x, k) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this suggestion. |
||
if (length(x) > 1) { | ||
stop("Only single-value updated allowed") | ||
} | ||
private$arm_info[[k]]$mean$update(x) | ||
private$n_observed <- private$n_observed + 1 | ||
} | ||
), | ||
active = list( | ||
#' @description Returns the the list containing values known to | ||
#' to the Bandit in the current state. | ||
#' | ||
#' | ||
#' @return list with summary of the state of the Bandit. | ||
value = function() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we said that |
||
private$get_state() | ||
} | ||
), | ||
private = list( | ||
K = NULL, | ||
T = NULL, | ||
N = NULL, | ||
which = NULL, | ||
n_observed = 0, | ||
state = NULL, | ||
arm_info = NULL, | ||
#' @description Creates and returns a list with information about | ||
#' the values known to the Bandit in the current state. | ||
#' | ||
#' @return list with summary of the state of the Bandit. | ||
get_state = function() { | ||
arm_means <- c() | ||
for (k in seq(K)) { | ||
arm_means[k] <- private$arm_info[[k]]$mean$value | ||
} | ||
state <- list( | ||
which = private$which, | ||
state = private$state, | ||
n_observed = private$n_observed, | ||
arm_means = arm_means | ||
) | ||
state | ||
} | ||
) | ||
) | ||
#' Create a streamer for the uniform exploration strategy in the multi-armed | ||
#' bandit problem. | ||
#' | ||
#' @description \code{ExploreFristNBandit} | ||
#' | ||
#' @docType class | ||
#' | ||
#' @examples | ||
#' means <- c(1, 2) | ||
#' N <- 3 | ||
#' T <- 10 | ||
#' K <- length(means) | ||
#' Bandit <- ExploreFristNBandit$new(K, T, N) | ||
#' for (t in seq(T)) { | ||
#' # get which arm to choose | ||
#' k <- Bandit$value$which | ||
#' # update the streamer with a value drawn from the kth arm | ||
#' Bandit$update(rnorm(1, mean = means[k]), k) | ||
#' } | ||
#' | ||
#' @export | ||
#' @format An \code{\link{R6Class}} generator object | ||
ExploreFristNBandit <- R6::R6Class( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo + this is typically called epsilon-first There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, I relied on the Introduction to Multi-Armed Bandits by Aleksandrs Slivkin, where they call it an Explore-First algorithm with a parameter N. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, perhaps just |
||
"ExploreFirstNBandit", | ||
inherit = MultiArmedBandit, | ||
public = list( | ||
#' @description Creates a new \code{ExploreFirstNBandit} streamer | ||
#' object. | ||
#' | ||
#' @param K the number of arms | ||
#' @param T number of rounds / draws | ||
#' @param N number of draws in the exploration phase, if not specified | ||
#' the optimal value for N = (T/K) ^ (2/3) * log(T) ^ (1/3) is chosen. | ||
#' | ||
#' @examples | ||
#' Bandit <- ExploreFristNBandit$new(3, 20, 2) | ||
#' | ||
#' @return The new \code{ExploreFristNBandit} (invisibly) | ||
initialize = function(K, T, N = NULL) { | ||
super$initialize(K, T) | ||
private$state <- "exploration" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be a Boolean? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So instead of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Exactly. Less potential for typos. |
||
if (is.null(N)) { | ||
N <- floor((T / K) ^ (2 / 3) * log(T) ^ (1 / 3)) | ||
} | ||
if (T < N * K) { | ||
stop(sprintf( | ||
"More draws are required for the values of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this error could be clearer. Something like "More exploration draws are specified than the total number of rounds" |
||
N=%s and K=%s", | ||
N, | ||
K | ||
)) | ||
} | ||
private$N <- N | ||
private$which <- 1 | ||
invisible(self) | ||
}, | ||
#' @description Update the \code{ExploreFristNBandit} streamer object. | ||
#' | ||
#' @param x a single draw from an arm | ||
#' @param k index of the chosen arm | ||
#' | ||
#' @examples | ||
#' Bandit$update(rnorm(1), 1) | ||
#' | ||
#' @return The updated code{ExploreFristNBandit} (invisibly) | ||
update = function(x, k) { | ||
super$update(x, k) | ||
if (private$n_observed < K * N) { | ||
private$which <- ceiling(private$n_observed / private$N) | ||
} else if (private$n_observed == K * N) { | ||
private$state <- "exploitation" | ||
private$which <- | ||
which.max(private$get_state()$arm_means) | ||
} | ||
} | ||
), | ||
private = list(N = NULL) | ||
) | ||
|
||
#' Create a streamer for the epsilon-greedy strategy in the multi-armed | ||
#' bandit problem. | ||
#' | ||
#' @description \code{EpsilonGreedyBandit} | ||
#' | ||
#' @docType class | ||
#' | ||
#' @examples | ||
#' means <- c(1, 2, 5) | ||
#' T <- 10 | ||
#' K <- length(means) | ||
#' Bandit <- EpsilonGreedyBandit$new(K, T) | ||
#' for (t in seq(T)) { | ||
#' # get which arm to choose | ||
#' k <- Bandit$value$which | ||
#' # update the streamer with a value drawn from the kth arm | ||
#' x <- rnorm(1, mean = means[k]) | ||
#' Bandit$update(x, k) | ||
#' } | ||
#' | ||
#' @export | ||
#' @format An \code{\link{R6Class}} generator object | ||
EpsilonGreedyBandit <- R6::R6Class( | ||
"EpsilonGreedyBandit", | ||
inherit = MultiArmedBandit, | ||
public = list( | ||
#' @description Creates a new \code{EpsilonGreedyBandit} streamer | ||
#' object. | ||
#' | ||
#' @param K the number of arms | ||
#' @param T number of rounds / draws | ||
#' @param epsilon vector of length T specifying the exploration | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typically, epsilon greedy has a scalar epsilon held constant over the game (Sutton, R. S. & Barto, A. G. 1998 Reinforcement learning). I think something this general doesn't really need to exist. The closest thing is epsilon decreasing where epsilons form a geometric series. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I relied on this: https://arxiv.org/pdf/1904.07272.pdf, see Algorithm 1.2, epsilon can change in time. Perhaps we can leave it as it is and just add the possibility for epsilon to be a constant value. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair enough about the source but for the purpose of online algorithms, specifying the whole epsilon doesn't make much sense. I think sticking with a fixed epsilon of a geometric series is more in-line with the packages vision. Related to this, does it make sense that these games have finite time horizons? Shouldn't the point be that you can steam indefinitely? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree + was also not convinced by the fine time T appearing in those algorithms. I will adjust them for infinite sampling. |
||
#' probabilities at time t. If NULL, an optiam value of | ||
#' epsilon(t) = t^(-1/3) * (K * log(t))^(1 / 3) is used. | ||
#' | ||
#' @examples | ||
#' Bandit <- EpsilonGreedyBandit$new(3, 20, rep(0.1, 20)) | ||
#' | ||
#' @return The new \code{EpsilonGreedyBandit} (invisibly) | ||
initialize = function(K, T, epsilon = NULL) | ||
{ | ||
super$initialize(K, T) | ||
if (is.null(epsilon)) | ||
{ | ||
t <- seq(T) | ||
epsilon <- | ||
t ^ (-1 / 3) * (K * logb(t, base = T)) ^ (1 / 3) | ||
epsilon[1] <- 1 | ||
} | ||
if (length(epsilon) != T) { | ||
stop( | ||
"The length of probability vector epsilon, must agree | ||
with the total number of draws T." | ||
) | ||
} | ||
if (any(epsilon > 1) || any(epsilon < 0)) { | ||
stop("All values in epsilon must be between 0 and 1.") | ||
} | ||
private$state <- "exploration" | ||
private$which <- floor(runif(1, 1, K + 1)) | ||
private$epsilon <- epsilon | ||
invisible(self) | ||
}, | ||
#' @description Update the \code{EpsilonGreedyBandit} streamer object. | ||
#' | ||
#' @param x a single draw from an arm | ||
#' @param k index of the chosen arm | ||
#' | ||
#' @examples | ||
#' Bandit$update(rnorm(1), 1) | ||
#' | ||
#' @return The updated code{EpsilonGreedyBandit} (invisibly) | ||
update = function(x, k) { | ||
super$update(x, k) | ||
if (runif(1) < private$epsilon[private$n_observed]) { | ||
private$state <- "exploration" | ||
k <- floor(runif(1, 1, K + 1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
} else { | ||
private$state <- "exploitation" | ||
k <- which.max(private$get_state()$arm_means) | ||
} | ||
private$which <- k | ||
} | ||
), private = list(epsilon = NULL) | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make more sense to have the
initialize
method raise an error saying that it's abstract and then use a private method to run the code currently ininitialize
that gets called by the child?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense.