Skip to content

Commit

Permalink
option date.zero for *.GenerateData methods
Browse files Browse the repository at this point in the history
  • Loading branch information
mplatzer authored Oct 5, 2016
1 parent be53adc commit 6675172
Show file tree
Hide file tree
Showing 17 changed files with 59 additions and 28 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: BTYDplus
Type: Package
Title: Probabilistic Models for Assessing and Predicting your Customer Base
Version: 0.11.1
Version: 0.11.2
Authors@R: person("Michael", "Platzer", email = "michael.platzer@gmail.com", role = c("aut", "cre"))
Description: Provides advanced statistical methods to describe and predict customers'
purchase behavior in a non-contractual setting. It uses historic transaction records to fit a
Expand Down
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
0.11.2
- add option `date.zero` to specify start date for cohort in `*.GenerateData` methods

0.11.1
- hot fix for broken `elog2cbs` in `0.11.0` release

Expand Down
15 changes: 8 additions & 7 deletions R/mbg-cnbd-k.R
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,7 @@ xbgcnbd.PlotTrackingInc <- function(params, T.cal, T.tot, actual.inc.tracking.da
#' @param T.star Length of holdout period. This may be a vector.
#' @param params A vector with model parameters \code{k}, \code{r},
#' \code{alpha}, \code{a} and \code{b}, in that order.
#' @param date.zero Initial date for cohort start. Can be of class character, Date or POSIXt.
#' @return List of length 2:
#' \item{\code{cbs}}{A data.frame with a row for each customer and the summary statistic as columns.}
#' \item{\code{elog}}{A data.frame with a row for each transaction, and columns \code{cust}, \code{date} and \code{t}.}
Expand All @@ -830,19 +831,19 @@ xbgcnbd.PlotTrackingInc <- function(params, T.cal, T.tot, actual.inc.tracking.da
#'
#' # event log - one row per event/transaction
#' head(data$elog)
mbgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params) {
xbgcnbd.GenerateData(n, T.cal, T.star, params, dropout_at_zero = TRUE)
mbgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params, date.zero = "2000-01-01") {
xbgcnbd.GenerateData(n, T.cal, T.star, params, date.zero, dropout_at_zero = TRUE)
}

#' @rdname mbgcnbd.GenerateData
#' @export
bgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params) {
xbgcnbd.GenerateData(n, T.cal, T.star, params, dropout_at_zero = FALSE)
bgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params, date.zero = "2000-01-01") {
xbgcnbd.GenerateData(n, T.cal, T.star, params, date.zero, dropout_at_zero = FALSE)
}

#' @keywords internal
xbgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params, dropout_at_zero = NULL) {
stopifnot(!is.null(dropout_at_zero))
xbgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params, date.zero = "2000-01-01", dropout_at_zero = NULL) {
stopifnot(is.logical(dropout_at_zero))
dc.check.model.params.safe(c("k", "r", "alpha", "a", "b"), params, "xbgcnbd.GenerateData")
if (params[1] != floor(params[1]) | params[1] < 1)
stop("k must be integer being greater or equal to 1.")
Expand All @@ -857,7 +858,7 @@ xbgcnbd.GenerateData <- function(n, T.cal, T.star = NULL, params, dropout_at_zer
T.cal.fix <- max(T.cal)
T.cal <- rep(T.cal, length.out = n)
T.zero <- T.cal.fix - T.cal
date.zero <- as.POSIXct("2000-01-01 00:00:00 CEST")
date.zero <- as.POSIXct(date.zero)

# sample intertransaction timings parameter lambda for each customer
lambdas <- rgamma(n, shape = r, rate = alpha)
Expand Down
5 changes: 3 additions & 2 deletions R/nbd.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ nbd.ConditionalExpectedTransactions <- function(params, T.star, x, T.cal) {
#' @param T.cal Length of calibration period.
#' @param T.star Length of holdout period. This may be a vector.
#' @param params NBD parameters - a vector with \code{r} and \code{alpha} in that order.
#' @param date.zero Initial date for cohort start. Can be of class character, Date or POSIXt.
#' @return List of length 2:
#' \item{\code{cbs}}{A data.frame with a row for each customer and the summary statistic as columns.}
#' \item{\code{elog}}{A data.frame with a row for each transaction, and columns \code{cust}, \code{date} and \code{t}.}
Expand All @@ -153,15 +154,15 @@ nbd.ConditionalExpectedTransactions <- function(params, T.star, x, T.cal) {
#' data <- nbd.GenerateData(n, T.cal, T.star, params)
#' cbs <- data$cbs # customer by sufficient summary statistic - one row per customer
#' elog <- data$elog # Event log - one row per event/purchase
nbd.GenerateData <- function(n, T.cal, T.star, params) {
nbd.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01") {
# check model parameters
dc.check.model.params.safe(c("r", "alpha"), params, "nbd.GenerateData")

# set start date for each customer, so that they share same T.cal date
T.cal.fix <- max(T.cal)
T.cal <- rep(T.cal, length.out = n)
T.zero <- T.cal.fix - T.cal
date.zero <- as.POSIXct("2000-01-01 00:00:00 CEST")
date.zero <- as.POSIXct(date.zero)

r <- params[1]
alpha <- params[2]
Expand Down
5 changes: 3 additions & 2 deletions R/pareto-ggg-mcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ pggg.plotRegularityRateHeterogeneity <- function(draws, xmax = NULL, fn = NULL,
#' @param T.star Length of holdout period. This may be a vector.
#' @param params A list of model parameters \code{r},
#' \code{alpha}, \code{s}, \code{beta}, \code{t} and \code{gamma}.
#' @param date.zero Initial date for cohort start. Can be of class character, Date or POSIXt.
#' @return List of length 2:
#' \item{\code{cbs}}{A data.frame with a row for each customer and the summary statistic as columns.}
#' \item{\code{elog}}{A data.frame with a row for each transaction, and columns \code{cust}, \code{date} and \code{t}.}
Expand All @@ -286,13 +287,13 @@ pggg.plotRegularityRateHeterogeneity <- function(draws, xmax = NULL, fn = NULL,
#' data <- pggg.GenerateData(n = 1000, T.cal = 32, T.star = 32, params)
#' cbs <- data$cbs # customer by sufficient summary statistic - one row per customer
#' elog <- data$elog # Event log - one row per event/purchase
pggg.GenerateData <- function(n, T.cal, T.star, params) {
pggg.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01") {

# set start date for each customer, so that they share same T.cal date
T.cal.fix <- max(T.cal)
T.cal <- rep(T.cal, length.out = n)
T.zero <- T.cal.fix - T.cal
date.zero <- as.POSIXct("2000-01-01 00:00:00 CEST")
date.zero <- as.POSIXct(date.zero)

# sample regularity parameter k for each customer
if (all(c("t", "gamma") %in% names(params))) {
Expand Down
5 changes: 3 additions & 2 deletions R/pareto-nbd-abe.R
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ abe.mcmc.DrawParameters <- function(cal.cbs, covariates = c(), mcmc = 2500, burn
#' \eqn{max(T.cal)-T.cal}.
#' @param T.star Length of holdout period. This may be a vector.
#' @param params A list of model parameters: \code{beta} and \code{gamma}.
#' @param date.zero Initial date for cohort start. Can be of class character, Date or POSIXt.
#' @return List of length 2:
#' \item{\code{cbs}}{A data.frame with a row for each customer and the summary statistic as columns.}
#' \item{\code{elog}}{A data.frame with a row for each transaction, and columns \code{cust}, \code{date} and \code{t}.}
Expand All @@ -272,13 +273,13 @@ abe.mcmc.DrawParameters <- function(cal.cbs, covariates = c(), mcmc = 2500, burn
#' data <- abe.GenerateData(n = 2000, T.cal = 32, T.star = 32, params)
#' cbs <- data$cbs # customer by sufficient summary statistic - one row per customer
#' elog <- data$elog # Event log - one row per event/purchase
abe.GenerateData <- function(n, T.cal, T.star, params) {
abe.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01") {

# set start date for each customer, so that they share same T.cal date
T.cal.fix <- max(T.cal)
T.cal <- rep(T.cal, length.out = n)
T.zero <- T.cal.fix - T.cal
date.zero <- as.POSIXct("2000-01-01 00:00:00 CEST")
date.zero <- as.POSIXct(date.zero)

if (!is.matrix(params$beta))
params$beta <- matrix(params$beta, nrow = 1, ncol = 2)
Expand Down
5 changes: 3 additions & 2 deletions R/pareto-nbd-mcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ pnbd.mcmc.DrawParameters <- function(cal.cbs, mcmc = 2500, burnin = 500, thin =
#' @param T.star Length of holdout period. This may be a vector.
#' @param params A list of model parameters \code{r},
#' \code{alpha}, \code{s}, \code{beta}.
#' @param date.zero Initial date for cohort start. Can be of class character, Date or POSIXt.
#' @return List of length 2:
#' \item{\code{cbs}}{A data.frame with a row for each customer and the summary statistic as columns.}
#' \item{\code{elog}}{A data.frame with a row for each transaction, and columns \code{cust}, \code{date} and \code{t}.}
Expand All @@ -269,7 +270,7 @@ pnbd.mcmc.DrawParameters <- function(cal.cbs, mcmc = 2500, burnin = 500, thin =
#' data <- pnbd.GenerateData(n = 1000, T.cal = 32, T.star = 32, params)
#' cbs <- data$cbs # customer by sufficient summary statistic - one row per customer
#' elog <- data$elog # Event log - one row per event/purchase
pnbd.GenerateData <- function(n, T.cal, T.star, params) {
pnbd.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01") {
params$k <- 1
pggg.GenerateData(n, T.cal, T.star, params)
pggg.GenerateData(n, T.cal, T.star, params, date.zero)
}
4 changes: 3 additions & 1 deletion man/abe.GenerateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions man/mbgcnbd.GenerateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/nbd.GenerateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/pggg.GenerateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/pnbd.GenerateData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion tests/testthat/test-bg-cnbd-k.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@ test_that("BG/CNBD-k", {
set.seed(1)
params <- c(1, 0.85, 1.45, 0.79, 2.42)
n <- 2000
date.zero <- "2010-01-01"
sim <- bgcnbd.GenerateData(n,
round(runif(n, 36, 96) / 12) * 12,
36,
params)
params,
date.zero)
cbs <- sim$cbs
elog <- sim$elog
expect_is(elog$date, "POSIXct")
expect_is(cbs$first, "POSIXct")
expect_equal(min(sim$elog$date), as.POSIXct(date.zero))

params.est.btyd <- BTYD::bgnbd.EstimateParameters(cbs)
params.est.btyd_plus <- bgcnbd.EstimateParameters(cbs, k = 1)[-1]
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test-mbg-cnbd-k.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ test_that("MBG/CNBD-k", {
sim <- mbgcnbd.GenerateData(n,
round(runif(n, 36, 96) / 12) * 12,
36,
params)
params,
"2010-01-01")
cbs <- sim$cbs
elog <- sim$elog

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-nbd.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ test_that("NBD", {
# generate artificial NBD data
set.seed(1)
params <- c(r = 0.85, alpha = 4.45)
expect_silent(nbd.GenerateData(100, 32, c(16, 32), params))
expect_silent(nbd.GenerateData(100, 32, c(16, 32), params, "2010-01-01"))
cbs <- nbd.GenerateData(5000, 32, 32, params)$cbs

# estimate parameters, and compare to true parameters
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test-pareto-nbd-abe.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ test_that("Pareto/NBD (Abe) MCMC", {
sim <- abe.GenerateData(n,
round(runif(n, 36, 96) / 12) * 12,
36,
params)
params,
"2010-01-01")
cbs <- sim$cbs

# test basic parameter estimation
Expand Down
8 changes: 6 additions & 2 deletions tests/testthat/test-pareto-nbd-mcmc.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@ test_that("Pareto/NBD MCMC", {
n <- 5
T.cal <- c(26, 26, 28.5, 52, 52)
T.star <- 52
sim <- pggg.GenerateData(5, T.cal, T.star, params)
date.zero <- "2010-01-01"
sim <- pggg.GenerateData(5, T.cal, T.star, params, date.zero)
expect_equal(names(sim), c("cbs", "elog"))
expect_is(sim$cbs, "data.frame")
expect_is(sim$elog, "data.frame")
expect_equal(names(sim$elog), c("cust", "t", "date"))
expect_equal(nrow(sim$cbs), n)
expect_equal(uniqueN(sim$elog$cust), n)
expect_is(sim$elog$date, "POSIXct")
expect_equal(min(sim$elog$date), as.POSIXct(date.zero))
expect_equal(min(sim$cbs$first), as.POSIXct(date.zero))
simElog <- sim$elog
simCBS <- sim$cbs
# recreate CBS via `elog2cbs`
Expand All @@ -24,7 +28,7 @@ test_that("Pareto/NBD MCMC", {
T.tot = date.zero + max(T.cal + T.star) * 3600 * 24 * 7)
expect_equal(simCBSc, subset(simCBS, select = names(simCBSc)))
# multiple T.star's
sim <- pggg.GenerateData(100, 52, c(26, 104), params)
sim <- pggg.GenerateData(100, 52, c(26, 104), params, date.zero = as.Date("2010-01-01"))
expect_true(all(c("x.star26", "x.star104") %in% names(sim$cbs)))
expect_true(all(sim$cbs$x.star104 >= sim$cbs$x.star26))
expect_true(any(sim$cbs$x.star104 > sim$cbs$x.star26))
Expand Down

0 comments on commit 6675172

Please sign in to comment.