diff --git a/DESCRIPTION b/DESCRIPTION index 8ab19ff978..48ca3a18f0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -254,6 +254,7 @@ Suggests: sda, sf, smoof, + snow, sparseLDA, stepPlr, survAUC, diff --git a/R/makeLearner.R b/R/makeLearner.R index 67a5dbc1e4..5a55d34c5e 100644 --- a/R/makeLearner.R +++ b/R/makeLearner.R @@ -72,7 +72,7 @@ #' `ntree = se.ntree`, the latter of which controls the number of trees in the #' individual random forests which are bootstrapped. The "noisy bootstrap" is #' executed when `se.ntree < ntree` which is less computationally expensive. A -#' Monte-Carlo bias correction may make the latter option prefarable in many +#' Monte-Carlo bias correction may make the latter option preferable in many #' cases. Defaults are `se.boot = 50` and `se.ntree = 100`. #' #' \item If `se.method = "sd"`, the default, the standard deviation of the @@ -105,20 +105,21 @@ #' the target feature in training data, respectively. #' #' The default method is \dQuote{mean} which corresponds to the ZeroR algorithm -#' from WEKA, see . +#' from WEKA. #' #' @section classif.featureless: #' #' Method \dQuote{majority} predicts always the majority class for each new -#' observation. In the case of ties, one randomly sampled, constant class is predicted -#' for all observations in the test set. -#' This method is used as the default. It is very similar to the ZeroR classifier -#' from WEKA (see ). The only difference is +#' observation. In the case of ties, one randomly sampled, constant class is +#' predicted for all observations in the test set. +#' This method is used as the default. It is very similar to the ZeroR +#' classifier from WEKA. The only difference is #' that ZeroR always predicts the first class of the tied class values instead #' of sampling them randomly. #' -#' Method \dQuote{sample-prior} always samples a random class for each individual test -#' observation according to the prior probabilities observed in the training data. 
+#' Method \dQuote{sample-prior} always samples a random class for each +#' individual test observation according to the prior probabilities observed in +#' the training data. #' #' If you opt to predict probabilities, the class probabilities always #' correspond to the prior probabilities observed in the training data. diff --git a/R/measures.R b/R/measures.R index 4561817b1b..0408e9ad2b 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1375,7 +1375,7 @@ cindex = makeMeasure( #' @references #' H. Uno et al. #' *On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data* -#' Statistics in medicine. 2011;30(10):1105-1117. . +#' Statistics in medicine. 2011;30(10):1105-1117. \doi{10.1002/sim.4154}. cindex.uno = makeMeasure( id = "cindex.uno", minimize = FALSE, best = 1, worst = 0, properties = c("surv", "req.pred", "req.truth", "req.model", "req.task"), diff --git a/README.md b/README.md index e56c5995fc..940e66563a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # mlr -Package website: [release](https://mlr.mlr-org.com/) | [dev](https://mlr.mlr-org.com/dev) +Package website: [release](https://mlr.mlr-org.com/) | [dev](https://mlr.mlr-org.com/dev/) Machine learning in R. @@ -9,9 +9,9 @@ Machine learning in R. 
[![tic](https://github.com/mlr-org/mlr/workflows/tic/badge.svg?branch=main)](https://github.com/mlr-org/mlr/actions) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version-ago/mlr)](https://cran.r-project.org/package=mlr) [![cran checks](https://cranchecks.info/badges/worst/mlr)](https://cran.r-project.org/web/checks/check_results_mlr.html) -[![CRAN Downloads](https://cranlogs.r-pkg.org/badges/mlr)](https://cran.rstudio.com/web/packages/mlr/index.html) +[![CRAN Downloads](https://cranlogs.r-pkg.org/badges/mlr)](https://cran.r-project.org/package=mlr) [![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr-blue.svg)](https://stackoverflow.com/questions/tagged/mlr) -[![lifecycle](https://img.shields.io/badge/lifecycle-retired-orange.svg)](https://www.tidyverse.org/lifecycle/#retired) +[![lifecycle](https://img.shields.io/badge/lifecycle-retired-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![codecov](https://codecov.io/gh/mlr-org/mlr/branch/main/graph/badge.svg)](https://codecov.io/gh/mlr-org/mlr) @@ -50,7 +50,7 @@ remotes::install_github("mlr-org/mlr") ## Citing {mlr} in publications -Please cite our [JMLR paper](http://jmlr.org/papers/v17/15-066.html) [[bibtex](http://www.jmlr.org/papers/v17/15-066.bib)]. +Please cite our [JMLR paper](https://jmlr.org/papers/v17/15-066.html) [[bibtex](https://www.jmlr.org/papers/v17/15-066.bib)]. Some parts of the package were created as part of other publications. If you use these parts, please cite the relevant work appropriately. diff --git a/man/makeLearner.Rd b/man/makeLearner.Rd index ff8af99b95..08cf0832f7 100644 --- a/man/makeLearner.Rd +++ b/man/makeLearner.Rd @@ -114,7 +114,7 @@ of the bootstrap predictions. The "brute force" bootstrap is executed when \code{ntree = se.ntree}, the latter of which controls the number of trees in the individual random forests which are bootstrapped. The "noisy bootstrap" is executed when \code{se.ntree < ntree} which is less computationally expensive. 
A -Monte-Carlo bias correction may make the latter option prefarable in many +Monte-Carlo bias correction may make the latter option preferable in many cases. Defaults are \code{se.boot = 50} and \code{se.ntree = 100}. \item If \code{se.method = "sd"}, the default, the standard deviation of the @@ -149,22 +149,23 @@ for each new observation which corresponds to the observed mean or median of the target feature in training data, respectively. The default method is \dQuote{mean} which corresponds to the ZeroR algorithm -from WEKA, see \url{https://weka.wikispaces.com/ZeroR}. +from WEKA. } \section{classif.featureless}{ Method \dQuote{majority} predicts always the majority class for each new -observation. In the case of ties, one randomly sampled, constant class is predicted -for all observations in the test set. -This method is used as the default. It is very similar to the ZeroR classifier -from WEKA (see \url{https://weka.wikispaces.com/ZeroR}). The only difference is +observation. In the case of ties, one randomly sampled, constant class is +predicted for all observations in the test set. +This method is used as the default. It is very similar to the ZeroR +classifier from WEKA. The only difference is that ZeroR always predicts the first class of the tied class values instead of sampling them randomly. -Method \dQuote{sample-prior} always samples a random class for each individual test -observation according to the prior probabilities observed in the training data. +Method \dQuote{sample-prior} always samples a random class for each +individual test observation according to the prior probabilities observed in +the training data. If you opt to predict probabilities, the class probabilities always correspond to the prior probabilities observed in the training data. diff --git a/man/measures.Rd b/man/measures.Rd index a26828116e..dbf8ce8413 100644 --- a/man/measures.Rd +++ b/man/measures.Rd @@ -287,7 +287,7 @@ IEEE Transactions on Knowledge and Data Engineering, vol. 
21, no. 9. pp. 1263-12 H. Uno et al. \emph{On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data} -Statistics in medicine. 2011;30(10):1105-1117. \url{https://doi.org/10.1002/sim.4154}. +Statistics in medicine. 2011;30(10):1105-1117. \doi{10.1002/sim.4154}. H. Uno et al. \emph{Evaluating Prediction Rules for T-Year Survivors with Censored Regression Models} diff --git a/tests/testthat.R b/tests/testthat.R index 6f07e6e916..7f3b815168 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,2 +1,5 @@ library(testthat) -test_check("mlr") +if (identical(Sys.getenv("NOT_CRAN"), "true")) { + set.seed(getOption("mlr.debug.seed")) + test_check("mlr") +} diff --git a/tests/testthat/test_parallel_mpi.R b/tests/testthat/test_parallel_mpi.R index 6d50f7eb69..3e8e0d1ef8 100644 --- a/tests/testthat/test_parallel_mpi.R +++ b/tests/testthat/test_parallel_mpi.R @@ -2,6 +2,8 @@ test_that("parallel resampling", { skip_on_os("mac") skip_on_ci() + skip_on_cran() + doit = function(mode, level) { lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("CV", iters = 2L) @@ -20,6 +22,8 @@ test_that("parallel resampling", { test_that("parallel tuning", { skip_on_os("mac") skip_on_ci() + skip_on_cran() + doit = function(mode, level) { lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("CV", iters = 2L) @@ -40,6 +44,7 @@ test_that("parallel tuning", { test_that("parallel featsel", { skip_on_os("mac") skip_on_ci() + skip_on_cran() doit = function(mode, level) { lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("CV", iters = 2L) @@ -59,6 +64,8 @@ test_that("parallel featsel", { test_that("parallel exporting of options works", { skip_on_os("mac") skip_on_ci() + skip_on_cran() + doit = function(mode, level) { data = iris @@ -82,6 +89,8 @@ test_that("parallel exporting of options works", { test_that("parallel partial dependence", { skip_on_os("mac") skip_on_ci() + skip_on_cran() + doit = function(mode) { lrn = 
makeLearner("regr.rpart") fit = train(lrn, regr.task) @@ -98,6 +107,8 @@ test_that("parallel partial dependence", { test_that("parallel ensembles", { skip_on_os("mac") skip_on_ci() + skip_on_cran() + doit = function(mode, level) { on.exit(parallelStop()) diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R index 0f4f7051f2..356dbd12fc 100644 --- a/tests/testthat/test_surv_measures.R +++ b/tests/testthat/test_surv_measures.R @@ -34,6 +34,8 @@ test_that("survival measures do not do stupid things", { }) test_that("setting measure pars works", { + requirePackagesOrSkip("survAUC", default.method = "load") + mod = train("surv.rpart", wpbc.task) pred = predict(mod, wpbc.task) @@ -58,6 +60,8 @@ test_that("setting measure pars works", { test_that("hand constructed tests", { requirePackagesOrSkip("Hmisc", default.method = "load") + requirePackagesOrSkip("survAUC", default.method = "load") + n = 100 time = sort(rexp(n, 0.1)) + 1 data = data.frame(time = time, status = 1, x1 = order(time)) @@ -76,8 +80,10 @@ test_that("hand constructed tests", { expect_equal(unname(perf), c(1, 1, 0.99)) }) - test_that("ibrier measure works with surv tasks", { + requirePackagesOrSkip("survAUC", default.method = "load") + requirePackagesOrSkip("pec", default.method = "load") + set.seed(getOption("mlr.debug.seed")) rin = makeResampleInstance(makeResampleDesc("CV", iters = 2), task = wpbc.task) lrn = makeLearner("surv.coxph", x = TRUE) diff --git a/tests/testthat/test_tuneParams.R b/tests/testthat/test_tuneParams.R index fb0629d42f..3b33080620 100644 --- a/tests/testthat/test_tuneParams.R +++ b/tests/testthat/test_tuneParams.R @@ -14,6 +14,8 @@ test_that("names for minimize are set correctly", { }) test_that("tuneParams with resample.fun", { + skip_on_cran() + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( @@ -63,6 +65,7 @@ test_that("tuneParams with resample.fun", { }) test_that("tuneParams output works as documented", { + requirePackagesOrSkip("kernlab", default.method = "load")
lrn = makeLearner("classif.ksvm") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( @@ -96,6 +99,8 @@ test_that("tuneParams output works as documented", { }) test_that("tuneParams output works as documented", { + requirePackagesOrSkip("kernlab", default.method = "load") + lrn = makeLearner("classif.ksvm") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( @@ -130,6 +135,8 @@ test_that("tuneParams output works as documented", { test_that("tuning with a fixed ensemble methods and varying base methods works", { + skip_on_cran() + # TODO: make it possible to choose arbitrary number of base.methods -> cannot # tune an argument of a param. We need to make makeDiscreteVectorParam more # flexible to allow more than one ensemble.method @@ -161,6 +168,8 @@ test_that("tuning with a fixed ensemble methods and varying base methods works", test_that("tuning with a fixed ensemble methods and varying base methods works", { + skip_on_cran() + # TODO: choose arbitrary number of base.methods -> cannot tune an argument of # a param. We need to make makeDiscreteVectorParam more flexible.
allow more # than one ensemble.method diff --git a/tests/testthat/test_tune_tuneMBO.R b/tests/testthat/test_tune_tuneMBO.R index e90716d256..b4160553e2 100644 --- a/tests/testthat/test_tune_tuneMBO.R +++ b/tests/testthat/test_tune_tuneMBO.R @@ -1,5 +1,6 @@ test_that("tuneMBO", { + requirePackagesOrSkip("rgenoud", default.method = "load") n.des = 8 n.iter = 2 res = makeResampleDesc("Holdout") diff --git a/tests/testthat/test_tune_tuneParamsMultiCrit.R b/tests/testthat/test_tune_tuneParamsMultiCrit.R index 1df24ef45a..601505b287 100644 --- a/tests/testthat/test_tune_tuneParamsMultiCrit.R +++ b/tests/testthat/test_tune_tuneParamsMultiCrit.R @@ -1,5 +1,7 @@ test_that("tuneParamsMultiCrit", { + requirePackagesOrSkip("emoa", default.method = "load") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( @@ -75,6 +77,9 @@ test_that("tuneParamsMultiCrit", { test_that("tuneParamsMultiCrit works with low number of evals and dependencies", { + + requirePackagesOrSkip("emoa", default.method = "load") + # we had a bug here triggered thru code in PH ps = makeParamSet( makeNumericParam("C", lower = -12, upper = 12, trafo = function(x) 2^x), @@ -92,6 +97,8 @@ test_that("tuneParamsMultiCrit works with low number of evals and dependencies", # FIXME: I am not sure how we can check wich value is imputed for the optimizer? 
test_that("y imputing works", { + requirePackagesOrSkip("emoa", default.method = "load") + configureMlr(on.learner.error = "quiet") lrn = makeLearner("classif.__mlrmocklearners__2") rdesc = makeResampleDesc("Holdout") @@ -110,6 +117,8 @@ test_that("y imputing works", { }) test_that("tuneParamsMultiCrit with budget", { + requirePackagesOrSkip("emoa", default.method = "load") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( @@ -162,6 +171,7 @@ test_that("tuneParamsMultiCrit with budget", { }) test_that("plotTuneMultiCritResult works with pretty.names", { + requirePackagesOrSkip("emoa", default.method = "load") lrn = makeLearner("classif.rpart") ps = makeParamSet( makeDiscreteParam("minsplit", values = c(5, 10)) @@ -174,6 +184,7 @@ test_that("plotTuneMultiCritResult works with pretty.names", { }) test_that("tuneParamsMultiCrit with resample.fun", { + requirePackagesOrSkip("emoa", default.method = "load") lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( @@ -210,6 +221,8 @@ test_that("tuneParamsMultiCrit with resample.fun", { }) test_that("check n.objectives for MBO multi crit", { + requirePackagesOrSkip("emoa", default.method = "load") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( diff --git a/tests/testthat/test_tune_tuneThreshold.R b/tests/testthat/test_tune_tuneThreshold.R index 35a6267e47..b1940b18cd 100644 --- a/tests/testthat/test_tune_tuneThreshold.R +++ b/tests/testthat/test_tune_tuneThreshold.R @@ -1,5 +1,6 @@ test_that("tuneThreshold", { + requirePackagesOrSkip("lhs", default.method = "load") # binary classes, 1 th lrn = makeLearner("classif.lda", predict.type = "prob")