Skip to content

Commit

Permalink
Prepare release (#673)
Browse files Browse the repository at this point in the history
  • Loading branch information
mllg authored Aug 5, 2021
1 parent 30b402d commit 0df584c
Show file tree
Hide file tree
Showing 15 changed files with 96 additions and 87 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mlr3
Title: Machine Learning in R - Next Generation
Version: 0.11.0-9000
Version: 0.12.0
Authors@R:
c(person(given = "Michel",
family = "Lang",
Expand Down
17 changes: 12 additions & 5 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
# mlr3 0.11.0-9000
# mlr3 0.12.0

* New method to label columns in tasks: `Task$label()`.
* New method to assign labels to columns in tasks: `Task$label()`.
These will be used in visualizations in the future.
* New method to add stratification variables: `Task$add_strata()`.
* New helper function `partition()` to split a task into a training and test
set.
* New standardized getter `loglik()` for class `Learner`.
* New measures `"aic"` and `"bic"` to compute the Akaike Information Criterion
or the Bayesian Information Criterion, respectively.
* New Resampling method: `ResamplingCustomCV`.
* New Resampling method: `ResamplingCustomCV`. Creates a custom resampling split
based on the levels of a user-provided factor variable.
* New argument `encapsulate` for `resample()` and `benchmark()` to conveniently
enable encapsulation and also set the fallback learner to the respective
enable encapsulation and also set the fallback learner to the
featureless learner. This is simply for convenience; configuring each learner
individually is still possible and allows a more fine-grained control (#634,
#642).
* New field `parallel_predict` for `Learner` to enable parallel predictions via
the future backend. This currently is only enabled while calling the
`$predict()` or `$predict_newdata()` methods and is disabled during `resample()`
and `benchmark()` where you have other means to parallelize.
* Deprecated public (and already documented as internal) field `$data` in
`ResampleResult` and `BenchmarkResult` to simplify the API and avoid
confusion. The converter `as.data.table()` can be used instead.
confusion. The converter `as.data.table()` can be used instead to access the
internal data.
* Measures now have formal hyperparameters. A popular example where this is
required is the F1 score, now implemented with customizable `beta`.
* Changed default of argument `ordered` in `Task$data()` from `TRUE` to `FALSE`.
Expand Down
3 changes: 2 additions & 1 deletion R/Learner.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,8 @@ Learner = R6Class("Learner",
#' @template field_hash
hash = function(rhs) {
assert_ro_binding(rhs)
calculate_hash(class(self), self$id, self$param_set$values, private$.predict_type, self$fallback$hash)
calculate_hash(class(self), self$id, self$param_set$values, private$.predict_type,
self$fallback$hash, self$parallel_predict)
},

#' @field phash (`character(1)`)\cr
Expand Down
55 changes: 32 additions & 23 deletions R/Task.R
Original file line number Diff line number Diff line change
Expand Up @@ -583,29 +583,6 @@ Task = R6Class("Task",
setnames(strata, sprintf("..stratum_%s", cols))
self$cbind(strata)
self$set_col_roles(names(strata), roles = "stratum")
},


#' @description
#' Assigns `labels` (prettier formatted names) to columns `cols`.
#' Internally updates the column `label` of the table in field `col_info` by reference.
#'
#' @param cols (`character()`)\cr
#' Column identifiers to label.
#' @param labels (`character()`)\cr
#' New labels. Will be repeated to match the length of `cols`.
#' Set to `NA` to remove a label.
#'
#' @return Modified `self`.
label = function(cols, labels) {
# `cols` must be a character vector of unique ids without missing values.
assert_character(cols, any.missing = FALSE, unique = TRUE)
# `labels` must be character; missing values are not rejected here.
assert_character(labels)
# Every requested id must be present in the `id` column of `col_info`.
assert_subset(cols, self$col_info$id)
# Recycle (or truncate) `labels` so there is exactly one label per column.
labels = rep_len(as.character(labels), length(cols))

# Update join on `id`: `:=` writes the `label` column of `col_info` in place
# for the rows matching `cols`.
self$col_info[list(cols), "label" := labels, on = "id"]

# Return the task itself, invisibly.
invisible(self)
}
),

Expand Down Expand Up @@ -842,6 +819,38 @@ Task = R6Class("Task",
}
data = self$backend$data(private$.row_roles$use, c(self$backend$primary_key, weight_cols))
setnames(data, c("row_id", "weight"))[]
},


#' @field labels (named `character()`)\cr
#' Retrieve `labels` (prettier formatted names) from columns.
#' Internally queries the column `label` of the table in field `col_info`.
#' Column ids are given by the names of the vector; the labels are its string values.
#'
#' Assigning to this field updates the task by reference.
#' You have to provide a character vector of labels, named with column ids.
#' To remove a label, set it to `NA`.
#' Alternatively, you can provide a [data.frame()] with the two columns
#' `"id"` and `"label"`.
labels = function(rhs) {
  # Labels are only exposed for columns currently used as target or feature.
  active = union(self$target_names, self$feature_names)

  if (!missing(rhs)) {
    # Setter path. A two-column data.frame (`id`, `label`) is first flattened
    # into a vector of labels named by column id.
    if (is.data.frame(rhs)) {
      assert_data_frame(rhs, ncols = 2L)
      assert_names(names(rhs), permutation.of = c("id", "label"))
      rhs = set_names(rhs[["label"]], rhs[["id"]])
    }

    # Each id may be given only once and must refer to an active column.
    assert_names(names(rhs), type = "unique")
    assert_subset(names(rhs), active)

    # Update join on `id`; `:=` modifies `col_info` in place.
    self$col_info[list(names(rhs)), "label" := rhs, on = "id"]

    return(invisible(self))
  }

  # Getter path. Look up the active ids in `col_info` (ids without a match are
  # dropped via `nomatch = NULL`) and return the labels named by column id.
  hits = self$col_info[list(active), c("id", "label"), on = "id", nomatch = NULL, with = FALSE]
  set_names(hits[["label"]], hits[["id"]])
}
),

Expand Down
10 changes: 8 additions & 2 deletions R/partition.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@
#' # regression task
#' task = tsk("boston_housing")
#'
#' # roughly equal size split with stratification
#' str(partition(task, ratio = 0.5))
#' # roughly equal size split while stratifying on the binned response
#' split = partition(task, ratio = 0.5)
#' data = data.frame(
#' y = c(task$truth(split$train), task$truth(split$test)),
#' split = rep(c("train", "test"), lengths(split))
#' )
#' boxplot(y ~ split, data = data)
#'
#'
#' # classification task
#' task = tsk("pima")
Expand Down
2 changes: 1 addition & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @section Learn mlr3:
#' * Book on mlr3: \url{https://mlr3book.mlr-org.com}
#' * Use cases and examples gallery: \url{https://mlr3gallery.mlr-org.com}
#' * Cheat Sheets: \url{https://cheatsheets.mlr-org.com}
#' * Cheat Sheets: \url{https://github.com/mlr-org/mlr3cheatsheets}
#'
#' @section mlr3 extensions:
#' * Preprocessing and machine learning pipelines: \CRANpkg{mlr3pipelines}
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ Status](https://www.r-pkg.org/badges/version-ago/mlr3)](https://cran.r-project.o
status](https://mlr3extralearners.mlr-org.com/articles/learners/learner_status.html)
to see their build status
- **Cheatsheets**
- [Overview of cheatsheets](https://cheatsheets.mlr-org.com)
- [Overview of cheatsheets](https://github.com/mlr-org/mlr3cheatsheets)
- [mlr3](https://cheatsheets.mlr-org.com/mlr3.pdf)
- [mlr3tuning](https://cheatsheets.mlr-org.com/mlr3tuning.pdf)
- [mlr3pipelines](https://cheatsheets.mlr-org.com/mlr3pipelines.pdf)
- [mlr3fselect](https://cheatsheets.mlr-org.com/mlr3fselect.pdf)
- **Videos**:
- [useR2019 talk on
mlr3](https://www.youtube.com/watch?v=wsP2hiFnDQs)
Expand Down Expand Up @@ -149,7 +150,7 @@ measure <- msr("classif.acc")
prediction$score(measure)
```

## classif.acc
## classif.acc
## 0.9130435

### Resample
Expand Down Expand Up @@ -178,7 +179,7 @@ rr$score(measure)
rr$aggregate(measure)
```

## classif.acc
## classif.acc
## 0.918688

## Extension Packages
Expand Down
38 changes: 11 additions & 27 deletions man/Task.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/TaskClassif.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/TaskRegr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/TaskSupervised.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/TaskUnsupervised.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/mlr3-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions man/partition.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 16 additions & 17 deletions tests/testthat/test_Task.R
Original file line number Diff line number Diff line change
Expand Up @@ -405,26 +405,25 @@ test_that("$add_strata", {
test_that("column labels", {
task = tsk("iris")
expect_character(task$col_info$label)
expect_true(allMissing(task$col_info$label))
expect_true(allMissing(task$labels))

labels = c("pl", "pw", "sl", "sw", "species")
task$col_info$label = c(NA, labels)
task$labels = c(Species = "sp")
expect_equal(task$labels[["Species"]], "sp")
expect_equal(count_missing(task$labels), 4L)

task$rbind(iris[1, , drop = FALSE])
expect_names(na.omit(task$col_info$label), permutation.of = labels)
fn = task$feature_names
task$labels = set_names(toupper(fn), fn)
expect_equal(unname(task$labels), c("sp", toupper(fn)))

task$cbind(data.frame(foo = 1:151))
task$col_info
expect_names(na.omit(task$col_info$label), permutation.of = labels)
expect_error({ task$labels = c(foo = "as") }, "names")

dt = data.table(id = c(task$target_names, task$feature_names))
dt$label = tolower(dt$id)

task = tsk("iris")
task$label("Petal.Length", "pl")
expect_equal(task$col_info["Petal.Length", label], "pl")

task$label(c("Sepal.Length", "Sepal.Width"), c("sl", "sw"))
expect_equal(task$col_info["Sepal.Length", label], "sl")
expect_equal(task$col_info["Sepal.Width", label], "sw")

task$label("Petal.Length", NA)
expect_equal(task$col_info["Petal.Length", label], NA_character_)
task$labels = dt
expect_equal(
unname(task$labels),
tolower(c(task$target_names, task$feature_names))
)
})

0 comments on commit 0df584c

Please sign in to comment.