Prepare 0.5.0 release (#114)

* Prepare 0.5.0 release * Bump version * Fix URL bug * Bump roxygen2 version * Remove runPioneeR + housekeeping * Update Readme in anticipation of approval * Add further alignment with CRAN policies * Add example to compute_malmquist() * Update LICENSE and copyright holder * Standardise examples across functions * Use dontrun for run_pioneer example --------- Co-authored-by: Ove Haugland Jakobsen <ohj@mac.com>
Riksrevisjonen · Oct 25, 2024 · 6e66148 · 6e66148
1 parent a68eb99
commit 6e66148
Show file tree

Hide file tree

Showing 24 changed files with 583 additions and 1,063 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -13,3 +13,5 @@ tmp
 ^DOCKERFILE$
 ^README.Rmd$
 ^tests/script
+^cran-comments\.md$
+^CRAN-SUBMISSION$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,16 +1,21 @@
 Package: pioneeR
-Title: Perform productivity analyses in a web browser
-Version: 0.4.0.9000
+Title: Productivity and Efficiency Analysis using DEA
+Version: 0.5.0
 Authors@R: c(
     person("Ove Haugland", "Jakobsen", role = c("aut", "cre"), email = "ohj@mac.com"),
-    person("Aleksander", "Eilertsen", role = "aut", email = "ale@riksrevisjonen.no"),
+    person("Aleksander Eilertsen", "Valberg", role = "aut", email = "ale@riksrevisjonen.no"),
     person("Jan Roar", "Beckstrøm", role = "ctb", email = "jrb@riksrevisjonen.no"),
     person("Lars Skaage", "Engebretsen", role = "ctb", email = "lse@riksrevisjonen.no"),
     person("Jonas", "Månsson", role = "ctb", email = "jonas.mansson@riksrevisjonen.no"),
     person("Joachim", "Sandnes", role = "ctb", email = "jsn@riksrevisjonen.no"),
-    person(family = "Riksrevisjonen", role = "cph"))
-Description: Application to conduct productivity and efficiency (DEA) analysis. 
-License: GPL-3 | file LICENSE
+    person(family = "National Audit Office of Norway", role = "cph"))
+Description: Measure productivity and efficiency using Data Envelopment Analysis 
+    (DEA). Available methods include DEA under different technology assumptions, 
+    bootstrapping of efficiency scores and calculation of the Malmquist 
+    productivity index. Analyses can be performed either in the console or with 
+    the provided 'shiny' app. See Banker, R.; Charnes, A.; Cooper, W.W. (1984)
+    <doi:10.1287/mnsc.30.9.1078>, Färe, R.; Grosskopf, S. (1996) <doi:10.1007/978-94-009-1816-0>.
+License: GPL-3
 URL: https://riksrevisjonen.github.io/pioneeR/, https://github.com/Riksrevisjonen/pioneeR
 BugReports: https://github.com/Riksrevisjonen/pioneeR/issues
 Depends:
@@ -31,6 +36,7 @@ Imports:
     scales,
     writexl
 Suggests:
+    deaR,
     rmarkdown,
     knitr,
     testthat (>= 3.0.0),
@@ -40,6 +46,5 @@ Language: en-GB
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
-VignetteBuilder: knitr
+RoxygenNote: 7.3.2
 Config/testthat/edition: 3
diff --git a/LICENSE b/LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -16,7 +16,6 @@ export(compute_dea)
 export(compute_malmquist)
 export(compute_scale_efficiency)
 export(create_matrix)
-export(runPioneeR)
 export(run_pioneer)
 export(summary_tbl_dea)
 export(unset_env_vars)

diff --git a/NEWS.md b/NEWS.md
@@ -1,11 +1,11 @@
-# pioneeR 0.4.0.9000
+# pioneeR 0.5.0
 
 ## Breaking changes
 
 - Results for the Malmquist index are now returned ala Farrell (not Shephard). This is consistent with how results
 for other models are currently presented in the app. Users should note that this changes the interpretation of the malmquist index for input-oriented models. See `?compute_malmquist()` for details. (#81)
-- Removed the select input box for returns to scale in the Malmquist analysis tab. The currently supported Malmquist index is the one defined by Färe & Grosskopf (1996). Components of the index is thus always computed based on the same returns to scale (either CRS or VRS depending on the specific component), so the select input box
-is not needed. 
+- Removed the select input box for returns to scale in the Malmquist analysis tab. The currently supported Malmquist index is the one defined by Färe & Grosskopf (1996). Components of the index is thus always computed based on the same returns to scale (either CRS or VRS depending on the specific component), so the select input box is not needed.
+- `runPioneeR()` has been removed to avoid confusion for new users with the release on CRAN.
 
 ## Enhancements
 

diff --git a/R/bootstrap-dea.R b/R/bootstrap-dea.R
@@ -3,17 +3,17 @@ NULL
 
 #' Bootstrap a DEA model
 #'
-#' Run bootstrap on a DEA model to estimate bias corrected efficiency scores and confidence
-#' intervals
+#' Run bootstrap on a DEA model to estimate bias corrected efficiency scores and
+#' confidence intervals.
 #'
-#' @param dea An object of type pioneer_dea from `compute_dea()`
-#' @param alpha One minus the confidence level required (defaults to 0.05)
-#' @param bw_rule A string with the type of bandwidth rule to be used, or a number with the
-#'   bandwidth parameter. See details.
-#' @param iterations The number of bootstrap iterations to be performed
+#' @param dea An object of class 'pioneer_dea' from `compute_dea()`.
+#' @param alpha One minus the confidence level required. Default is 0.05.
+#' @param bw_rule A string with the type of bandwidth rule to be used, or a number
+#'   with the bandwidth parameter. See details.
+#' @param iterations The number of bootstrap iterations to perform. Default is 2000.
 #'
 #' @details
-#' In order to bootstrap a DEA model, you must first create a DEA model object using the
+#' In order to bootstrap a DEA model, you must first create a model object using the
 #' `compute_dea()` function. Note that you currently can only bootstrap models using
 #' constant or variable returns to scale (RTS). If you try to bootstrap a model using another
 #' RTS, the bootstrap will fail with an error message.
@@ -22,21 +22,26 @@ NULL
 #' `silverman` for the Silverman rule, or `scott` for the Scott rule. If you provide a
 #' number, this will be used directly as the bandwidth parameter `h`. This can be useful
 #' to replicate results where `h` is given, such as Simar & Wilson (1998). For most practical
-#' applications of the bootstrap, the default of unbias cross validation is sensible.
+#' applications of the bootstrap, the default value of unbiased cross validation is sensible.
 #'
+#' @return A list of class `pioneer_bootstrap`.
 #' @examples
-#' \dontrun{
-#' # Get data
+#' # Load example data
 #' fare89 <- deaR::Electric_plants
 #' # Estimate efficiency
-#' mod <- compute_dea(fare89, 'Plant', c('Labor', 'Fuel', 'Capital'), 'Output', 'vrs', 'in')
-#' # Run bootstrap
-#' boot <- bootstrap_dea(mod, iterations = 2000)
-#' }
-#'
-#' @return A list object of class `pioneer_bootstrap`
+#' mod <- compute_dea(
+#'   data = fare89,
+#'   input = c("Labor", "Fuel", "Capital"),
+#'   output = "Output",
+#'   id = "Plant",
+#' )
+#' # Run bootstrap. Reducing the number of iterations to save processing time
+#' boot <- bootstrap_dea(mod, iterations = 100)
+#' # Print results
+#' print(boot)
+#' # Get summary
+#' summary(boot)
 #' @seealso [compute_dea()]
-#'
 #' @export
 bootstrap_dea <- function(dea, alpha = 0.05, bw_rule = 'ucv', iterations = 2000) {
 

diff --git a/R/compute_dea.R b/R/compute_dea.R
@@ -18,8 +18,25 @@
 #' @param super If `TRUE` super efficiency scores are calculated.
 #' @param slack If `TRUE` slack values are calculated.
 #' @param peers If `TRUE` peers are added to the response.
-#'
-#' @return A list of class `pioneer_dea`
+#' @return A list of class `pioneer_dea`.
+#' @examples
+#' # Load example data
+#' fare89 <- deaR::Electric_plants
+#' # Estimate efficiency
+#' mod <- compute_dea(
+#'   data = fare89,
+#'   input = c("Labor", "Fuel", "Capital"),
+#'   output = "Output",
+#'   id = "Plant",
+#'   rts = "vrs",
+#'   orientation = "in"
+#' )
+#' # Print results
+#' print(mod)
+#' # Get summary
+#' summary(mod)
+#' # Convert to data frame
+#' df <- as.data.frame(mod)
 #' @export
 compute_dea <- function(
     data,

diff --git a/R/compute_malmquist.R b/R/compute_malmquist.R
@@ -16,9 +16,26 @@
 #' @param id A string with the DMU id or name variable.
 #' @param time A string with the time period variable.
 #' @return A list of class `pioneer_mlm`
-#' @export
+#' @examples
+#' # Load example data
+#' chnEconomy <- deaR::EconomyLong
+#' # Estimate Malmquist
+#' mod <- compute_malmquist(
+#'   data = chnEconomy,
+#'   id = 'DMUs',
+#'   time = 'Period',
+#'   input = c('Labor', 'Capital'),
+#'   output = 'GIOV',
+#'   orientation = 'in')
+#' # Print results
+#' print(mod)
+#' # Get summary
+#' summary(mod)
+#' # Convert to data frame
+#' df <- as.data.frame(mod)
 #' @references
 #' Färe, R., Grosskopf, S. (1996). _Intertemporal production frontiers: With dynamic DEA_. Springer.
+#' @export
 compute_malmquist <- function(
     data,
     input,

diff --git a/R/dea-utils.R b/R/dea-utils.R
@@ -3,10 +3,17 @@
 #' Create a matrix for input or output variables that can be used in DEA models
 #' from a supplied data.frame
 #'
-#' @param df A data.frame
-#' @param columns A vector of column names that should be included in the matrix
-#' @param id The name of the column with the DMU IDs
-#' @param normalize If `TRUE`, all columns will be normalized with a mean of 1
+#' @param df A data.frame containing the data.
+#' @param columns A character vector of column names to include in the matrix.
+#' @param id A character string specifying the column with DMU IDs.
+#' @param normalize A logical indicating whether to normalize the columns by their
+#'   mean. Defaults to `FALSE`.
+#'
+#' @examples
+#' df <- data.frame(id = 1:3, a = c(10, 20, 30), b = c(5, 15, 25))
+#' create_matrix(df, columns = c("a", "b"), id = "id", normalize = TRUE)
+#'
+#' @return A matrix of inputs or outputs
 #'
 #' @export
 create_matrix <- function(df, columns, id, normalize = FALSE) {
@@ -37,6 +44,17 @@ create_matrix <- function(df, columns, id, normalize = FALSE) {
 #' @param digits An integer with the number of digits to round to. If `NULL` the
 #'   values are kept unrounded.
 #'
+#' @return A data frame containing the efficiency scores for CRS, VRS, the
+#'   Scale Efficiency, the VRS to NIRS ratio, and a recommendation on whether to
+#'   increase or decrease the size of the DMU.
+#'
+#' @examples
+#' # Create matrices with random values
+#' inputs <- matrix(runif(10, 1, 10), ncol = 2)
+#' outputs <- matrix(runif(10, 1, 10), ncol = 2)
+#' # Compute scale efficiency
+#' res <- compute_scale_efficiency(
+#'   inputs, outputs, orientation = 'out', digits = 2)
 #' @export
 compute_scale_efficiency <- function(
     x,

diff --git a/R/server.R b/R/server.R
@@ -117,7 +117,7 @@ server <- function(input, output, session) {
   }, ignoreInit = TRUE)
 
   data <- reactive({
-    # Return the active data object. This can be a data frame sent with the runPioneeR
+    # Return the active data object. This can be a data frame sent with the run_pioneer
     # call, a restored data object from a previous session, or a new dataset uploaded
     # by the user.
     reactives$data

diff --git a/R/shiny-functions.R b/R/shiny-functions.R
@@ -26,8 +26,8 @@ pioneer_scripts <- function() {
 #' Run the pioneeR app on your local machine.
 #'
 #' @param x A data frame that should be loaded with the app. See details.
-#' @param port The TCP port that the application should listen on.
-#' @param ... Other arguments to send to \code{\link[shiny]{runApp}}
+#' @param port Integer. The TCP port that the application should listen on.
+#' @param ... Other arguments to send to [shiny::runApp()].
 #'
 #' @details
 #' Note that pioneeR must be loaded into the namespace with `library(pioneeR)`
@@ -37,8 +37,16 @@ pioneer_scripts <- function() {
 #' a data frame or a character string with the object name of the data frame you
 #' want to be loaded when the app launches. Note that you should only use data
 #' frame objects. If you have a tibble (from the tidyverse) or a data table, you
-#' can convert to an ordinary data.frame using \code{\link[base]{as.data.frame}}.
+#' can convert to an ordinary data.frame using [as.data.frame()]
 #'
+#' @return None
+#' @examples
+#' # Only run this example in interactive R sessions
+#' if (interactive()) {
+#'   df = deaR::Electric_plants
+#'   # Load app with data.frame and set port to 8080
+#'   run_pioneer(x = df, port = 8080)
+#' }
 #' @export
 run_pioneer <- function(x = NULL, port = NULL, ...) {
 
@@ -58,17 +66,13 @@ run_pioneer <- function(x = NULL, port = NULL, ...) {
 
 }
 
-#' @rdname run_pioneer
-#' @export
-runPioneeR <- function(x = NULL, port = NULL, ...) {
-  deprecation_warning(alternative = "run_pioneeR", next_release = TRUE)
-  run_pioneer(x = x, port = port, ...)
-}
-
 #' Unset environment variables
 #'
 #' Unsets the environment variables set by pioneeR
 #'
+#' @return A logical vector, with elements being `TRUE` if unsetting the variable succeeded
+#' @examples
+#' unset_env_vars()
 #' @export
 unset_env_vars <- \() Sys.unsetenv('PIONEER_DATA')
 
@@ -100,8 +104,8 @@ check_balance <- function(data, id_var, time_var) {
 
 }
 
-#' @param level Level of alert
-#' @param message The message to show the user
+#' @param level A character string with the level of alert
+#' @param message A character string with the message to show the user
 #' @param object A reactive object to update
 #' @param append Boolean. If the message should be appended to the reactive
 #' @noRd

diff --git a/R/summary-dea.R b/R/summary-dea.R
@@ -4,44 +4,63 @@
 #'
 #' @details
 #' The function will return a summary table for efficiency scores from a DEA model.
-#' Efficiency scores will be placed in 11 bins, where DMUs with an effciency score
+#' Efficiency scores will be placed in 11 bins, where DMUs with an efficiency score
 #' equal to 1 are placed in a separate bin. For output oriented models with range
 #' \[1, Inf\], bins are created with `1/bin`. Bin widths will be equal to models
 #' with range \[0, 1\].
 #'
-#' @param x A vector of efficiency scores or a Farrell object
-#'
-#' @return data.frame
+#' @param x A numeric vector of efficiency scores or an object of class `pioneer_dea`
 #'
+#' @return A [data.frame()] with summary statistics
+#' @examples
+#' # Load example data
+#' fare89 <- deaR::Electric_plants
+#' # Estimate efficiency
+#' mod <- compute_dea(
+#'   data = fare89,
+#'   input = c("Labor", "Fuel", "Capital"),
+#'   output = "Output",
+#'   rts = "vrs"
+#' )
+#' # Get a summary table of efficiency scores
+#' summary_tbl_dea(mod)
+#' # You can also create the table from a numeric vector of efficiency scores
+#' res <- as.data.frame(mod)
+#' summary_tbl_dea(res$efficiency)
 #' @export
 summary_tbl_dea <- function(x) {
-  UseMethod('summary_tbl_dea')
+  UseMethod("summary_tbl_dea")
 }
 
 #' @method summary_tbl_dea pioneer_dea
 #' @export
 summary_tbl_dea.pioneer_dea <- function(x) {
-  eff <- x$results$efficiency
+  eff <- x$efficiency
   summary_tbl_dea(eff)
 }
 
 #' @method summary_tbl_dea numeric
 #' @export
 summary_tbl_dea.numeric <- function(x) {
+  # Input validation
+  if (!is.numeric(x) || length(x) == 0) {
+    cli::cli_abort("Input must be a non-empty numeric vector")
+  }
+  # Remove missing
   x <- x[!is.na(x)]
   # Check if efficiency scores are in range [0, 1]
   range0 <- min(x) < 1L
   bins <- if (range0) seq(0, 1.1, .1) else round(1/rev(seq(0, 1.1, .1)), 3L)
   # Values equal to 1 be in last bin for input and first bin for output orientation
   # Create new labels to use with the cut function
   labs <- if (range0) {
-    c(sprintf('%s <= E < %s', bins[1:10], bins[2:11]), 'E == 1')
+    c(sprintf("%s <= E < %s", bins[1:10], bins[2:11]), "E == 1")
   } else {
-    c('F == 1', sprintf('%.3f < F <= %.3f', bins[2:11], bins[3:12]))
+    c("F == 1", sprintf("%.3f < F <= %.3f", bins[2:11], bins[3:12]))
   }
   # If efficiency scores are in range [0, 1] bins must be closed on the left
   eff_bin <- cut(x, breaks =  bins, labels = labs, right = !range0)
   eff_df <- table(eff_bin) |> as.data.frame()
-  colnames(eff_df) <- c('Range', 'Frequency')
+  colnames(eff_df) <- c("Range", "Frequency")
   eff_df
 }