diff --git a/.dev/test-value_at.R b/.dev/test-value_at.R
new file mode 100644
index 000000000..3c8272101
--- /dev/null
+++ b/.dev/test-value_at.R
@@ -0,0 +1,12 @@
+test_that("value_at", {
+  data(efc, package = "datawizard")
+  expect_equal(value_at(efc$e42dep, 5), 4, ignore_attr = TRUE)
+  expect_equal(value_at(efc$c12hour, 4), NA_real_, ignore_attr = TRUE)
+  expect_equal(value_at(efc$c12hour, 4, remove_na = TRUE), 168, ignore_attr = TRUE)
+  expect_equal(value_at(efc$c12hour, 5:7), efc$c12hour[5:7], ignore_attr = TRUE)
+  expect_equal(value_at(efc$e42dep, 123456, default = 55), 55, ignore_attr = TRUE)
+  expect_null(value_at(efc$e42dep, 123456))
+  expect_null(value_at(efc$e42dep, NULL))
+  expect_error(value_at(efc$e42dep, NA), regex = "`position` can't")
+  expect_error(value_at(efc$e42dep, c(3, NA)), regex = "`position` can't")
+})
diff --git a/.dev/value_at.R b/.dev/value_at.R
new file mode 100644
index 000000000..cdadc9dc6
--- /dev/null
+++ b/.dev/value_at.R
@@ -0,0 +1,52 @@
+#' @title Find the value(s) at a specific position in a variable
+#' @name value_at
+#'
+#' @description This function can be used to extract one or more values at a
+#' specific position in a variable.
+#'
+#' @param x A vector or factor.
+#' @param position An integer or a vector of integers, indicating the position(s)
+#' of the value(s) to be returned. Negative values are counted from the end of
+#' the vector. If `NA`, an error is thrown.
+#' @param remove_na Logical, if `TRUE`, missing values are removed before
+#' computing the position. If `FALSE`, missing values are included in the
+#' computation.
+#' @param default The value to be returned if the position is out of range.
+#'
+#' @seealso `data_summary()` to use `value_at()` inside a `data_summary()` call.
+#'
+#' @return A vector with the value(s) at the specified position(s).
+#'
+#' @examples
+#' data(mtcars)
+#' # 5th value
+#' value_at(mtcars$mpg, 5)
+#' # last value
+#' value_at(mtcars$mpg, -1)
+#' # out of range, return default
+#' value_at(mtcars$mpg, 150)
+#' # return 2nd and fifth value
+#' value_at(mtcars$mpg, c(2, 5))
+#' @export
+value_at <- function(x, position = 1, default = NULL, remove_na = FALSE) {
+  if (remove_na) {
+    x <- x[!is.na(x)]
+  }
+  n <- length(x)
+  unlist(lapply(position, .values_at, x = x, n = n, default = default), use.names = FALSE)
+}
+
+# helper ----
+
+.values_at <- function(x, position, n, default) {
+  if (is.na(position)) {
+    insight::format_error("`position` can't be `NA`.")
+  }
+  if (position < 0L) {
+    position <- position + n + 1
+  }
+  if (position <= 0 || position > n) {
+    return(default)
+  }
+  x[position]
+}
diff --git a/R/adjust.R b/R/adjust.R
index 01b1b5a2c..821821864 100644
--- a/R/adjust.R
+++ b/R/adjust.R
@@ -124,11 +124,11 @@ adjust <- function(data,
       predictors[predictors == predictors_num] <- paste0("s(", predictors_num, ")")
     }
     formula_predictors <- paste(c("1", predictors), collapse = " + ")
-    formula <- paste(var, "~", formula_predictors)
+    model_formula <- paste(var, "~", formula_predictors)
 
     x <- .model_adjust_for(
       data = data[unique(c(var, effect, facs))],
-      formula,
+      model_formula = model_formula,
       multilevel = multilevel,
       additive = additive,
       bayesian = bayesian,
@@ -148,7 +148,7 @@ data_adjust <- adjust
 
 #' @keywords internal
 .model_adjust_for <- function(data,
-                              formula,
+                              model_formula,
                               multilevel = FALSE,
                               additive = FALSE,
                               bayesian = FALSE,
@@ -159,32 +159,28 @@ data_adjust <- adjust
     # Bayesian
     if (bayesian) {
       insight::check_if_installed("rstanarm")
-      model <- rstanarm::stan_gamm4(stats::as.formula(formula), random = formula_random, data = data, refresh = 0)
+      model <- rstanarm::stan_gamm4(stats::as.formula(model_formula), random = formula_random, data = data, refresh = 0)
       # Frequentist
     } else {
       insight::check_if_installed("gamm4")
-      model <- gamm4::gamm4(stats::as.formula(formula), random = formula_random, data = data)
+      model <- gamm4::gamm4(stats::as.formula(model_formula), random = formula_random, data = data)
     }
 
     # Linear -------------------------
-  } else {
+  } else if (bayesian) {
     # Bayesian
-    if (bayesian) {
-      insight::check_if_installed("rstanarm")
-      if (multilevel) {
-        model <- rstanarm::stan_lmer(paste(formula, formula_random), data = data, refresh = 0)
-      } else {
-        model <- rstanarm::stan_glm(formula, data = data, refresh = 0)
-      }
-      # Frequentist
+    insight::check_if_installed("rstanarm")
+    if (multilevel) {
+      model <- rstanarm::stan_lmer(paste(model_formula, formula_random), data = data, refresh = 0)
     } else {
-      if (multilevel) {
-        insight::check_if_installed("lme4")
-        model <- lme4::lmer(paste(formula, formula_random), data = data)
-      } else {
-        model <- stats::lm(formula, data = data)
-      }
+      model <- rstanarm::stan_glm(model_formula, data = data, refresh = 0)
     }
+  } else if (multilevel) {
+    # Frequentist
+    insight::check_if_installed("lme4")
+    model <- lme4::lmer(paste(model_formula, formula_random), data = data)
+  } else {
+    model <- stats::lm(model_formula, data = data)
   }
 
   adjusted <- insight::get_residuals(model)
diff --git a/R/assign_labels.R b/R/assign_labels.R
index 9390101ad..bd35513bf 100644
--- a/R/assign_labels.R
+++ b/R/assign_labels.R
@@ -79,7 +79,7 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
       attr(x, "label") <- variable
     } else {
       insight::format_error(
-        "Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \"mylabel\"`."
+        "Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \"mylabel\"`." # nolint
       )
     }
   }
@@ -88,13 +88,13 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
   if (!is.null(values)) {
     # extract unique values
     unique_values <- as.vector(sort(stats::na.omit(unique(x))))
-    labels <- NULL
+    value_labels <- NULL
 
     # do we have a names vector for "values"?
     # else check if number of labels and values match
     if (is.null(names(values))) {
       if (length(values) == length(unique_values)) {
-        labels <- stats::setNames(unique_values, values)
+        value_labels <- stats::setNames(unique_values, values)
       } else {
         insight::format_error(
           "Cannot add labels. Number of unique values and number of value labels are not equal.",
@@ -114,11 +114,11 @@ assign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {
 
       if (length(values)) {
         # we need to switch names and values
-        labels <- stats::setNames(coerce_to_numeric(names(values)), values)
+        value_labels <- stats::setNames(coerce_to_numeric(names(values)), values)
       }
     }
 
-    attr(x, "labels") <- labels
+    attr(x, "labels") <- value_labels
   }
 
   x
diff --git a/R/categorize.R b/R/categorize.R
index d440069d8..341d0c0c9 100644
--- a/R/categorize.R
+++ b/R/categorize.R
@@ -145,28 +145,8 @@ categorize.numeric <- function(x,
                                labels = NULL,
                                verbose = TRUE,
                                ...) {
-  # check arguments
-  if (is.character(split)) {
-    split <- match.arg(
-      split,
-      choices = c(
-        "median", "mean", "quantile", "equal_length", "equal_range",
-        "equal", "equal_distance", "range", "distance"
-      )
-    )
-  }
-
-  if (is.character(split) && split %in% c("quantile", "equal_length") && is.null(n_groups)) {
-    insight::format_error(
-      "Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified."
-    )
-  }
-
-  if (is.character(split) && split == "equal_range" && is.null(n_groups) && is.null(range)) {
-    insight::format_error(
-      "Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified."
-    )
-  }
+  # sanity check
+  split <- .sanitize_split_arg(split, n_groups, range)
 
   # handle aliases
   if (identical(split, "equal_length")) split <- "length"
@@ -221,28 +201,7 @@ categorize.numeric <- function(x,
   original_x[!is.na(original_x)] <- out
 
   # turn into factor?
-  if (!is.null(labels)) {
-    if (length(labels) == length(unique(out))) {
-      original_x <- as.factor(original_x)
-      levels(original_x) <- labels
-    } else if (length(labels) == 1 && labels %in% c("mean", "median")) {
-      original_x <- as.factor(original_x)
-      no_na_x <- original_x[!is.na(original_x)]
-      if (labels == "mean") {
-        labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x
-      } else {
-        labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x
-      }
-      levels(original_x) <- insight::format_value(labels, ...)
-    } else if (isTRUE(verbose)) {
-      insight::format_warning(
-        "Argument `labels` and levels of the recoded variable are not of the same length.",
-        "Variable will not be converted to factor."
-      )
-    }
-  }
-
-  original_x
+  .original_x_to_factor(original_x, x, labels, out, verbose, ...)
 }
 
 
@@ -283,15 +242,15 @@ categorize.data.frame <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    my_args <- .process_append(
       x,
       select,
       append,
       append_suffix = "_r"
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- my_args$x
+    select <- my_args$select
   }
 
   x[select] <- lapply(
@@ -342,15 +301,15 @@ categorize.grouped_df <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    my_args <- .process_append(
       x,
       select,
       append,
       append_suffix = "_r"
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- my_args$x
+    select <- my_args$select
   }
 
   x <- as.data.frame(x)
@@ -387,3 +346,56 @@ categorize.grouped_df <- function(x,
   }
   seq(lowest, max(x), by = range)
 }
+
+
+.sanitize_split_arg <- function(split, n_groups, range) {
+  # check arguments
+  if (is.character(split)) {
+    split <- match.arg(
+      split,
+      choices = c(
+        "median", "mean", "quantile", "equal_length", "equal_range",
+        "equal", "equal_distance", "range", "distance"
+      )
+    )
+  }
+
+  if (is.character(split) && split %in% c("quantile", "equal_length") && is.null(n_groups)) {
+    insight::format_error(
+      "Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified."
+    )
+  }
+
+  if (is.character(split) && split == "equal_range" && is.null(n_groups) && is.null(range)) {
+    insight::format_error(
+      "Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified."
+    )
+  }
+
+  split
+}
+
+
+.original_x_to_factor <- function(original_x, x, labels, out, verbose, ...) {
+  if (!is.null(labels)) {
+    if (length(labels) == length(unique(out))) {
+      original_x <- as.factor(original_x)
+      levels(original_x) <- labels
+    } else if (length(labels) == 1 && labels %in% c("mean", "median")) {
+      original_x <- as.factor(original_x)
+      no_na_x <- original_x[!is.na(original_x)]
+      if (labels == "mean") {
+        labels <- stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x
+      } else {
+        labels <- stats::aggregate(x, list(no_na_x), FUN = stats::median, na.rm = TRUE)$x
+      }
+      levels(original_x) <- insight::format_value(labels, ...)
+    } else if (isTRUE(verbose)) {
+      insight::format_warning(
+        "Argument `labels` and levels of the recoded variable are not of the same length.",
+        "Variable will not be converted to factor."
+      )
+    }
+  }
+  original_x
+}
diff --git a/R/data_modify.R b/R/data_modify.R
index 6234255fd..c9b9d035a 100644
--- a/R/data_modify.R
+++ b/R/data_modify.R
@@ -350,7 +350,14 @@ data_modify.grouped_df <- function(data, ..., .if = NULL, .at = NULL, .modify =
   }
 
   # finally, we can evaluate expression and get values for new variables
-  new_variable <- try(with(data, eval(symbol)), silent = TRUE)
+  symbol_string <- insight::safe_deparse(symbol)
+  if (!is.null(symbol_string) && all(symbol_string == "n()")) {
+    # "special" functions
+    new_variable <- nrow(data)
+  } else {
+    # default evaluation of expression
+    new_variable <- try(with(data, eval(symbol)), silent = TRUE)
+  }
 
   # successful, or any errors, like misspelled variable name?
   if (inherits(new_variable, "try-error")) {
diff --git a/R/data_summary.R b/R/data_summary.R
index 5f8b0ee08..c0c5bc50f 100644
--- a/R/data_summary.R
+++ b/R/data_summary.R
@@ -8,13 +8,14 @@
 #' @param by Optional character string, indicating the name of a variable in `x`.
 #' If supplied, the data will be split by this variable and summary statistics
 #' will be computed for each group.
-#' @param include_na Logical, if `TRUE`, missing values are included as a level
+#' @param include_na Logical. If `TRUE`, missing values are included as a level
 #' in the grouping variable. If `FALSE`, missing values are omitted from the
 #' grouping variable.
 #' @param ... One or more named expressions that define the new variable name
 #' and the function to compute the summary statistic. Example:
 #' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided
-#' as a character string, e.g. `"mean_sepal_width = mean(Sepal.Width)"`.
+#' as a character string, e.g. `"mean_sepal_width = mean(Sepal.Width)"`. The
+#' summary function `n()` can be used to count the number of observations.
 #'
 #' @return A data frame with the requested summary statistics.
 #'
@@ -38,6 +39,17 @@
 #'
 #' # expressions can also be supplied as character strings
 #' data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear"))
+#'
+#' # count observations within groups
+#' data_summary(mtcars, observations = n(), by = c("am", "gear"))
+#'
+#' # first and last observations of "mpg" within groups
+#' data_summary(
+#'   mtcars,
+#'   first = mpg[1],
+#'   last = mpg[length(mpg)],
+#'   by = c("am", "gear")
+#' )
 #' @export
 data_summary <- function(x, ...) {
   UseMethod("data_summary")
@@ -74,19 +86,24 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
   } else {
     # sanity check - is "by" a character string?
     if (!is.character(by)) {
-      insight::format_error("Argument `by` must be a character string, indicating the name of a variable in the data.")
+      insight::format_error("Argument `by` must be a character string indicating the name of variables in the data.")
     }
     # is "by" in the data?
     if (!all(by %in% colnames(x))) {
       by_not_found <- by[!by %in% colnames(x)]
       insight::format_error(
-        paste0("Variable \"", by_not_found, "\" not found in the data."),
+        paste0(
+          "Variable",
+          ifelse(length(by_not_found) > 1, "s ", " "),
+          text_concatenate(by_not_found, enclose = "\""),
+          " not found in the data."
+        ),
         .misspelled_string(colnames(x), by_not_found, "Possibly misspelled?")
       )
     }
     # split data, add NA levels, if requested
     l <- lapply(x[by], function(i) {
-      if (include_na) {
+      if (include_na && anyNA(i)) {
         addNA(i)
       } else {
         i
@@ -178,6 +195,18 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
     })
   }
 
+  # check for correct length of output - must be a single value!
+  if (any(lengths(out) != 1)) {
+    insight::format_error(
+      paste0(
+        "Each expression must return a single value. Following expression",
+        ifelse(sum(lengths(out) != 1) > 1, "s", " "),
+        " returned more than one value: ",
+        text_concatenate(vapply(dots[lengths(out) != 1], insight::safe_deparse, character(1)), enclose = "\"")
+      )
+    )
+  }
+
   out
 }
 
diff --git a/man/data_summary.Rd b/man/data_summary.Rd
index 0602dc7da..ccbf4c524 100644
--- a/man/data_summary.Rd
+++ b/man/data_summary.Rd
@@ -15,13 +15,14 @@ data_summary(x, ...)
 \item{...}{One or more named expressions that define the new variable name
 and the function to compute the summary statistic. Example:
 \code{mean_sepal_width = mean(Sepal.Width)}. The expression can also be provided
-as a character string, e.g. \code{"mean_sepal_width = mean(Sepal.Width)"}.}
+as a character string, e.g. \code{"mean_sepal_width = mean(Sepal.Width)"}. The
+summary function \code{n()} can be used to count the number of observations.}
 
 \item{by}{Optional character string, indicating the name of a variable in \code{x}.
 If supplied, the data will be split by this variable and summary statistics
 will be computed for each group.}
 
-\item{include_na}{Logical, if \code{TRUE}, missing values are included as a level
+\item{include_na}{Logical. If \code{TRUE}, missing values are included as a level
 in the grouping variable. If \code{FALSE}, missing values are omitted from the
 grouping variable.}
 }
@@ -52,4 +53,15 @@ data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c("am", "gear"))
 
 # expressions can also be supplied as character strings
 data_summary(mtcars, "MW = mean(mpg)", "SD = sd(mpg)", by = c("am", "gear"))
+
+# count observations within groups
+data_summary(mtcars, observations = n(), by = c("am", "gear"))
+
+# first and last observations of "mpg" within groups
+data_summary(
+  mtcars,
+  first = mpg[1],
+  last = mpg[length(mpg)],
+  by = c("am", "gear")
+)
 }
diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R
index aede5c042..746d4c51a 100644
--- a/tests/testthat/test-data_summary.R
+++ b/tests/testthat/test-data_summary.R
@@ -141,6 +141,26 @@ test_that("data_summary, errors", {
     data_summary(mtcars, mw = mesn(mpg), by = "am"),
     regex = "There was an error"
   )
+  # wrong variable name
+  expect_error(
+    data_summary(mtcars, n = max(mpeg)),
+    regex = "There was an error"
+  )
+  # expression returns more than one value
+  expect_error(
+    data_summary(mtcars, n = unique(mpg), j = c(min(am), max(am)), by = c("am", "gear")),
+    regex = "Each expression must return"
+  )
+})
+
+
+test_that("data_summary, values_at", {
+  data(mtcars)
+  out <- data_summary(mtcars, pos1 = mpg[1], pos_end = mpg[length(mpg)], by = c("am", "gear"))
+  # same as:
+  # dplyr::summarise(mtcars, pos1 = dplyr::first(mpg), pos_end = dplyr::last(mpg), .by = c("am", "gear"))
+  expect_equal(out$pos1, c(21.4, 24.4, 21, 26), tolerance = 1e-3)
+  expect_equal(out$pos_end, c(19.2, 17.8, 21.4, 15), tolerance = 1e-3)
 })
 
 
@@ -200,3 +220,11 @@ test_that("data_summary, expression as variable", {
   expect_named(out, c("am", "gear", "MW", "SD"))
   expect_equal(out$SD, aggregate(mtcars["mpg"], list(mtcars$am, mtcars$gear), sd)$mpg, tolerance = 1e-4)
 })
+
+
+test_that("data_summary, extra functions", {
+  data(mtcars)
+  # n()
+  out <- data_summary(mtcars, n = n(), by = c("am", "gear"))
+  expect_identical(out$n, c(15L, 4L, 8L, 5L))
+})