added plot_jn function
Kss2k committed Nov 27, 2024
1 parent 92c4d9a commit e2fe937
Showing 10 changed files with 343 additions and 12 deletions.
10 changes: 10 additions & 0 deletions NAMESPACE
@@ -74,11 +74,21 @@ export(modsemify)
export(multiplyIndicatorsCpp)
export(parameter_estimates)
export(plot_interaction)
export(plot_jn)
export(standardized_estimates)
export(trace_path)
export(var_interactions)
export(vcov_modsem_da)
importFrom(Rcpp,sourceCpp)
importFrom(ggplot2,aes)
importFrom(ggplot2,annotate)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_ribbon)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,theme_minimal)
importFrom(stats,coef)
importFrom(stats,coefficients)
importFrom(stats,nobs)
5 changes: 2 additions & 3 deletions R/modsem_pi.R
@@ -45,8 +45,7 @@
#'
#' @param run should the model be run via \code{lavaan}? If \code{FALSE}, only the modified syntax and data are returned
#'
#' @param na.rm should missing values be removed (case-wise)? Default is \code{NULL}, which has the same effect as \code{TRUE}
#' but will generate a warning if missing values are present. If \code{TRUE}, missing values are removed without any warning.
#' @param na.rm should missing values be removed (case-wise)? Defaults to \code{FALSE}. If \code{TRUE}, missing values are removed case-wise;
#' if \code{FALSE}, they are not removed.
#'
#' @param suppress.warnings.lavaan should warnings from \code{lavaan} be suppressed?
@@ -140,7 +139,7 @@ modsem_pi <- function(model.syntax = NULL,
estimator = "ML",
group = NULL,
run = TRUE,
na.rm = NULL,
na.rm = FALSE,
suppress.warnings.lavaan = FALSE,
suppress.warnings.match = FALSE,
...) {
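# Illustration (not part of the commit): the new `na.rm` default in practice,
# mirroring the tests further down in this commit. `m1` and `oneInt2` (a data
# frame containing NAs) are the objects used in tests/testthat/test_missing.R.
library(modsem)
est_keep <- modsem(m1, oneInt2)                # na.rm = FALSE (new default): NAs are kept
est_drop <- modsem(m1, oneInt2, na.rm = TRUE)  # rows with NAs removed case-wise
anyNA(est_keep$data)  # TRUE
anyNA(est_drop$data)  # FALSE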
193 changes: 193 additions & 0 deletions R/plot_interaction.R
@@ -123,3 +123,196 @@ calc_se <- function(x, var, n, s) {
SSx <- (n - 1) * var # sum of squares of x
s * sqrt(1 / n + x ^ 2 / SSx)
}

#' Plot Interaction Effect Using the Johnson-Neyman Technique
#'
#' This function plots the simple slopes of an interaction effect across different values of a moderator variable using the Johnson-Neyman technique. It identifies regions where the effect of the predictor on the outcome is statistically significant.
#'
#' @param x The name of the predictor variable (as a character string).
#' @param z The name of the moderator variable (as a character string).
#' @param y The name of the outcome variable (as a character string).
#' @param xz The name of the interaction term. If not specified, it will be created using \code{x} and \code{z}.
#' @param model A fitted model object of class \code{modsem_da}, \code{modsem_mplus}, \code{modsem_pi}, or \code{lavaan}.
#' @param min_z The minimum value of the moderator variable \code{z} to be used in the plot (default is -3).
#' @param max_z The maximum value of the moderator variable \code{z} to be used in the plot (default is 3).
#' @param alpha The significance level for the confidence intervals (default is 0.05).
#' @param ... Additional arguments (currently not used).
#' @return A \code{ggplot} object showing the interaction plot with regions of significance.
#' @details
#' The function calculates the simple slopes of the predictor variable \code{x} on the outcome variable \code{y} at different levels of the moderator variable \code{z}. It uses the Johnson-Neyman technique to identify the regions of \code{z} where the effect of \code{x} on \code{y} is statistically significant.
#'
#' It extracts the necessary coefficients and variance-covariance information from the fitted model object. The function then computes the critical t-value and solves the quadratic equation derived from the t-statistic equation to find the Johnson-Neyman points.
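#'
#' In more detail (a sketch of the algebra the code below implements): the simple slope
#' at a moderator value \eqn{z} is \eqn{\beta_x + \beta_{xz} z}, with standard error
#' \eqn{\sqrt{Var(\beta_x) + z^2 Var(\beta_{xz}) + 2 z Cov(\beta_x, \beta_{xz})}}.
#' Setting the ratio of slope to standard error equal to the critical t-value and
#' squaring both sides gives the quadratic \eqn{A z^2 + B z + C = 0}, with
#' \eqn{A = \beta_{xz}^2 - t^2 Var(\beta_{xz})},
#' \eqn{B = 2 \beta_x \beta_{xz} - 2 t^2 Cov(\beta_x, \beta_{xz})}, and
#' \eqn{C = \beta_x^2 - t^2 Var(\beta_x)}; its real roots (if any) are the
#' Johnson-Neyman points.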
#'
#' The plot displays:
#' \itemize{
#' \item The estimated simple slopes across the range of \code{z}.
#' \item Confidence intervals around the slopes.
#' \item Regions where the effect is significant (shaded areas).
#' \item Vertical dashed lines indicating the Johnson-Neyman points.
#' \item Text annotations providing the exact values of the Johnson-Neyman points.
#' }
#'
#' @examples
#' \dontrun{
#' library(modsem)
#'
#' m1 <- '
#' visual =~ x1 + x2 + x3
#' textual =~ x4 + x5 + x6
#' speed =~ x7 + x8 + x9
#'
#' visual ~ speed + textual + speed:textual
#' '
#'
#' est <- modsem(m1, data = lavaan::HolzingerSwineford1939, method = "ca")
#' plot_jn(x = "speed", z = "textual", y = "visual", model = est, max_z = 6)
#' }
#' @importFrom ggplot2 ggplot aes geom_line geom_ribbon geom_vline annotate scale_fill_manual labs theme_minimal
#' @export
plot_jn <- function(x, z, y, xz = NULL, model, min_z = -3, max_z = 3, alpha = 0.05, ...) {
# Check if model is a valid object
stopif(!inherits(model, c("modsem_da", "modsem_mplus", "modsem_pi", "lavaan")),
"model must be of class 'modsem_pi', 'modsem_da', 'modsem_mplus', or 'lavaan'")

# If interaction term is not specified, create it
if (is.null(xz)) xz <- paste(x, z, sep = ":")
xz <- c(xz, reverseIntTerm(xz))
if (!inherits(model, c("modsem_da", "modsem_mplus")) && !inherits(model, "lavaan")) {
xz <- stringr::str_remove_all(xz, ":")
}

# Extract parameter estimates
parTable <- parameter_estimates(model)
parTable <- getMissingLabels(parTable)

# Extract coefficients
beta_x <- parTable[parTable$lhs == y & parTable$rhs == x & parTable$op == "~", "est"]
beta_xz <- parTable[parTable$lhs == y & parTable$rhs %in% xz & parTable$op == "~", "est"]

stopif(length(beta_x) == 0, "Coefficient for x not found in model")
stopif(length(beta_xz) == 0, "Coefficient for interaction term not found in model")

# Extract variance-covariance matrix
vcov_matrix <- vcov(model)

label_beta_x <- parTable[parTable$lhs == y & parTable$rhs == x & parTable$op == "~", "label"]
label_beta_xz <- parTable[parTable$lhs == y & parTable$rhs %in% xz & parTable$op == "~", "label"]

# Extract variances and covariances
var_beta_x <- vcov_matrix[label_beta_x, label_beta_x]
var_beta_xz <- vcov_matrix[label_beta_xz, label_beta_xz]
cov_beta_x_beta_xz <- vcov_matrix[label_beta_x, label_beta_xz]

nobs <- nobs(model)
npar <- length(coef(model))

df_resid <- nobs - npar

stopif(df_resid < 1, "Degrees of freedom for residuals must be greater than 0. ",
"The model may have fewer observations than parameters.")

# Critical t-value
t_crit <- stats::qt(1 - alpha / 2, df_resid)

# Quadratic equation components
A <- beta_xz^2 - t_crit^2 * var_beta_xz
B <- 2 * beta_x * beta_xz - 2 * t_crit^2 * cov_beta_x_beta_xz
C <- beta_x^2 - t_crit^2 * var_beta_x

discriminant <- B^2 - 4 * A * C

if (A == 0) {
# Linear case
if (B != 0) {
z_jn <- -C / B
significant_everywhere <- FALSE
z_lower <- z_jn
z_upper <- z_jn
} else {
message("No regions where the effect transitions between significant and non-significant.")
significant_everywhere <- TRUE
}
} else if (discriminant < 0) {
message("No regions where the effect transitions between significant and non-significant.")
significant_everywhere <- TRUE
} else if (discriminant == 0) {
# One real root
z_jn <- -B / (2 * A)
significant_everywhere <- FALSE
z_lower <- z_jn
z_upper <- z_jn
} else {
# Two real roots
z1 <- (-B + sqrt(discriminant)) / (2 * A)
z2 <- (-B - sqrt(discriminant)) / (2 * A)
z_lower <- min(z1, z2)
z_upper <- max(z1, z2)
significant_everywhere <- FALSE
}

z_range <- seq(min_z, max_z, length.out = 100)

# Calculate slopes and statistics
slope <- beta_x + beta_xz * z_range
SE_slope <- sqrt(var_beta_x + z_range^2 * var_beta_xz + 2 * z_range * cov_beta_x_beta_xz)
t_value <- slope / SE_slope
p_value <- 2 * (1 - stats::pt(abs(t_value), df_resid))
significant <- p_value < alpha

# Create plotting data frame
df_plot <- data.frame(z = z_range, slope = slope, SE = SE_slope, t = t_value, p = p_value, significant = significant)

# Plotting
p <- ggplot2::ggplot(df_plot, ggplot2::aes(x = z, y = slope)) +
ggplot2::geom_line() +
ggplot2::geom_ribbon(ggplot2::aes(ymin = slope - t_crit * SE_slope, ymax = slope + t_crit * SE_slope, fill = significant), alpha = 0.2) +
ggplot2::scale_fill_manual(values = c("TRUE" = "blue", "FALSE" = "grey"), guide = "none") +
ggplot2::labs(x = z, y = paste("Simple slope of", x, "on", y)) +
ggplot2::theme_minimal()

# Add Johnson-Neyman points if applicable
if (!significant_everywhere) {
# Ensure JN points are within the range of z
if (exists("z_jn")) {
# Single JN point
if (z_jn >= min_z && z_jn <= max_z) {
p <- p +
ggplot2::geom_vline(xintercept = z_jn, linetype = "dashed", color = "red") +
ggplot2::annotate("text", x = z_jn, y = max(df_plot$slope), label = paste("JN point:", round(z_jn, 2)),
hjust = -0.1, vjust = 1, color = "black")
}
} else {
# Two JN points
if (z_lower >= min_z && z_lower <= max_z && z_upper >= min_z && z_upper <= max_z) {
# Add light red fill between z_lower and z_upper
p <- p +
ggplot2::annotate(geom="rect", xmin = z_lower, xmax = z_upper, ymin = -Inf, ymax = Inf, fill = "red", alpha = 0.1)
}
if (z_lower >= min_z && z_lower <= max_z) {
p <- p +
ggplot2::geom_vline(xintercept = z_lower, linetype = "dashed", color = "red") +
ggplot2::annotate("text", x = z_lower, y = max(df_plot$slope), label = paste("JN point:", round(z_lower, 2)),
hjust = -0.1, vjust = 1, color = "black")
}
if (z_upper >= min_z && z_upper <= max_z) {
p <- p +
ggplot2::geom_vline(xintercept = z_upper, linetype = "dashed", color = "red") +
ggplot2::annotate("text", x = z_upper, y = max(df_plot$slope), label = paste("JN point:", round(z_upper, 2)),
hjust = -0.1, vjust = 1, color = "black")
}
}
}

p
}

# Helper function to reverse interaction term
reverseIntTerm <- function(xz) {
if (grepl(":", xz)) {
vars <- strsplit(xz, ":")[[1]]
rev_xz <- paste(rev(vars), collapse = ":")
} else {
rev_xz <- xz
}
rev_xz
}
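
# Illustration (not part of the commit): the Johnson-Neyman algebra used in
# plot_jn(), applied by hand to hypothetical coefficients and (co)variances.
beta_x  <- 0.40    # hypothetical simple effect of x on y at z = 0
beta_xz <- 0.25    # hypothetical interaction coefficient
var_beta_x  <- 0.010
var_beta_xz <- 0.004
cov_beta_x_beta_xz <- 0.001
df_resid <- 300

t_crit <- stats::qt(1 - 0.05 / 2, df_resid)

# Quadratic A*z^2 + B*z + C = 0 obtained from |slope / SE| = t_crit
A <- beta_xz^2 - t_crit^2 * var_beta_xz
B <- 2 * beta_x * beta_xz - 2 * t_crit^2 * cov_beta_x_beta_xz
C <- beta_x^2 - t_crit^2 * var_beta_x

discriminant <- B^2 - 4 * A * C
if (A != 0 && discriminant > 0) {
  z_jn <- sort(c((-B - sqrt(discriminant)) / (2 * A),
                 (-B + sqrt(discriminant)) / (2 * A)))
  z_jn  # moderator values where the simple slope changes significance
}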
9 changes: 9 additions & 0 deletions R/utils.R
@@ -348,3 +348,12 @@ stripColonsParTable <- function(parTable) {
parTable$rhs <- stringr::str_remove_all(parTable$rhs, ":")
parTable
}


getMissingLabels <- function(parTable) {
if (!"label" %in% colnames(parTable)) parTable$label <- ""
missing <- is.na(parTable$label) | parTable$label == ""
labels <- sprintf("%s%s%s", parTable$lhs, parTable$op, parTable$rhs)
parTable[missing, "label"] <- labels[missing]
parTable
}
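
# Illustration (hypothetical rows, not from the package): parameters without a
# user label get a default "lhs-op-rhs" label, so plot_jn() can look them up by
# name in the variance-covariance matrix returned by vcov().
pt <- data.frame(lhs = c("y", "y"), op = c("~", "~"),
                 rhs = c("x", "xz"), label = c("", "b3"),
                 stringsAsFactors = FALSE)
getMissingLabels(pt)$label  # "y~x" "b3"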
5 changes: 2 additions & 3 deletions man/modsem_pi.Rd

Generated file; not rendered by default.

63 changes: 63 additions & 0 deletions man/plot_jn.Rd

Generated file; not rendered by default.

7 changes: 6 additions & 1 deletion tests/testthat/test_lav_models.R
@@ -82,8 +82,13 @@ testthat::expect_warning({
# testing plot function
plot_interaction(x = "ind60", z = "dem60", y = "dem65", xz = "ind60:dem60",
vals_z = c(-0.5, 0.5), model = estimates[[1]][["rca"]])
plot_interaction(x = "speed", z = "textual", y = "visual", xz = "speed:textual",
plot_interaction(x = "speed", z = "textual", y = "visual",
vals_z = c(-0.5, 0.5), model = estimates[[2]][["ca"]])
testthat::expect_error(
plot_jn(x = "ind60", z = "dem60", y = "dem65", model = estimates[[1]][["rca"]])
)
plot_jn(x = "speed", z = "textual", y = "visual", model = estimates[[2]][["ca"]],
max_z = 6)

print(summary(estimates[[1]][["rca"]]))
print(estimates[[1]][["rca"]])
10 changes: 5 additions & 5 deletions tests/testthat/test_missing.R
@@ -13,9 +13,9 @@ oneInt2 <- oneInt
oneInt2[c(176, 176, 258, 1900),
c(1, 2, 3, 7)] <- NA

# Double centering approach
testthat::expect_warning(modsem(m1, oneInt2),
regex = "Removing missing values case-wise")
# double centering approach
est <- modsem(m1, oneInt2)
testthat::expect_true(any(is.na(est$data)))

est <- modsem(m1, oneInt2, na.rm=TRUE)
testthat::expect_true(!any(is.na(est$data)))
@@ -24,8 +24,8 @@ est <- modsem(m1, oneInt2, na.rm=FALSE)
testthat::expect_true(any(is.na(est$data)))

# Residual Centering Approach
testthat::expect_warning(modsem(m1, oneInt2, method = "rca"),
regex = "Removing missing values case-wise")
est <- modsem(m1, oneInt2, method = "rca")
testthat::expect_true(any(is.na(est$data)))

est <- modsem(m1, oneInt2, method = "rca", na.rm=TRUE)
testthat::expect_true(!any(is.na(est$data)))
3 changes: 3 additions & 0 deletions tests/testthat/test_qml.R
@@ -35,6 +35,9 @@ est2 <- modsem(tpb, data = TPB, method = "qml",
robust.se = TRUE,
standardize = TRUE, convergence = 1e-2)
print(summary(est2, H0 = FALSE))
plot_jn(x = "INT", z = "PBC", y = "BEH", model = est2,
min_z = -1.5, max_z = -0.5)


testthat::expect_equal(standardized_estimates(est2),
parameter_estimates(est2))
