added network comparison function (network.compare)

o passes `devtools::check`
hfgolino · Jul 11, 2024 · 5148a7b · 5148a7b
1 parent 817ef49
commit 5148a7b
Show file tree

Hide file tree

Showing 5 changed files with 495 additions and 1 deletion.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: EGAnet
 Title: Exploratory Graph Analysis – a Framework for Estimating the Number of Dimensions in Multivariate Data using Network Psychometrics
 Version: 2.0.7
-Date: 2024-05-13
+Date: 2024-07-11
 Authors@R: c(person("Hudson", "Golino", email = "hfg9s@virginia.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1601-1447")),
 	     person("Alexander", "Christensen", email = "alexpaulchristensen@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-9798-7037")),
 	     person("Robert", "Moulder", email = "rgm4fd@virginia.edu", role = "ctb", comment = c(ORCID = "0000-0001-7504-9560")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -100,6 +100,7 @@ export(jsd)
 export(modularity)
 export(net.loads)
 export(net.scores)
+export(network.compare)
 export(network.estimation)
 export(network.predictability)
 export(polychoric.matrix)

diff --git a/NEWS b/NEWS
@@ -6,6 +6,8 @@ WEBSITE: https://r-ega.net
 
 + ADD: `frobenius` norm to compare networks
 
++ ADD: `network.compare` function to test for differences in network structures using three different metrics (Frobenius Norm, Jensen-Shannon Similarity, total network strength)
+
 + UPDATE: `jsd` with `method = "spectral"` uses absolute values (no longer produces impossible values)
 
 + UPDATE: NEWS is now formatted in markdown

diff --git a/R/network.compare.R b/R/network.compare.R
@@ -0,0 +1,333 @@
+#' @title Compares Network Structures Using Permutation
+#'
+#' @description A permutation implementation to determine statistical
+#' significance of whether the network structures are different from one another
+#'
+#' @param base Matrix or data frame.
+#' Should consist only of variables to be used in the analysis.
+#' First dataset
+#'
+#' @param comparison Matrix or data frame.
+#' Should consist only of variables to be used in the analysis.
+#' Second dataset
+#'
+#' @param corr Character (length = 1).
+#' Method to compute correlations.
+#' Defaults to \code{"auto"}.
+#' Available options:
+#'
+#' \itemize{
+#'
+#' \item \code{"auto"} --- Automatically computes appropriate correlations for
+#' the data using Pearson's for continuous, polychoric for ordinal,
+#' tetrachoric for binary, and polyserial/biserial for ordinal/binary with
+#' continuous. To change the number of categories that are considered
+#' ordinal, use \code{ordinal.categories}
+#' (see \code{\link[EGAnet]{polychoric.matrix}} for more details)
+#'
+#' \item \code{"cor_auto"} --- Uses \code{\link[qgraph]{cor_auto}} to compute correlations.
+#' Arguments can be passed along to the function
+#'
+#' \item \code{"pearson"} --- Pearson's correlation is computed for all
+#' variables regardless of categories
+#'
+#' \item \code{"spearman"} --- Spearman's rank-order correlation is computed
+#' for all variables regardless of categories
+#'
+#' }
+#'
+#' For other similarity measures, compute them first and input them
+#' into \code{data} with the sample size (\code{n})
+#'
+#' @param na.data Character (length = 1).
+#' How should missing data be handled?
+#' Defaults to \code{"pairwise"}.
+#' Available options:
+#'
+#' \itemize{
+#'
+#' \item \code{"pairwise"} --- Computes correlation for all available cases between
+#' two variables
+#'
+#' \item \code{"listwise"} --- Computes correlation for all complete cases in the dataset
+#'
+#' }
+#'
+#' @param model Character (length = 1).
+#' Defaults to \code{"glasso"}.
+#' Available options:
+#'
+#' \itemize{
+#'
+#' \item \code{"BGGM"} --- Computes the Bayesian Gaussian Graphical Model.
+#' Set argument \code{ordinal.categories} to determine
+#' levels allowed for a variable to be considered ordinal.
+#' See \code{?BGGM::estimate} for more details
+#'
+#' \item \code{"glasso"} --- Computes the GLASSO with EBIC model selection.
+#' See \code{\link[EGAnet]{EBICglasso.qgraph}} for more details
+#'
+#' \item \code{"TMFG"} --- Computes the TMFG method.
+#' See \code{\link[EGAnet]{TMFG}} for more details
+#'
+#' }
+#'
+#' @param iter Numeric (length = 1).
+#' Number of permutations to perform.
+#' Defaults to \code{1000} (recommended)
+#'
+#' @param ncores Numeric (length = 1).
+#' Number of cores to use in computing results.
+#' Defaults to \code{ceiling(parallel::detectCores() / 2)} or half of your
+#' computer's processing power.
+#' Set to \code{1} to not use parallel computing
+#'
+#' @param verbose Boolean (length = 1).
+#' Should progress be displayed?
+#' Defaults to \code{TRUE}.
+#' Set to \code{FALSE} to not display progress
+#'
+#' @param seed Numeric (length = 1).
+#' Defaults to \code{NULL} or random results.
+#' Set for reproducible results.
+#' See \href{https://r-ega.net/articles/reproducibility-prng.html}{Reproducibility and PRNG}
+#' for more details on random number generation in \code{\link{EGAnet}}
+#'
+#' @param ... Additional arguments that can be passed on to
+#' \code{\link[EGAnet]{auto.correlate}},
+#' \code{\link[EGAnet]{network.estimation}},
+#' \code{\link[EGAnet]{community.detection}},
+#' \code{\link[EGAnet]{community.consensus}}, and
+#' \code{\link[EGAnet]{EGA}}
+#'
+#' @return Returns data frame with row names of each measure, empirical value (\code{statisticl}), and p-value based on the permutation test (\code{p.value})
+#'
+#' @author Hudson Golino <hfg9s at virginia.edu> and Alexander P. Christensen <alexpaulchristensen@gmail.com>
+#'
+#' @examples
+#' # Load data
+#' wmt <- wmt2[,7:24]
+#'
+#' # Set groups (if necessary)
+#' groups <- rep(1:2, each = nrow(wmt) / 2)
+#'
+#' # Groups
+#' group1 <- wmt[groups == 1,]
+#' group2 <- wmt[groups == 2,]
+#'
+#' \dontrun{
+#' results <- network.compare(group1, group2)}
+#'
+#' @references
+#' \strong{Frobenius Norm} \cr
+#' Ulitzsch, E., Khanna, S., Rhemtulla, M., & Domingue, B. W. (2023).
+#' A graph theory based similarity metric enables comparison of subpopulation psychometric networks.
+#' \emph{Psychological Methods}.
+#'
+#' \strong{Jensen-Shannon Similarity (1 - Distance)} \cr
+#' De Domenico, M., Nicosia, V., Arenas, A., & Latora, V. (2015).
+#' Structural reducibility of multilayer networks.
+#' \emph{Nature Communications}, \emph{6}(1), 1–9.
+#'
+#' \strong{Total Network Strength} \cr
+#' van Borkulo, C. D., van Bork, R., Boschloo, L., Kossakowski, J. J., Tio, P., Schoevers, R. A., Borsboom, D., & Waldorp, L. J. (2023).
+#' Comparing network structures on three aspects: A permutation test.
+#' \emph{Psychological Methods}, \emph{28}(6), 1273–1285.
+#'
+#'
+#' @export
+#'
+# Perform permutations for network structures ----
+# Updated 11.07.2024
+network.compare <- function(
+    base, comparison,
+    # EGA arguments
+    corr = c("auto", "cor_auto", "pearson", "spearman"),
+    na.data = c("pairwise", "listwise"),
+    model = c("BGGM", "glasso", "TMFG"),
+    # Permutation arguments
+    iter = 1000, ncores, verbose = TRUE, seed = NULL,
+    ...
+)
+{
+
+  # Experimental warning
+  experimental("network.compare")
+
+  # Check for missing arguments (argument, default, function)
+  corr <- set_default(corr, "auto", EGA.estimate)
+  na.data <- set_default(na.data, "pairwise", auto.correlate)
+  model <- set_default(model, "glasso", network.estimation)
+
+  # Set cores
+  if(missing(ncores)){ncores <- ceiling(parallel::detectCores() / 2)}
+
+  # Check for input errors
+  error_return <- network.compare_errors(base, comparison, iter, verbose, seed, ...)
+
+  # Get ellipse
+  ellipse <- list(...)
+
+  # Check for seed
+  if(!is.null(seed)){
+    seeds <- reproducible_seeds(iter, seed)
+  }else{
+
+    # Set all seeds to zero (or random)
+    seeds <- rep(0, iter)
+
+    # Check for external suppression (from `invariance`)
+    if(!"suppress" %in% names(ellipse) || !ellipse$suppress){
+      message("Argument 'seed' is set to `NULL`. Results will not be reproducible. Set 'seed' for reproducible results")
+    }
+
+  }
+
+  # Get empirical networks
+  base_empirical_network <- EGA(
+    base, corr = corr, na.data = na.data,
+    model = model, plot.EGA = FALSE, ...
+  )$network
+  comparison_empirical_network <- EGA(
+    comparison, corr = corr, na.data = na.data,
+    model = model, plot.EGA = FALSE, ...
+  )$network
+
+  # Get empirical estimates
+  empirical_values = abs(
+    c(
+      "Frobenius" = frobenius(base_empirical_network, comparison_empirical_network),
+      "JSS" = 1 - jsd(base_empirical_network, comparison_empirical_network),
+      "Total Strength" = sum(colSums(abs(base_empirical_network), na.rm = TRUE), na.rm = TRUE) -
+        sum(colSums(abs(comparison_empirical_network), na.rm = TRUE), na.rm = TRUE)
+    )
+  )
+
+  # Create combined dataset
+  combined <- rbind(base, comparison)
+
+  # Set up indices
+  combined_index <- nrow_sequence(combined)
+  base_length <- dim(base)[1]
+
+  # Perform permutations
+  permutated_values <- do.call(
+    rbind, parallel_process(
+      iterations = iter, datalist = seeds, FUN = function(seed, ...){
+
+        # Get shuffled indices
+        base_shuffled <- shuffle(combined_index, size = base_length, seed = seed)
+
+        # Get permutated networks
+        base_network <- EGA(
+          combined[base_shuffled,], corr = corr, na.data = na.data,
+          model = model, plot.EGA = FALSE, ...
+        )$network
+        comparison_network <- EGA(
+          combined[-base_shuffled,], corr = corr, na.data = na.data,
+          model = model, plot.EGA = FALSE, ...
+        )$network
+
+        # Return permutated estimates
+        return(
+          c(
+            "Frobenius" = frobenius(base_network, comparison_network),
+            "JSS" = 1 - jsd(base_network, comparison_network),
+            "Total Strength" = sum(colSums(abs(base_network), na.rm = TRUE), na.rm = TRUE) -
+              sum(colSums(abs(comparison_network), na.rm = TRUE), na.rm = TRUE)
+          )
+        )
+
+      }, ncores = ncores, progress = verbose, ...
+    )
+  )
+
+  # Return statistics
+  return(
+    t(data.frame(
+      "statistic" = empirical_values,
+      "p.value" = colMeans(
+        sweep(
+          x = abs(permutated_values), MARGIN = 2,
+          STATS = empirical_values, FUN = ">="
+        ), na.rm = TRUE
+      )
+    ))
+  )
+
+}
+
+# Bug Checking ----
+# wmt <- wmt2[-1,7:24]
+# groups <- rep(1:2, each = nrow(wmt) / 2)
+# base <- wmt[groups == 1,]; comparison <- wmt[groups == 2,]
+# corr = "auto"; na.data = "pairwise"; model = "glasso"
+# iter = 1000; ncores = 8; verbose = TRUE; seed = NULL
+
+#' @noRd
+# Errors ----
+# Updated 10.07.2024
+network.compare_errors <- function(base, comparison, iter, verbose, seed, ...)
+{
+
+  # 'base' errors
+  object_error(base, c("matrix", "data.frame", "tibble"), "network.compare")
+
+  # Check for tibble
+  if(get_object_type(base) == "tibble"){
+    base <- as.data.frame(base)
+  }
+
+  # 'comparison' errors
+  object_error(comparison, c("matrix", "data.frame", "tibble"), "network.compare")
+
+  # Check for tibble
+  if(get_object_type(comparison) == "tibble"){
+    comparison <- as.data.frame(comparison)
+  }
+
+  # 'iter' errors
+  length_error(iter, 1, "network.compare")
+  typeof_error(iter, "numeric", "network.compare")
+
+  # 'verbose' errors
+  length_error(verbose, 1, "network.compare")
+  typeof_error(verbose, "logical", "network.compare")
+
+  # 'seed' errors
+  if(!is.null(seed)){
+    length_error(seed, 1, "network.compare")
+    typeof_error(seed, "numeric", "network.compare")
+    range_error(seed,  c(0, Inf), "network.compare")
+  }
+
+  # Check for usable data
+  if(needs_usable(list(...))){
+    base <- usable_data(base, verbose)
+  }
+
+  # Check for usable data
+  if(needs_usable(list(...))){
+    comparison <- usable_data(comparison, verbose)
+  }
+
+  # Error checks complete
+
+  # Check for length of input in expected length
+  if(dim(base)[2] != dim(comparison)[2]){
+    .handleSimpleError(
+      h = stop,
+      msg = paste0(
+        "Number of columns for '", deparse(substitute(base)),
+        "' (", dim(base)[2], ") did not match number of columns for '",
+        deparse(substitute(comparison)), "' (", dim(comparison)[2], ").\n",
+        "Number of columns in these datasets must match"
+      ),
+      call = "network.compare"
+    )
+  }
+
+  # Return 'base' and 'comparison'
+  return(list(base = base, comparison = comparison))
+
+}