Skip to content

Commit

Permalink
added network comparison function (network.compare)
Browse files Browse the repository at this point in the history
o passes `devtools::check`
  • Loading branch information
AlexChristensen committed Jul 11, 2024
1 parent 817ef49 commit 5148a7b
Show file tree
Hide file tree
Showing 5 changed files with 495 additions and 1 deletion.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: EGAnet
Title: Exploratory Graph Analysis – a Framework for Estimating the Number of Dimensions in Multivariate Data using Network Psychometrics
Version: 2.0.7
Date: 2024-05-13
Date: 2024-07-11
Authors@R: c(person("Hudson", "Golino", email = "hfg9s@virginia.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1601-1447")),
person("Alexander", "Christensen", email = "alexpaulchristensen@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-9798-7037")),
person("Robert", "Moulder", email = "rgm4fd@virginia.edu", role = "ctb", comment = c(ORCID = "0000-0001-7504-9560")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ export(jsd)
export(modularity)
export(net.loads)
export(net.scores)
export(network.compare)
export(network.estimation)
export(network.predictability)
export(polychoric.matrix)
Expand Down
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ WEBSITE: https://r-ega.net

+ ADD: `frobenius` norm to compare networks

+ ADD: `network.compare` function to test for differences in network structures using three different metrics (Frobenius Norm, Jensen-Shannon Similarity, total network strength)

+ UPDATE: `jsd` with `method = "spectral"` uses absolute values (no longer produces impossible values)

+ UPDATE: NEWS is now formatted in markdown
Expand Down
333 changes: 333 additions & 0 deletions R/network.compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,333 @@
#' @title Compares Network Structures Using Permutation
#'
#' @description A permutation implementation to determine statistical
#' significance of whether the network structures are different from one another
#'
#' @param base Matrix or data frame.
#' Should consist only of variables to be used in the analysis.
#' First dataset
#'
#' @param comparison Matrix or data frame.
#' Should consist only of variables to be used in the analysis.
#' Second dataset
#'
#' @param corr Character (length = 1).
#' Method to compute correlations.
#' Defaults to \code{"auto"}.
#' Available options:
#'
#' \itemize{
#'
#' \item \code{"auto"} --- Automatically computes appropriate correlations for
#' the data using Pearson's for continuous, polychoric for ordinal,
#' tetrachoric for binary, and polyserial/biserial for ordinal/binary with
#' continuous. To change the number of categories that are considered
#' ordinal, use \code{ordinal.categories}
#' (see \code{\link[EGAnet]{polychoric.matrix}} for more details)
#'
#' \item \code{"cor_auto"} --- Uses \code{\link[qgraph]{cor_auto}} to compute correlations.
#' Arguments can be passed along to the function
#'
#' \item \code{"pearson"} --- Pearson's correlation is computed for all
#' variables regardless of categories
#'
#' \item \code{"spearman"} --- Spearman's rank-order correlation is computed
#' for all variables regardless of categories
#'
#' }
#'
#' For other similarity measures, compute them first and input them
#' into \code{data} with the sample size (\code{n})
#'
#' @param na.data Character (length = 1).
#' How should missing data be handled?
#' Defaults to \code{"pairwise"}.
#' Available options:
#'
#' \itemize{
#'
#' \item \code{"pairwise"} --- Computes correlation for all available cases between
#' two variables
#'
#' \item \code{"listwise"} --- Computes correlation for all complete cases in the dataset
#'
#' }
#'
#' @param model Character (length = 1).
#' Defaults to \code{"glasso"}.
#' Available options:
#'
#' \itemize{
#'
#' \item \code{"BGGM"} --- Computes the Bayesian Gaussian Graphical Model.
#' Set argument \code{ordinal.categories} to determine
#' levels allowed for a variable to be considered ordinal.
#' See \code{?BGGM::estimate} for more details
#'
#' \item \code{"glasso"} --- Computes the GLASSO with EBIC model selection.
#' See \code{\link[EGAnet]{EBICglasso.qgraph}} for more details
#'
#' \item \code{"TMFG"} --- Computes the TMFG method.
#' See \code{\link[EGAnet]{TMFG}} for more details
#'
#' }
#'
#' @param iter Numeric (length = 1).
#' Number of permutations to perform.
#' Defaults to \code{1000} (recommended)
#'
#' @param ncores Numeric (length = 1).
#' Number of cores to use in computing results.
#' Defaults to \code{ceiling(parallel::detectCores() / 2)} or half of your
#' computer's processing power.
#' Set to \code{1} to not use parallel computing
#'
#' @param verbose Boolean (length = 1).
#' Should progress be displayed?
#' Defaults to \code{TRUE}.
#' Set to \code{FALSE} to not display progress
#'
#' @param seed Numeric (length = 1).
#' Defaults to \code{NULL} or random results.
#' Set for reproducible results.
#' See \href{https://r-ega.net/articles/reproducibility-prng.html}{Reproducibility and PRNG}
#' for more details on random number generation in \code{\link{EGAnet}}
#'
#' @param ... Additional arguments that can be passed on to
#' \code{\link[EGAnet]{auto.correlate}},
#' \code{\link[EGAnet]{network.estimation}},
#' \code{\link[EGAnet]{community.detection}},
#' \code{\link[EGAnet]{community.consensus}}, and
#' \code{\link[EGAnet]{EGA}}
#'
#' @return Returns data frame with row names of each measure, empirical value (\code{statisticl}), and p-value based on the permutation test (\code{p.value})
#'
#' @author Hudson Golino <hfg9s at virginia.edu> and Alexander P. Christensen <alexpaulchristensen@gmail.com>
#'
#' @examples
#' # Load data
#' wmt <- wmt2[,7:24]
#'
#' # Set groups (if necessary)
#' groups <- rep(1:2, each = nrow(wmt) / 2)
#'
#' # Groups
#' group1 <- wmt[groups == 1,]
#' group2 <- wmt[groups == 2,]
#'
#' \dontrun{
#' results <- network.compare(group1, group2)}
#'
#' @references
#' \strong{Frobenius Norm} \cr
#' Ulitzsch, E., Khanna, S., Rhemtulla, M., & Domingue, B. W. (2023).
#' A graph theory based similarity metric enables comparison of subpopulation psychometric networks.
#' \emph{Psychological Methods}.
#'
#' \strong{Jensen-Shannon Similarity (1 - Distance)} \cr
#' De Domenico, M., Nicosia, V., Arenas, A., & Latora, V. (2015).
#' Structural reducibility of multilayer networks.
#' \emph{Nature Communications}, \emph{6}(1), 1–9.
#'
#' \strong{Total Network Strength} \cr
#' van Borkulo, C. D., van Bork, R., Boschloo, L., Kossakowski, J. J., Tio, P., Schoevers, R. A., Borsboom, D., & Waldorp, L. J. (2023).
#' Comparing network structures on three aspects: A permutation test.
#' \emph{Psychological Methods}, \emph{28}(6), 1273–1285.
#'
#'
#' @export
#'
# Perform permutations for network structures ----
# Updated 11.07.2024
network.compare <- function(
base, comparison,
# EGA arguments
corr = c("auto", "cor_auto", "pearson", "spearman"),
na.data = c("pairwise", "listwise"),
model = c("BGGM", "glasso", "TMFG"),
# Permutation arguments
iter = 1000, ncores, verbose = TRUE, seed = NULL,
...
)
{

# Experimental warning
experimental("network.compare")

# Check for missing arguments (argument, default, function)
corr <- set_default(corr, "auto", EGA.estimate)
na.data <- set_default(na.data, "pairwise", auto.correlate)
model <- set_default(model, "glasso", network.estimation)

# Set cores
if(missing(ncores)){ncores <- ceiling(parallel::detectCores() / 2)}

# Check for input errors
error_return <- network.compare_errors(base, comparison, iter, verbose, seed, ...)

# Get ellipse
ellipse <- list(...)

# Check for seed
if(!is.null(seed)){
seeds <- reproducible_seeds(iter, seed)
}else{

# Set all seeds to zero (or random)
seeds <- rep(0, iter)

# Check for external suppression (from `invariance`)
if(!"suppress" %in% names(ellipse) || !ellipse$suppress){
message("Argument 'seed' is set to `NULL`. Results will not be reproducible. Set 'seed' for reproducible results")
}

}

# Get empirical networks
base_empirical_network <- EGA(
base, corr = corr, na.data = na.data,
model = model, plot.EGA = FALSE, ...
)$network
comparison_empirical_network <- EGA(
comparison, corr = corr, na.data = na.data,
model = model, plot.EGA = FALSE, ...
)$network

# Get empirical estimates
empirical_values = abs(
c(
"Frobenius" = frobenius(base_empirical_network, comparison_empirical_network),
"JSS" = 1 - jsd(base_empirical_network, comparison_empirical_network),
"Total Strength" = sum(colSums(abs(base_empirical_network), na.rm = TRUE), na.rm = TRUE) -
sum(colSums(abs(comparison_empirical_network), na.rm = TRUE), na.rm = TRUE)
)
)

# Create combined dataset
combined <- rbind(base, comparison)

# Set up indices
combined_index <- nrow_sequence(combined)
base_length <- dim(base)[1]

# Perform permutations
permutated_values <- do.call(
rbind, parallel_process(
iterations = iter, datalist = seeds, FUN = function(seed, ...){

# Get shuffled indices
base_shuffled <- shuffle(combined_index, size = base_length, seed = seed)

# Get permutated networks
base_network <- EGA(
combined[base_shuffled,], corr = corr, na.data = na.data,
model = model, plot.EGA = FALSE, ...
)$network
comparison_network <- EGA(
combined[-base_shuffled,], corr = corr, na.data = na.data,
model = model, plot.EGA = FALSE, ...
)$network

# Return permutated estimates
return(
c(
"Frobenius" = frobenius(base_network, comparison_network),
"JSS" = 1 - jsd(base_network, comparison_network),
"Total Strength" = sum(colSums(abs(base_network), na.rm = TRUE), na.rm = TRUE) -
sum(colSums(abs(comparison_network), na.rm = TRUE), na.rm = TRUE)
)
)

}, ncores = ncores, progress = verbose, ...
)
)

# Return statistics
return(
t(data.frame(
"statistic" = empirical_values,
"p.value" = colMeans(
sweep(
x = abs(permutated_values), MARGIN = 2,
STATS = empirical_values, FUN = ">="
), na.rm = TRUE
)
))
)

}

# Bug Checking ----
# wmt <- wmt2[-1,7:24]
# groups <- rep(1:2, each = nrow(wmt) / 2)
# base <- wmt[groups == 1,]; comparison <- wmt[groups == 2,]
# corr = "auto"; na.data = "pairwise"; model = "glasso"
# iter = 1000; ncores = 8; verbose = TRUE; seed = NULL

#' @noRd
# Errors ----
# Updated 10.07.2024
network.compare_errors <- function(base, comparison, iter, verbose, seed, ...)
{

# 'base' errors
object_error(base, c("matrix", "data.frame", "tibble"), "network.compare")

# Check for tibble
if(get_object_type(base) == "tibble"){
base <- as.data.frame(base)
}

# 'comparison' errors
object_error(comparison, c("matrix", "data.frame", "tibble"), "network.compare")

# Check for tibble
if(get_object_type(comparison) == "tibble"){
comparison <- as.data.frame(comparison)
}

# 'iter' errors
length_error(iter, 1, "network.compare")
typeof_error(iter, "numeric", "network.compare")

# 'verbose' errors
length_error(verbose, 1, "network.compare")
typeof_error(verbose, "logical", "network.compare")

# 'seed' errors
if(!is.null(seed)){
length_error(seed, 1, "network.compare")
typeof_error(seed, "numeric", "network.compare")
range_error(seed, c(0, Inf), "network.compare")
}

# Check for usable data
if(needs_usable(list(...))){
base <- usable_data(base, verbose)
}

# Check for usable data
if(needs_usable(list(...))){
comparison <- usable_data(comparison, verbose)
}

# Error checks complete

# Check for length of input in expected length
if(dim(base)[2] != dim(comparison)[2]){
.handleSimpleError(
h = stop,
msg = paste0(
"Number of columns for '", deparse(substitute(base)),
"' (", dim(base)[2], ") did not match number of columns for '",
deparse(substitute(comparison)), "' (", dim(comparison)[2], ").\n",
"Number of columns in these datasets must match"
),
call = "network.compare"
)
}

# Return 'base' and 'comparison'
return(list(base = base, comparison = comparison))

}
Loading

0 comments on commit 5148a7b

Please sign in to comment.