Skip to content

Commit

Permalink
some internal updates
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexChristensen committed Sep 25, 2024
1 parent 8d87f4f commit 8832476
Show file tree
Hide file tree
Showing 2 changed files with 275 additions and 5 deletions.
8 changes: 3 additions & 5 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -2854,15 +2854,13 @@ pcor2cor <- function(partial_correlations)

#' @noRd
# Partial correlations to inverse covariances ----
# Updated 29.06.2023
# Updated 25.09.2024
pcor2inv <- function(partial_correlations)
{

# Set diagonal to negative 1
diag(partial_correlations) <- -1

# Return inverse covariance matrix
return(solve(-partial_correlations))
return(solve(pcor2cor(partial_correlations)))
# Using `pcor2cor` puts it back to the original precision matrix values

}

Expand Down
272 changes: 272 additions & 0 deletions R/simEGM.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
#' Simulate data following a Exploratory Graph Model
#'
#' @description Function to simulate data based on the Exploratory Graph Model
#'
#' @param communities Numeric (length = 1).
#' Number of communities to generate
#'
#' @param variables Numeric vector (length = 1 or \code{communities}).
#' Number of variables per community
#'
#' @param loadings Character (length = 1).
#' Magnitude of the assigned network loadings.
#' Available options:
#'
#' \itemize{
#'
#' \item \code{"small"} --- about 0.20
#'
#' \item \code{"moderate"} --- about 0.35
#'
#' \item \code{"large"} --- about 0.50
#'
#' }
#'
#' @param cross.loadings Numeric (length = 1).
#' Standard deviation of a normal distribution with a mean of zero.
#' Defaults to \code{1}
#'
#' @param correlations Character (length = 1).
#' Magnitude of the community correlations.
#' Available options:
#'
#' \itemize{
#'
#' \item \code{"none"} --- 0.00
#'
#' \item \code{"small"} --- 0.15 (adds about 0.015 to the cross-loadings)
#'
#' \item \code{"moderate"} --- 0.30 (adds about 0.03 to the cross-loadings)
#'
#' \item \code{"large"} --- 0.50 (adds about 0.06 to the cross-loadings)
#'
#' \item \code{"very large"} --- 0.70 (adds about 0.09 to the cross-loadings)
#'
#' }
#'
#' @param sample.size Numeric (length = 1).
#' Number of observations to generate
#'
#' @param max.iterations Numeric (length = 1).
#' Number of iterations to attempt to get convergence before erroring out.
#' Defaults to \code{100}
#'
#' @examples
#' \dontrun{
#' # Estimate EGM
#' EGM_data <- simEGM(
#' communities = 2, variables = 6,
#' loadings = "moderate", correlations = "moderate",
#' sample.size = 1000
#' )}
#'
#' @author Hudson F. Golino <hfg9s at virginia.edu> and Alexander P. Christensen <alexpaulchristensen@gmail.com>
#'
#' @noRd
#
# Simulate EGM ----
simEGM <- function(
communities, variables,
loadings = c("small", "moderate", "large"), cross.loadings = 0.01,
correlations = c("none", "small", "moderate", "large", "very large"),
sample.size, max.iterations = 100
)
{


# Check for missing arguments (argument, default, function)
loadings <- set_default(loadings, "moderate", simEGM)
correlations <- set_default(correlations, "moderate", simEGM)

# Argument errors (return data in case of tibble)
simEGM_errors(communities, variables, cross.loadings, sample.size, max.iterations)

# Set up membership based on communities and variables
membership <- swiftelse(
length(variables) == 1,
rep(seq_len(communities), each = variables),
rep(seq_len(communities), times = variables)
)

# Set community sequence
community_sequence <- seq_len(communities)

# Table variables
variables <- table(membership)

# Get total variables
total_variables <- sum(variables)

# Get the start and end of the variables
end <- cumsum(variables)
start <- (end + 1) - variables

# Generate loadings
loadings_matrix <- matrix(
0, nrow = total_variables, ncol = communities,
dimnames = list(
paste0("V", format_integer(seq_len(total_variables), digits(total_variables) - 1)),
paste0("C", format_integer(community_sequence, digits(communities) - 1))
)
)

# Determine loading ranges
loading_range <- switch(
loadings,
"small" = 0.15,
"moderate" = 0.20,
"large" = 0.25
)

# Determine correlation ranges
correlation_range <- swiftelse(
correlations == "none", 0.00,
switch(
correlations,
"small" = 0.015,
"moderate" = 0.03,
"large" = 0.06,
"very large" = 0.09
)
)

# Initialize R
R <- NA

# Count iterations
count <- 0

# Set cross-loading sparsity parameter
sparsity <- 1 / log10(sample.size)

# Iterate until positive definite
while(anyNA(R) || any(matrix_eigenvalues(R) < 0)){

# Increase count
count <- count + 1

# Populate loadings
for(i in community_sequence){

# Populate assigned loadings
loadings_matrix[start[i]:end[i], i] <- loading_range + rnorm_ziggurat(variables[i]) * 0.02
# rnorm(variables[i], mean = loading_range, sd = 0.01)

# Get indices
indices <- loadings_matrix[start[i]:end[i], -i]

# Number of variables
index_length <- length(indices)

# Add correlations on cross-loadings
loadings_matrix[start[i]:end[i], -i] <- correlation_range + rnorm_ziggurat(index_length) * 0.01
# rnorm(index_length, mean = correlation_range, sd = 0.01)

# Generate cross-loadings
cross_loading <- sample( # Probability of cross-loading being included is set by `log10(sample.size)`
c(0, 1), size = index_length, replace = TRUE, prob = c(sparsity, 1 - sparsity)
) * (0.00 + rnorm_ziggurat(index_length) * cross.loadings)
# rnorm(index_length, mean = 0.00, sd = cross.loadings)

# Populate cross-loading
loadings_matrix[start[i]:end[i], -i] <- loadings_matrix[start[i]:end[i], -i] + cross_loading

}

# Compute A
A <- -tcrossprod(loadings_matrix)

# Obtain uniqueness
uniqueness <- 1 - rowSums(loadings_matrix^2)

# Add uniqueness to diagonal
diag(A) <- uniqueness

# Obtain anti-image
anti_image <- A %*% pcor2inv(A) %*% A

# Obtain partial correlation matrix
P <- -anti_image / tcrossprod(sqrt(uniqueness))
diag(P) <- 0

# Convert to correlation matrix
R <- silent_call(pcor2cor(P))

# Check for max iterations
if(count >= max.iterations){

# Stop and send error
.handleSimpleError(
h = stop,
msg = paste0(
"Maximum iterations reached with no convergence. \n\n",
"Try:\n",
" + increasing 'loadings'\n",
" + increasing 'sample.size'\n",
" + decreasing 'correlations'"
),
call = "simEGM"
)

}

}

# Perform Cholesky decomposition
cholesky <- chol(R)

# Generate data
data <- MASS_mvrnorm_quick(
p = total_variables, np = total_variables * sample.size,
coV = diag(total_variables)
)

# Return results
return(
list(
data = data %*% cholesky,
population_partial_correlatiion = P,
population_correlation = R,
parameters = list(
loadings = loadings_matrix,
correlations = correlations,
membership = membership,
iterations = count
)
)
)

}

#' @noRd
# Errors ----
# Updated 25.09.2024
simEGM_errors <- function(communities, variables, cross.loadings, sample.size, max.iterations)
{

# 'communities'
length_error(communities, 1, "simEGM")
typeof_error(communities, "numeric", "simEGM")
range_error(communities, c(1, Inf), "simEGM")

# 'variables'
length_error(variables, c(1, length(communities)), "simEGM")
typeof_error(variables, "numeric", "simEGM")
range_error(variables, c(2, Inf), "simEGM")

# 'cross.loadings'
length_error(cross.loadings, 1, "simEGM")
typeof_error(cross.loadings, "numeric", "simEGM")
range_error(cross.loadings, c(0, 1), "simEGM")

# 'sample.size'
length_error(sample.size, 1, "simEGM")
typeof_error(sample.size, "numeric", "simEGM")
range_error(sample.size, c(1, Inf), "simEGM")

# 'max.iterations'
length_error(max.iterations, 1, "simEGM")
typeof_error(max.iterations, "numeric", "simEGM")
range_error(max.iterations, c(1, Inf), "simEGM")

}

0 comments on commit 8832476

Please sign in to comment.