From 15635a00284d90451dc0e01c91916eabb16d5387 Mon Sep 17 00:00:00 2001 From: "Pavel N. Krivitsky" Date: Sun, 1 Dec 2024 15:26:50 +1100 Subject: [PATCH] as_tibble.combined_network() can now optionally return .NetworkID and .NetworkName as columns, and N() operators' lm= formula can now access them. --- DESCRIPTION | 4 ++-- R/InitErgmTerm.multinet.R | 18 +++++++++++++++--- man/N-ergmTerm-c9b59cc1.Rd | 4 ++++ man/as_tibble.combined_networks.Rd | 5 ++++- tests/testthat/test-multinet.R | 29 +++++++++++++---------------- 5 files changed, 38 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bf4254b5..69ded091 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ergm.multi -Version: 0.2.1-4163 -Date: 2024-11-18 +Version: 0.3.0-4164 +Date: 2024-12-01 Title: Fit, Simulate and Diagnose Exponential-Family Models for Multiple or Multilayer Networks Authors@R: c(person(c("Pavel", "N."), "Krivitsky", role=c("aut","cre"), email="pavel@statnet.org", comment=c(ORCID="0000-0002-9101-3362")), person(c("Mark", "S."), "Handcock", role=c("ctb"), email="handcock@stat.ucla.edu"), diff --git a/R/InitErgmTerm.multinet.R b/R/InitErgmTerm.multinet.R index c7bc862f..554d01f0 100644 --- a/R/InitErgmTerm.multinet.R +++ b/R/InitErgmTerm.multinet.R @@ -85,11 +85,12 @@ InitErgmTerm..subnets <- function(nw, arglist, ...){ #' @param attrnames a list (or a selection index) for attributes to obtain; for combined networks, defaults to all. #' @param unit whether to obtain edge, vertex, or network attributes. #' @template ergmTerm-NetworkIDName +#' @param store.nid whether to include columns with network ID and network name; the columns will be named with the arguments passed to `.NetworkID` and `.NetworkName`. #' @param ... additional arguments, currently passed to unlist()]. #' #' @seealso [network::as_tibble.network()] #' @export -as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName"){ +as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName", store.nid=FALSE){ unit <- match.arg(unit) if(unit!="networks") return(NextMethod()) @@ -105,7 +106,14 @@ as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertice if(is.logical(attrnames) || is.numeric(attrnames)) attrnames <- na.omit(names(al)[attrnames]) else intersect(attrnames, names(al)) - al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble() + out <- al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble() + + if(store.nid){ + out[[.NetworkID]] <- unique(x %v% .NetworkID) + out[[.NetworkName]] <- unique(x %v% .NetworkName) + } + + out } get_lminfo <- function(nattrs, lm=~1, subset=TRUE, contrasts=NULL, offset=NULL, weights=1){ @@ -169,6 +177,10 @@ assert_LHS_Networks <- function(nw, nid, term_trace = TRUE, call = if(term_trace #' error. This can be avoided by pre-filtering with `subset`, which #' controls which networks are affected by the term. #' +#' The formula may also reference `.NetworkID` and `.NetworkName`. In +#' particular, `~0+factor(.NetworkID)` will evaluate `formula` on each +#' network individually. +#' #' @note Care should be taken to avoid multicollinearity when using #' this operator. As with the [lm()] function, `lm` formulas have an #' implicit intercept, which can be suppressed by specifying `~ 0 + @@ -257,7 +269,7 @@ InitErgmTerm.N <- function(nw, arglist, ..., N.compact_stats=TRUE, .NetworkID=". nwl <- subnetwork_templates(nw, a$.NetworkID, a$.NetworkName) nwnames <- names(nwl) nn <- length(nwl) - nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName) + nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName, store.nid=TRUE) lmi <- get_lminfo(nattrs, lm=a$lm, subset=a$subset, contrasts=a$contrasts, offset=a$offset, weights=a$weights) diff --git a/man/N-ergmTerm-c9b59cc1.Rd b/man/N-ergmTerm-c9b59cc1.Rd index d6fd138c..0f396dbb 100644 --- a/man/N-ergmTerm-c9b59cc1.Rd +++ b/man/N-ergmTerm-c9b59cc1.Rd @@ -47,6 +47,10 @@ statistics. If \code{lm} refers to any network attributes for which some networks have missing values, the term will stop with an error. This can be avoided by pre-filtering with \code{subset}, which controls which networks are affected by the term. + +The formula may also reference \code{.NetworkID} and \code{.NetworkName}. In +particular, \code{~0+factor(.NetworkID)} will evaluate \code{formula} on each +network individually. } \note{ Care should be taken to avoid multicollinearity when using diff --git a/man/as_tibble.combined_networks.Rd b/man/as_tibble.combined_networks.Rd index 951a0fed..fe41c471 100644 --- a/man/as_tibble.combined_networks.Rd +++ b/man/as_tibble.combined_networks.Rd @@ -10,7 +10,8 @@ ..., unit = c("edges", "vertices", "networks"), .NetworkID = ".NetworkID", - .NetworkName = ".NetworkName" + .NetworkName = ".NetworkName", + store.nid = FALSE ) } \arguments{ @@ -25,6 +26,8 @@ \item{.NetworkID, .NetworkName}{Optional strings indicating the vertex attributes used to distinguish and name the networks; intended to be used by term developers.} + +\item{store.nid}{whether to include columns with network ID and network name; the columns will be named with the arguments passed to \code{.NetworkID} and \code{.NetworkName}.} } \description{ A method to obtain a network attribute table from a diff --git a/tests/testthat/test-multinet.R b/tests/testthat/test-multinet.R index 84dbbd07..def60d27 100644 --- a/tests/testthat/test-multinet.R +++ b/tests/testthat/test-multinet.R @@ -12,17 +12,14 @@ data(samplk) logit <- function(p) log(p/(1-p)) times <- 1:3 -samplk1%n%"t" <- times[1] samplk1%e%"w" <- floor(runif(network.edgecount(samplk1), 0, 4)) -samplk2%n%"t" <- times[2] samplk2%e%"w" <- floor(runif(network.edgecount(samplk2), 0, 4)) -samplk3%n%"t" <- times[3] samplk3%e%"w" <- floor(runif(network.edgecount(samplk3), 0, 4)) samplkl <- list(samplk1, samplk2, samplk3) samplks <- Networks(samplk1, samplk2, samplk3) test_that("N() summary with an LM and noncompacted stats", { - summ.N <- summary(samplks~N(~edges+nodematch("cloisterville"), ~1+t), term.options=list(N.compact_stats=FALSE)) + summ.N <- summary(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID), term.options=list(N.compact_stats=FALSE)) summ.l <- unlist(lapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~edges+nodematch("cloisterville"), nw~., from.new="nw")))) names(summ.l) <- paste0("N#", rep(1:3, each=2), "~", names(summ.l)) expect_equal(summ.N, summ.l) @@ -30,21 +27,21 @@ test_that("N() summary with an LM and noncompacted stats", { test_that("N() summary with offset and compacted stats", { - summ.N <- summary(samplks~N(~edges, offset=~t)) + summ.N <- summary(samplks~N(~edges, offset=~.NetworkID)) summ.l <- sapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~edges, nw~., from.new="nw"))) summ.l <- c(sum(summ.l), sum(summ.l*1:3)) expect_equal(summ.N, summ.l, ignore_attr=TRUE) }) test_that("Valued N() summary with an LM and noncompacted stats", { - summ.N <- summary(samplks~N(~sum+nodematch("cloisterville", form="sum"), ~1+t), term.options=list(N.compact_stats=FALSE), response="w") + summ.N <- summary(samplks~N(~sum+nodematch("cloisterville", form="sum"), ~1+.NetworkID), term.options=list(N.compact_stats=FALSE), response="w") summ.l <- unlist(lapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~sum+nodematch("cloisterville", form="sum"), nw~., from.new="nw"), response="w"))) expect_equal(summ.N, summ.l, ignore_attr=TRUE) }) test_that("Valued N() summary with offset and compacted stats", { - summ.N <- summary(samplks~N(~sum, offset=~t), response="w") + summ.N <- summary(samplks~N(~sum, offset=~.NetworkID), response="w") summ.l <- sapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~sum, nw~., from.new="nw"), response="w")) summ.l <- c(sum(summ.l), sum(summ.l*1:3)) expect_equal(summ.N, summ.l, ignore_attr=TRUE) @@ -58,21 +55,21 @@ for(N.compact_stats in c(FALSE,TRUE)){ test_that(paste("N() estimation with an LM and", testlab),{ # Three networks, jointly. - ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))) + ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))) nr <- sapply(lapply(pl, `[[`, "response"),length) y <- unlist(lapply(pl, `[[`, "response")) x <- do.call(rbind,lapply(pl, `[[`, "predictor")) - x <- as.data.frame(cbind(x,t=rep(times, nr))) + x <- as.data.frame(cbind(x,.NetworkID=rep(times, nr))) w <- unlist(lapply(pl, `[[`, "weights")) - glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial") + glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial") expect_equal(coef(glmfit),coef(ergmfit), ignore_attr=TRUE, tolerance=1e-7) }) test_that(paste("N() estimation with an LM, subsetting, and", testlab),{ # Ignore second (test subset). - ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t, subset=quote(t!=2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))) + ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID, subset=quote(.NetworkID!=2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))) pl2 <- pl[-2] times2 <- times[-2] @@ -81,15 +78,15 @@ for(N.compact_stats in c(FALSE,TRUE)){ y <- unlist(lapply(pl2, `[[`, "response")) x <- do.call(rbind,lapply(pl2, `[[`, "predictor")) - x <- as.data.frame(cbind(x,t=rep(times2, nr))) + x <- as.data.frame(cbind(x,.NetworkID=rep(times2, nr))) w <- unlist(lapply(pl2, `[[`, "weights")) - glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial") + glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial") expect_equal(coef(glmfit),coef(ergmfit), ignore_attr=TRUE, tolerance=1e-7) }) test_that(paste("N() estimation with an LM, subsetting down to only 1 network, and", testlab),{ # Ignore first and third (test subset). - expect_warning(ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t, subset=quote(t==2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))), ".*model is nonidentifiable.*") + expect_warning(ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID, subset=quote(.NetworkID==2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))), ".*model is nonidentifiable.*") pl2 <- pl[2] times2 <- times[2] @@ -98,9 +95,9 @@ for(N.compact_stats in c(FALSE,TRUE)){ y <- unlist(lapply(pl2, `[[`, "response")) x <- do.call(rbind,lapply(pl2, `[[`, "predictor")) - x <- as.data.frame(cbind(x,t=rep(times2, nr))) + x <- as.data.frame(cbind(x,.NetworkID=rep(times2, nr))) w <- unlist(lapply(pl2, `[[`, "weights")) - glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial") + glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial") # Note that unlike glmift, ergm cannot detect nonidentifiability at # this time. if(N.compact_stats){