From 15635a00284d90451dc0e01c91916eabb16d5387 Mon Sep 17 00:00:00 2001
From: "Pavel N. Krivitsky"
Date: Sun, 1 Dec 2024 15:26:50 +1100
Subject: [PATCH] as_tibble.combined_network() can now optionally return
.NetworkID and .NetworkName as columns, and N() operators' lm= formula can
now access them.
---
DESCRIPTION | 4 ++--
R/InitErgmTerm.multinet.R | 18 +++++++++++++++---
man/N-ergmTerm-c9b59cc1.Rd | 4 ++++
man/as_tibble.combined_networks.Rd | 5 ++++-
tests/testthat/test-multinet.R | 29 +++++++++++++----------------
5 files changed, 38 insertions(+), 22 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
index bf4254b5..69ded091 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: ergm.multi
-Version: 0.2.1-4163
-Date: 2024-11-18
+Version: 0.3.0-4164
+Date: 2024-12-01
Title: Fit, Simulate and Diagnose Exponential-Family Models for Multiple or Multilayer Networks
Authors@R: c(person(c("Pavel", "N."), "Krivitsky", role=c("aut","cre"), email="pavel@statnet.org", comment=c(ORCID="0000-0002-9101-3362")),
person(c("Mark", "S."), "Handcock", role=c("ctb"), email="handcock@stat.ucla.edu"),
diff --git a/R/InitErgmTerm.multinet.R b/R/InitErgmTerm.multinet.R
index c7bc862f..554d01f0 100644
--- a/R/InitErgmTerm.multinet.R
+++ b/R/InitErgmTerm.multinet.R
@@ -85,11 +85,12 @@ InitErgmTerm..subnets <- function(nw, arglist, ...){
#' @param attrnames a list (or a selection index) for attributes to obtain; for combined networks, defaults to all.
#' @param unit whether to obtain edge, vertex, or network attributes.
#' @template ergmTerm-NetworkIDName
+#' @param store.nid whether to include columns with network ID and network name; the columns will be named with the arguments passed to `.NetworkID` and `.NetworkName`.
#' @param ... additional arguments, currently passed to unlist()].
#'
#' @seealso [network::as_tibble.network()]
#' @export
-as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName"){
+as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName", store.nid=FALSE){
unit <- match.arg(unit)
if(unit!="networks") return(NextMethod())
@@ -105,7 +106,14 @@ as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertice
if(is.logical(attrnames) || is.numeric(attrnames)) attrnames <- na.omit(names(al)[attrnames])
else intersect(attrnames, names(al))
- al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble()
+ out <- al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble()
+
+ if(store.nid){
+ out[[.NetworkID]] <- unique(x %v% .NetworkID)
+ out[[.NetworkName]] <- unique(x %v% .NetworkName)
+ }
+
+ out
}
get_lminfo <- function(nattrs, lm=~1, subset=TRUE, contrasts=NULL, offset=NULL, weights=1){
@@ -169,6 +177,10 @@ assert_LHS_Networks <- function(nw, nid, term_trace = TRUE, call = if(term_trace
#' error. This can be avoided by pre-filtering with `subset`, which
#' controls which networks are affected by the term.
#'
+#' The formula may also reference `.NetworkID` and `.NetworkName`. In
+#' particular, `~0+factor(.NetworkID)` will evaluate `formula` on each
+#' network individually.
+#'
#' @note Care should be taken to avoid multicollinearity when using
#' this operator. As with the [lm()] function, `lm` formulas have an
#' implicit intercept, which can be suppressed by specifying `~ 0 +
@@ -257,7 +269,7 @@ InitErgmTerm.N <- function(nw, arglist, ..., N.compact_stats=TRUE, .NetworkID=".
nwl <- subnetwork_templates(nw, a$.NetworkID, a$.NetworkName)
nwnames <- names(nwl)
nn <- length(nwl)
- nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName)
+ nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName, store.nid=TRUE)
lmi <- get_lminfo(nattrs, lm=a$lm, subset=a$subset, contrasts=a$contrasts, offset=a$offset, weights=a$weights)
diff --git a/man/N-ergmTerm-c9b59cc1.Rd b/man/N-ergmTerm-c9b59cc1.Rd
index d6fd138c..0f396dbb 100644
--- a/man/N-ergmTerm-c9b59cc1.Rd
+++ b/man/N-ergmTerm-c9b59cc1.Rd
@@ -47,6 +47,10 @@ statistics. If \code{lm} refers to any network attributes for which some
networks have missing values, the term will stop with an
error. This can be avoided by pre-filtering with \code{subset}, which
controls which networks are affected by the term.
+
+The formula may also reference \code{.NetworkID} and \code{.NetworkName}. In
+particular, \code{~0+factor(.NetworkID)} will evaluate \code{formula} on each
+network individually.
}
\note{
Care should be taken to avoid multicollinearity when using
diff --git a/man/as_tibble.combined_networks.Rd b/man/as_tibble.combined_networks.Rd
index 951a0fed..fe41c471 100644
--- a/man/as_tibble.combined_networks.Rd
+++ b/man/as_tibble.combined_networks.Rd
@@ -10,7 +10,8 @@
...,
unit = c("edges", "vertices", "networks"),
.NetworkID = ".NetworkID",
- .NetworkName = ".NetworkName"
+ .NetworkName = ".NetworkName",
+ store.nid = FALSE
)
}
\arguments{
@@ -25,6 +26,8 @@
\item{.NetworkID, .NetworkName}{Optional strings indicating the
vertex attributes used to distinguish and name the networks;
intended to be used by term developers.}
+
+\item{store.nid}{whether to include columns with network ID and network name; the columns will be named with the arguments passed to \code{.NetworkID} and \code{.NetworkName}.}
}
\description{
A method to obtain a network attribute table from a
diff --git a/tests/testthat/test-multinet.R b/tests/testthat/test-multinet.R
index 84dbbd07..def60d27 100644
--- a/tests/testthat/test-multinet.R
+++ b/tests/testthat/test-multinet.R
@@ -12,17 +12,14 @@ data(samplk)
logit <- function(p) log(p/(1-p))
times <- 1:3
-samplk1%n%"t" <- times[1]
samplk1%e%"w" <- floor(runif(network.edgecount(samplk1), 0, 4))
-samplk2%n%"t" <- times[2]
samplk2%e%"w" <- floor(runif(network.edgecount(samplk2), 0, 4))
-samplk3%n%"t" <- times[3]
samplk3%e%"w" <- floor(runif(network.edgecount(samplk3), 0, 4))
samplkl <- list(samplk1, samplk2, samplk3)
samplks <- Networks(samplk1, samplk2, samplk3)
test_that("N() summary with an LM and noncompacted stats", {
- summ.N <- summary(samplks~N(~edges+nodematch("cloisterville"), ~1+t), term.options=list(N.compact_stats=FALSE))
+ summ.N <- summary(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID), term.options=list(N.compact_stats=FALSE))
summ.l <- unlist(lapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~edges+nodematch("cloisterville"), nw~., from.new="nw"))))
names(summ.l) <- paste0("N#", rep(1:3, each=2), "~", names(summ.l))
expect_equal(summ.N, summ.l)
@@ -30,21 +27,21 @@ test_that("N() summary with an LM and noncompacted stats", {
test_that("N() summary with offset and compacted stats", {
- summ.N <- summary(samplks~N(~edges, offset=~t))
+ summ.N <- summary(samplks~N(~edges, offset=~.NetworkID))
summ.l <- sapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~edges, nw~., from.new="nw")))
summ.l <- c(sum(summ.l), sum(summ.l*1:3))
expect_equal(summ.N, summ.l, ignore_attr=TRUE)
})
test_that("Valued N() summary with an LM and noncompacted stats", {
- summ.N <- summary(samplks~N(~sum+nodematch("cloisterville", form="sum"), ~1+t), term.options=list(N.compact_stats=FALSE), response="w")
+ summ.N <- summary(samplks~N(~sum+nodematch("cloisterville", form="sum"), ~1+.NetworkID), term.options=list(N.compact_stats=FALSE), response="w")
summ.l <- unlist(lapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~sum+nodematch("cloisterville", form="sum"), nw~., from.new="nw"), response="w")))
expect_equal(summ.N, summ.l, ignore_attr=TRUE)
})
test_that("Valued N() summary with offset and compacted stats", {
- summ.N <- summary(samplks~N(~sum, offset=~t), response="w")
+ summ.N <- summary(samplks~N(~sum, offset=~.NetworkID), response="w")
summ.l <- sapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~sum, nw~., from.new="nw"), response="w"))
summ.l <- c(sum(summ.l), sum(summ.l*1:3))
expect_equal(summ.N, summ.l, ignore_attr=TRUE)
@@ -58,21 +55,21 @@ for(N.compact_stats in c(FALSE,TRUE)){
test_that(paste("N() estimation with an LM and", testlab),{
# Three networks, jointly.
- ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
+ ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
nr <- sapply(lapply(pl, `[[`, "response"),length)
y <- unlist(lapply(pl, `[[`, "response"))
x <- do.call(rbind,lapply(pl, `[[`, "predictor"))
- x <- as.data.frame(cbind(x,t=rep(times, nr)))
+ x <- as.data.frame(cbind(x,.NetworkID=rep(times, nr)))
w <- unlist(lapply(pl, `[[`, "weights"))
- glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial")
+ glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial")
expect_equal(coef(glmfit),coef(ergmfit), ignore_attr=TRUE, tolerance=1e-7)
})
test_that(paste("N() estimation with an LM, subsetting, and", testlab),{
# Ignore second (test subset).
- ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t, subset=quote(t!=2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
+ ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID, subset=quote(.NetworkID!=2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
pl2 <- pl[-2]
times2 <- times[-2]
@@ -81,15 +78,15 @@ for(N.compact_stats in c(FALSE,TRUE)){
y <- unlist(lapply(pl2, `[[`, "response"))
x <- do.call(rbind,lapply(pl2, `[[`, "predictor"))
- x <- as.data.frame(cbind(x,t=rep(times2, nr)))
+ x <- as.data.frame(cbind(x,.NetworkID=rep(times2, nr)))
w <- unlist(lapply(pl2, `[[`, "weights"))
- glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial")
+ glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial")
expect_equal(coef(glmfit),coef(ergmfit), ignore_attr=TRUE, tolerance=1e-7)
})
test_that(paste("N() estimation with an LM, subsetting down to only 1 network, and", testlab),{
# Ignore first and third (test subset).
- expect_warning(ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t, subset=quote(t==2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))), ".*model is nonidentifiable.*")
+ expect_warning(ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID, subset=quote(.NetworkID==2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))), ".*model is nonidentifiable.*")
pl2 <- pl[2]
times2 <- times[2]
@@ -98,9 +95,9 @@ for(N.compact_stats in c(FALSE,TRUE)){
y <- unlist(lapply(pl2, `[[`, "response"))
x <- do.call(rbind,lapply(pl2, `[[`, "predictor"))
- x <- as.data.frame(cbind(x,t=rep(times2, nr)))
+ x <- as.data.frame(cbind(x,.NetworkID=rep(times2, nr)))
w <- unlist(lapply(pl2, `[[`, "weights"))
- glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial")
+ glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial")
# Note that unlike glmift, ergm cannot detect nonidentifiability at
# this time.
if(N.compact_stats){