From 15635a00284d90451dc0e01c91916eabb16d5387 Mon Sep 17 00:00:00 2001
From: "Pavel N. Krivitsky" <p.krivitsky@unsw.edu.au>
Date: Sun, 1 Dec 2024 15:26:50 +1100
Subject: [PATCH] as_tibble.combined_network() can now optionally return
 .NetworkID and .NetworkName as columns, and N() operators' lm= formula can
 now access them.

---
 DESCRIPTION                        |  4 ++--
 R/InitErgmTerm.multinet.R          | 18 +++++++++++++++---
 man/N-ergmTerm-c9b59cc1.Rd         |  4 ++++
 man/as_tibble.combined_networks.Rd |  5 ++++-
 tests/testthat/test-multinet.R     | 29 +++++++++++++----------------
 5 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bf4254b5..69ded091 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ergm.multi
-Version: 0.2.1-4163
-Date: 2024-11-18
+Version: 0.3.0-4164
+Date: 2024-12-01
 Title: Fit, Simulate and Diagnose Exponential-Family Models for Multiple or Multilayer Networks
 Authors@R: c(person(c("Pavel", "N."), "Krivitsky", role=c("aut","cre"), email="pavel@statnet.org", comment=c(ORCID="0000-0002-9101-3362")),
              person(c("Mark", "S."), "Handcock", role=c("ctb"), email="handcock@stat.ucla.edu"),
diff --git a/R/InitErgmTerm.multinet.R b/R/InitErgmTerm.multinet.R
index c7bc862f..554d01f0 100644
--- a/R/InitErgmTerm.multinet.R
+++ b/R/InitErgmTerm.multinet.R
@@ -85,11 +85,12 @@ InitErgmTerm..subnets <- function(nw, arglist, ...){
 #' @param attrnames a list (or a selection index) for attributes to obtain; for combined networks, defaults to all.
 #' @param unit whether to obtain edge, vertex, or network attributes.
 #' @template ergmTerm-NetworkIDName
+#' @param store.nid whether to include columns with network ID and network name; the columns will be named with the arguments passed to `.NetworkID` and `.NetworkName`.
 #' @param ... additional arguments, currently passed to unlist()].
 #'
 #' @seealso [network::as_tibble.network()]
 #' @export
-as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName"){
+as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertices","networks")), ..., unit=c("edges", "vertices", "networks"), .NetworkID=".NetworkID", .NetworkName=".NetworkName", store.nid=FALSE){
   unit <- match.arg(unit)
   if(unit!="networks") return(NextMethod())
 
@@ -105,7 +106,14 @@ as_tibble.combined_networks<-function(x,attrnames=(match.arg(unit)%in%c("vertice
   if(is.logical(attrnames) || is.numeric(attrnames)) attrnames <- na.omit(names(al)[attrnames])
   else intersect(attrnames, names(al))
 
-  al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble()
+  out <- al[attrnames] %>% lapply(simplify_simple, ...) %>% as_tibble()
+
+  if(store.nid){
+    out[[.NetworkID]] <- unique(x %v% .NetworkID)
+    out[[.NetworkName]] <- unique(x %v% .NetworkName)
+  }
+
+  out
 }
 
 get_lminfo <- function(nattrs, lm=~1, subset=TRUE, contrasts=NULL, offset=NULL, weights=1){
@@ -169,6 +177,10 @@ assert_LHS_Networks <- function(nw, nid, term_trace = TRUE, call = if(term_trace
 #' error. This can be avoided by pre-filtering with `subset`, which
 #' controls which networks are affected by the term.
 #'
+#' The formula may also reference `.NetworkID` and `.NetworkName`. In
+#' particular, `~0+factor(.NetworkID)` will evaluate `formula` on each
+#' network individually.
+#'
 #' @note Care should be taken to avoid multicollinearity when using
 #'   this operator. As with the [lm()] function, `lm` formulas have an
 #'   implicit intercept, which can be suppressed by specifying `~ 0 +
@@ -257,7 +269,7 @@ InitErgmTerm.N <- function(nw, arglist, ..., N.compact_stats=TRUE, .NetworkID=".
   nwl <- subnetwork_templates(nw, a$.NetworkID, a$.NetworkName)
   nwnames <- names(nwl)
   nn <- length(nwl)
-  nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName)
+  nattrs <- as_tibble(nw, unit="networks", .NetworkID=a$.NetworkID, .NetworkName=a$.NetworkName, store.nid=TRUE)
 
   lmi <- get_lminfo(nattrs, lm=a$lm, subset=a$subset, contrasts=a$contrasts, offset=a$offset, weights=a$weights)
 
diff --git a/man/N-ergmTerm-c9b59cc1.Rd b/man/N-ergmTerm-c9b59cc1.Rd
index d6fd138c..0f396dbb 100644
--- a/man/N-ergmTerm-c9b59cc1.Rd
+++ b/man/N-ergmTerm-c9b59cc1.Rd
@@ -47,6 +47,10 @@ statistics. If \code{lm} refers to any network attributes for which some
 networks have missing values, the term will stop with an
 error. This can be avoided by pre-filtering with \code{subset}, which
 controls which networks are affected by the term.
+
+The formula may also reference \code{.NetworkID} and \code{.NetworkName}. In
+particular, \code{~0+factor(.NetworkID)} will evaluate \code{formula} on each
+network individually.
 }
 \note{
 Care should be taken to avoid multicollinearity when using
diff --git a/man/as_tibble.combined_networks.Rd b/man/as_tibble.combined_networks.Rd
index 951a0fed..fe41c471 100644
--- a/man/as_tibble.combined_networks.Rd
+++ b/man/as_tibble.combined_networks.Rd
@@ -10,7 +10,8 @@
   ...,
   unit = c("edges", "vertices", "networks"),
   .NetworkID = ".NetworkID",
-  .NetworkName = ".NetworkName"
+  .NetworkName = ".NetworkName",
+  store.nid = FALSE
 )
 }
 \arguments{
@@ -25,6 +26,8 @@
 \item{.NetworkID, .NetworkName}{Optional strings indicating the
 vertex attributes used to distinguish and name the networks;
 intended to be used by term developers.}
+
+\item{store.nid}{whether to include columns with network ID and network name; the columns will be named with the arguments passed to \code{.NetworkID} and \code{.NetworkName}.}
 }
 \description{
 A method to obtain a network attribute table from a
diff --git a/tests/testthat/test-multinet.R b/tests/testthat/test-multinet.R
index 84dbbd07..def60d27 100644
--- a/tests/testthat/test-multinet.R
+++ b/tests/testthat/test-multinet.R
@@ -12,17 +12,14 @@ data(samplk)
 logit <- function(p) log(p/(1-p))
 times <- 1:3
 
-samplk1%n%"t" <- times[1]
 samplk1%e%"w" <- floor(runif(network.edgecount(samplk1), 0, 4))
-samplk2%n%"t" <- times[2]
 samplk2%e%"w" <- floor(runif(network.edgecount(samplk2), 0, 4))
-samplk3%n%"t" <- times[3]
 samplk3%e%"w" <- floor(runif(network.edgecount(samplk3), 0, 4))
 samplkl <- list(samplk1, samplk2, samplk3)
 samplks <- Networks(samplk1, samplk2, samplk3)
 
 test_that("N() summary with an LM and noncompacted stats", {
-  summ.N <- summary(samplks~N(~edges+nodematch("cloisterville"), ~1+t), term.options=list(N.compact_stats=FALSE))
+  summ.N <- summary(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID), term.options=list(N.compact_stats=FALSE))
   summ.l <- unlist(lapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~edges+nodematch("cloisterville"), nw~., from.new="nw"))))
   names(summ.l) <- paste0("N#", rep(1:3, each=2), "~", names(summ.l))
   expect_equal(summ.N, summ.l)
@@ -30,21 +27,21 @@ test_that("N() summary with an LM and noncompacted stats", {
 
 
 test_that("N() summary with offset and compacted stats", {
-  summ.N <- summary(samplks~N(~edges, offset=~t))
+  summ.N <- summary(samplks~N(~edges, offset=~.NetworkID))
   summ.l <- sapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~edges, nw~., from.new="nw")))
   summ.l <- c(sum(summ.l), sum(summ.l*1:3))
   expect_equal(summ.N, summ.l, ignore_attr=TRUE)
 })
 
 test_that("Valued N() summary with an LM and noncompacted stats", {
-  summ.N <- summary(samplks~N(~sum+nodematch("cloisterville", form="sum"), ~1+t), term.options=list(N.compact_stats=FALSE), response="w")
+  summ.N <- summary(samplks~N(~sum+nodematch("cloisterville", form="sum"), ~1+.NetworkID), term.options=list(N.compact_stats=FALSE), response="w")
   summ.l <- unlist(lapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~sum+nodematch("cloisterville", form="sum"), nw~., from.new="nw"), response="w")))
   expect_equal(summ.N, summ.l, ignore_attr=TRUE)
 })
 
 
 test_that("Valued N() summary with offset and compacted stats", {
-  summ.N <- summary(samplks~N(~sum, offset=~t), response="w")
+  summ.N <- summary(samplks~N(~sum, offset=~.NetworkID), response="w")
   summ.l <- sapply(samplkl, function(nw) summary(statnet.common::nonsimp_update.formula(~sum, nw~., from.new="nw"), response="w"))
   summ.l <- c(sum(summ.l), sum(summ.l*1:3))
   expect_equal(summ.N, summ.l, ignore_attr=TRUE)
@@ -58,21 +55,21 @@ for(N.compact_stats in c(FALSE,TRUE)){
   
   test_that(paste("N() estimation with an LM and", testlab),{
     # Three networks, jointly.
-    ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
+    ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
   
     nr <- sapply(lapply(pl, `[[`, "response"),length)
   
     y <- unlist(lapply(pl, `[[`, "response"))
     x <- do.call(rbind,lapply(pl, `[[`, "predictor"))
-    x <- as.data.frame(cbind(x,t=rep(times, nr)))
+    x <- as.data.frame(cbind(x,.NetworkID=rep(times, nr)))
     w <- unlist(lapply(pl, `[[`, "weights"))
-    glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial")
+    glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial")
     expect_equal(coef(glmfit),coef(ergmfit), ignore_attr=TRUE, tolerance=1e-7)
   })
   
   test_that(paste("N() estimation with an LM, subsetting, and", testlab),{
     # Ignore second (test subset).
-    ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t, subset=quote(t!=2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
+    ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID, subset=quote(.NetworkID!=2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats)))
     
     pl2 <- pl[-2]
     times2 <- times[-2]
@@ -81,15 +78,15 @@ for(N.compact_stats in c(FALSE,TRUE)){
     
     y <- unlist(lapply(pl2, `[[`, "response"))
     x <- do.call(rbind,lapply(pl2, `[[`, "predictor"))
-    x <- as.data.frame(cbind(x,t=rep(times2, nr)))
+    x <- as.data.frame(cbind(x,.NetworkID=rep(times2, nr)))
     w <- unlist(lapply(pl2, `[[`, "weights"))
-    glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial")
+    glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial")
     expect_equal(coef(glmfit),coef(ergmfit), ignore_attr=TRUE, tolerance=1e-7)
   })
     
   test_that(paste("N() estimation with an LM, subsetting down to only 1 network, and", testlab),{
     # Ignore first and third (test subset).
-    expect_warning(ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+t, subset=quote(t==2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))), ".*model is nonidentifiable.*")
+    expect_warning(ergmfit <- ergm(samplks~N(~edges+nodematch("cloisterville"), ~1+.NetworkID, subset=quote(.NetworkID==2)), control=control.ergm(term.options=list(N.compact_stats=N.compact_stats))), ".*model is nonidentifiable.*")
     
     pl2 <- pl[2]
     times2 <- times[2]
@@ -98,9 +95,9 @@ for(N.compact_stats in c(FALSE,TRUE)){
     
     y <- unlist(lapply(pl2, `[[`, "response"))
     x <- do.call(rbind,lapply(pl2, `[[`, "predictor"))
-    x <- as.data.frame(cbind(x,t=rep(times2, nr)))
+    x <- as.data.frame(cbind(x,.NetworkID=rep(times2, nr)))
     w <- unlist(lapply(pl2, `[[`, "weights"))
-    glmfit <- glm(y~t*nodematch.cloisterville,data=x,weights=w,family="binomial")
+    glmfit <- glm(y~.NetworkID*nodematch.cloisterville,data=x,weights=w,family="binomial")
     # Note that unlike glmift, ergm cannot detect nonidentifiability at
     # this time.
     if(N.compact_stats){