Skip to content

Commit

Permalink
Robustified MCMLE estimation and MCMC error calculation against the s…
Browse files Browse the repository at this point in the history
…ituation in which the observational process results in a constant sufficient statistic in the constrained MCMC sample and added a test.
  • Loading branch information
krivit committed Jul 9, 2024
1 parent 0ec3773 commit 7e7670b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 11 deletions.
15 changes: 8 additions & 7 deletions R/ergm.MCMCse.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,14 @@ ergm.MCMCse <- function(model, theta, init, statsmatrices, statsmatrices.obs,
H <- crossprod(htmp, htmp)
} else prob <- rep.int(1 / nrow(xsim), nrow(xsim))

# Identify canonical parameters corresponding to non-offset statistics that do not vary
novar <- diag(H) < sqrt(.Machine$double.eps)

# Calculate the auto-covariance of the MCMC suff. stats.
# and hence the MCMC s.e.
cov.zbar <- spectrum0.mvar(gsims) * sum(prob^2)
imp.factor <- sum(prob^2)*length(prob)

# Identify canonical parameters corresponding to non-offset statistics that do not vary
novar <- rep(TRUE, nrow(H))
novar <- diag(H) < sqrt(.Machine$double.eps)

# Calculate the auto-covariance of the Conditional MCMC suff. stats.
# and hence the Conditional MCMC s.e.
if(!is.null(statsmatrices.obs)){
Expand All @@ -92,11 +91,13 @@ ergm.MCMCse <- function(model, theta, init, statsmatrices, statsmatrices.obs,
H.obs <- crossprod(htmp.obs, htmp.obs)
} else prob.obs <- rep.int(1 / nrow(xsim.obs), nrow(xsim.obs))

cov.zbar.obs <- spectrum0.mvar(gsims.obs) * sum(prob.obs^2)
imp.factor.obs <- sum(prob.obs^2)*length(prob.obs)

novar.obs <- diag(H.obs)<sqrt(.Machine$double.eps)
if(any(novar&!novar.obs)) warning("Non-varying statistics in the unconstrained sample vary in the constrained sample. This should not be happening.")

# Handle the corner case in which the constrained statistics do not vary.
cov.zbar.obs <- if(all(novar.obs)) matrix(0, length(novar.obs), length(novar.obs))
else spectrum0.mvar(gsims.obs) * sum(prob.obs^2)
imp.factor.obs <- sum(prob.obs^2)*length(prob.obs)
}else{
cov.zbar.obs <- cov.zbar
cov.zbar.obs[,] <- 0
Expand Down
14 changes: 11 additions & 3 deletions R/ergm.MCMLE.R
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,15 @@ ergm.MCMLE <- function(init, s, s.obs,
basepred <- sm[,!nochg,drop=FALSE] %*% etadiff[!nochg]
}
lw2w <- function(lw){w<-exp(lw-max(lw)); w/sum(w)}
hotel <- try(suppressWarnings(approx.hotelling.diff.test(esteqs, esteqs.obs)), silent=TRUE)
# Handle a corner case in which none of the constrained sample
# statistics vary. Then, Hotelling's test must be performed in
# the 1-sample mode.
novar.obs <- NVL3(esteq.obs, all(sweep(., 2L, .[1L,], `==`)), FALSE)
hotel <- try(suppressWarnings(
approx.hotelling.diff.test(esteqs,
if(!novar.obs) esteqs.obs,
mu0 = if(novar.obs) esteq.obs[1L,])
), silent=TRUE)
if(inherits(hotel, "try-error")){ # Within tolerance ellipsoid, but cannot be tested.
message("Unable to test for convergence; increasing sample size.")
.boost_samplesize(control$MCMLE.confidence.boost)
Expand All @@ -457,7 +465,7 @@ ergm.MCMLE <- function(init, s, s.obs,
esteq.w <- lw2w(esteq.lw)
estdiff <- -lweighted.mean(esteq, esteq.lw)
estcov <- hotel$covariance.x*sum(esteq.w^2)*length(esteq.w)
if(obs){
if(obs && !novar.obs){
esteq.obs.lw <- IS.lw(statsmatrix.obs, etadiff)
esteq.obs.w <- lw2w(esteq.obs.lw)
estdiff <- estdiff + lweighted.mean(esteq.obs, esteq.obs.lw)
Expand All @@ -478,7 +486,7 @@ ergm.MCMLE <- function(init, s, s.obs,
if(T2nullity && verbose) message("Estimated covariance matrix of the statistics has nullity ", format(T2nullity), ". Effective parameter number adjusted to ", format(T2param), ".")
nonconv.pval <- .ptsq(T2, T2param, hotel$parameter["df"], lower.tail=FALSE)
if(verbose) message("Test statistic: T^2 = ", format(T2),", with ",
format(T2param), " free parameters and ", format(hotel$parameter["df"]), " degrees of freedom.")
format(T2param), " free parameter(s) and ", format(hotel$parameter["df"]), " degrees of freedom.")
message("Convergence test p-value: ", fixed.pval(nonconv.pval, 4), ". ", appendLF=FALSE)
if(nonconv.pval < 1-control$MCMLE.confidence){
message("Converged with ",control$MCMLE.confidence*100,"% confidence.")
Expand Down
19 changes: 19 additions & 0 deletions tests/testthat/test-miss-dep.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
tolerance <- 4 # Result must be within 4*MCMCSE of truth.

data(florentine)

test_that("Missing data MLE with edge observational constraint", {
n.active <- network.dyadcount(flomarriage) - network.edgecount(flobusiness)
e.active <- network.edgecount(flomarriage) - network.edgecount(flomarriage&flobusiness)
p.active <- e.active/n.active
theta <- logit(p.active)
mcmcfit <- suppressWarnings(ergm(flomarriage~edges, constraints = ~fixedas(flobusiness), obs.constraints = ~edges))

## Point estimate
expect_lt(abs(theta-coef(mcmcfit))/sqrt(diag(vcov(mcmcfit, source="estimation"))), tolerance)

## Likelihood
# Relative to NULL model, whose likelihood is defined to be 0.
llk <- log(p.active)*e.active+log(1-p.active)*(n.active-e.active) - log(.5)*n.active
expect_lt(abs(llk-logLik(mcmcfit))/sqrt(attr(logLik(mcmcfit), "vcov")), tolerance)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-miss.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ library(statnet.common)
opttest({
library(ergm)
theta0err<- 1 # Perturbation in the initial values
tolerance<-4 # Result must be within 5*MCMCSE of truth.
tolerance <- 4 # Result must be within 4*MCMCSE of truth.
bridge.target.se <- 0.005 # Log-likelihood MCMC standard error must be within this.

n<-20 # Number of nodes
Expand Down

0 comments on commit 7e7670b

Please sign in to comment.