From a6f60631a83488296af8435615b3ed44ba241aa6 Mon Sep 17 00:00:00 2001 From: "jouni.helske@jyu.fi" Date: Wed, 23 Apr 2014 07:28:04 +0300 Subject: [PATCH] Corrected subset assignment corrected subset assignment, tweaked convergence tolerances. --- ChangeLog | 2 +- DESCRIPTION | 2 +- R/approxSSM.R | 2 +- R/extract.SSModel.R | 4 +- R/hatvalues.KFS.R | 2 +- R/interval.R | 2 +- R/logLik.SSModel.R | 2 +- R/predict.SSModel.R | 65 ++++++++++++++++++++----- R/subset.SSModel.R | 4 +- TODO | 2 - man/Extract.SSModel.Rd | 6 +-- man/logLik.SSModel.Rd | 2 +- man/predict.SSModel.Rd | 7 ++- tests/arimatests.R | 33 ------------- tests/structTest.R | 48 ------------------ tests/testthat/testBasics.R | 25 ++++++++++ tests/testthat/{glmfits.R => testGLM.R} | 3 +- tests/testthat/testSSMarima.R | 13 +++++ tests/testthat/testStruct.R | 26 ++++++++++ 19 files changed, 137 insertions(+), 113 deletions(-) delete mode 100644 tests/arimatests.R delete mode 100644 tests/structTest.R create mode 100644 tests/testthat/testBasics.R rename tests/testthat/{glmfits.R => testGLM.R} (99%) create mode 100644 tests/testthat/testSSMarima.R create mode 100644 tests/testthat/testStruct.R diff --git a/ChangeLog b/ChangeLog index 96e87ac..1e5144f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,7 +16,7 @@ Changes from Version 1.0.3 to 1.0.4: silently. * Changed variable mu to m for mean filtering for non-Gaussian models without simulation just like in other cases. - * Changed convergence criterion in approximation algorithm from linear predictor based to + * Changed convergence criterion in Gaussian approximation algorithm from linear predictor based to deviance based. * Properly exported assigment using subset method. See ?subset.SSModel for details. diff --git a/DESCRIPTION b/DESCRIPTION index 0337454..dc93d47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: KFAS Version: 1.0.4 -Date: 2014-03-27 +Date: 2014-04-23 Title: Kalman Filter and Smoother for Exponential Family State Space Models. Author: Jouni Helske Maintainer: Jouni Helske diff --git a/R/approxSSM.R b/R/approxSSM.R index e98b81d..c5ca0d2 100644 --- a/R/approxSSM.R +++ b/R/approxSSM.R @@ -48,7 +48,7 @@ approxSSM <- function(model, theta, maxiter = 50, tol = 1e-15) { ymiss <- is.na(model$y) storage.mode(ymiss) <- "integer" if(is.null(maxiter)) maxiter<-50 - + if(is.null(tol)) tol<-1e-8 # initial values for linear predictor theta if (missing(theta) || is.null(theta)) { theta <- sapply(1:p, function(i) diff --git a/R/extract.SSModel.R b/R/extract.SSModel.R index c944d65..08cdb4a 100644 --- a/R/extract.SSModel.R +++ b/R/extract.SSModel.R @@ -34,7 +34,7 @@ #' model$H #' logLik(model,check.model=TRUE) #with check.model=FALSE (default) R crashes! -`[<-.SSModel` <- function(x, element, states, etas, series, times, value) { +`[<-.SSModel` <- function(x, element, states, etas, series, times, ..., value) { element <- match.arg(arg = element, choices = c("y", "Z", "H", "T", "R", "Q", "a1", "P1", "P1inf", "u")) @@ -97,7 +97,7 @@ #' @method [ SSModel #' @S3method [ SSModel #' @rdname Extract.SSModel -`[.SSModel` <- function(x, element, states, etas, series, times) { +`[.SSModel` <- function(x, element, states, etas, series, times, ...) { # is.SSModel(x,return.logical=FALSE) element <- match.arg(arg = element, choices = c("y", "Z", "H", "T", "R", "Q", "a1", "P1", "P1inf", "u")) diff --git a/R/hatvalues.KFS.R b/R/hatvalues.KFS.R index 208b608..1bc5f52 100644 --- a/R/hatvalues.KFS.R +++ b/R/hatvalues.KFS.R @@ -15,7 +15,7 @@ #' @return Multivariate time series containing hat values. hatvalues.KFS<-function(model,...){ if(any(model$model$distribution != "gaussian")){ - app<-approxSSM(model$model,theta=model$call$theta,maxiter=model$call$maxiter) + app<-approxSSM(model$model,theta=model$call$theta,tol=model$call$convtol,maxiter=model$call$maxiter) if(is.null(model$V_theta)) stop("KFS was run without signal smoothing, cannot compute hat values.") hatv<-matrix(apply(model$V_theta/app$H, 3, diag), attr(model$model, "n"), attr(model$model, "p"), diff --git a/R/interval.R b/R/interval.R index 2331b69..00ea0f3 100644 --- a/R/interval.R +++ b/R/interval.R @@ -74,7 +74,7 @@ interval <- function(model, interval = c("confidence", "prediction"), level, typ pred <- lapply(1:p, function(j) cbind(fit = cbind(varmean$mean[, j], lwr = int[[j]][1, ], upr = int[[j]][2, ]), se.fit = sqrt(varmean$var[, j]))) } else { - pred <- lapply(1:p, function(j) cbind(varmean$mean[, j], lwr = int[[j]][1, ], upr = int[[j]][2, ])) + pred <- lapply(1:p, function(j) cbind(fit = varmean$mean[, j], lwr = int[[j]][1, ], upr = int[[j]][2, ])) } pred } diff --git a/R/logLik.SSModel.R b/R/logLik.SSModel.R index 9f1e1b2..00e5350 100644 --- a/R/logLik.SSModel.R +++ b/R/logLik.SSModel.R @@ -27,7 +27,7 @@ #' @return \item{}{log-likelihood of the state space model.} logLik.SSModel <- function(object, nsim = 0, antithetics = TRUE, theta, check.model = FALSE, transform = c("ldl","augment"), maxiter = 50, - seed, convtol=1e-15,...) { + seed, convtol=1e-8,...) { if (check.model) { if (!is.SSModel(object, na.check = TRUE)) { return(-.Machine$double.xmax) diff --git a/R/predict.SSModel.R b/R/predict.SSModel.R index f953f88..6c61dde 100644 --- a/R/predict.SSModel.R +++ b/R/predict.SSModel.R @@ -15,7 +15,11 @@ #' @S3method predict SSModel #' @aliases predict predict.SSModel #' @param object Object of class \code{SSModel}. -#' @param newdata A compatible \code{SSModel} object to be added in the end of the old object for which the predictions are required. +#' @param newdata A compatible \code{SSModel} object to be added in the end of the old object for +#' which the predictions are required. If omitted, predictions are either for the whole data (fitted values), +#' or if argument \code{n.ahead} is given, \code{n.ahead} time steps ahead. +#' @param n.ahead Number of steps ahead at which to predict. Only used if \code{newdata} is omitted. +#' Note that when using \code{n.ahead}, object cannot contain time varying system matrices. #' @param interval Type of interval calculation. #' @param level Confidence level for intervals. #' @param type Scale of the prediction, \code{'response'} or \code{'link'}. @@ -25,11 +29,29 @@ #' \dQuote{trend}, or \dQuote{regression}. These can be combined. Default is \dQuote{all}. #' @param nsim Number of independent samples used in importance sampling. Used only for non-Gaussian models. #' @param se.fit If TRUE, standard errors are computed. Default is FALSE. -#' @param prob if TRUE (default), the predictions in binomial/poisson case are probabilities/rates instead of counts. +#' @param prob if TRUE (default), the predictions in binomial case are probabilities instead of counts. +#' @param maxiter The maximum number of iterations used in approximation Default is 50. +#' Only used for non-Gaussian model. #' @param \dots Ignored. #' @return A matrix or list of matrices containing the predictions, and optionally standard errors. -predict.SSModel <- function(object, newdata, interval = c("none", "confidence", "prediction"), - level = 0.95, type = c("response", "link"), states=NULL,se.fit = FALSE, nsim = 0, +#' @examples +#' +#' \dontrun{ +#' set.seed(1) +#' x<-runif(n=100,min=1,max=3) +#' y<-rpois(n=100,lambda=exp(-1+x)) +#' model<-SSModel(y~x,distribution="poisson") +#' xnew<-seq(0.5,3.5,by=0.1) +#' newdata<-SSModel(rep(NA,length(xnew))~xnew,distribution="poisson") +#' pred<-predict(model,newdata=newdata,interval="prediction",level=0.9,nsim=1000) +#' plot(x=x,y=y,pch=19,ylim=c(0,25),xlim=c(0.5,3.5)) +#' matlines(x=xnew,y=pred,col=c(2,2,2),lty=c(1,2,2),type="l") +#' +#' model<-SSModel(Nile~SSMtrend(1,Q=1469),H=15099) +#' pred<-predict(model,n.ahead=10,interval="prediction",level=0.9) +#' } +predict.SSModel <- function(object, newdata, n.ahead, interval = c("none", "confidence", "prediction"), + level = 0.95, type = c("response", "link"), states=NULL, se.fit = FALSE, nsim = 0, prob = TRUE, maxiter=50, ...) { @@ -58,6 +80,7 @@ predict.SSModel <- function(object, newdata, interval = c("none", "confidence", } else states <- which(attr(object, "state_types") %in% states) } } + gaussianmodel <- all(object$distribution == "gaussian") if (!missing(newdata) && !is.null(newdata)) { @@ -79,12 +102,12 @@ predict.SSModel <- function(object, newdata, interval = c("none", "confidence", object$y <- rbind(object$y, newdata$y) tvo <- tvn <- logical(5) tvo[1] <- dim(object$Z)[3] > 1 - tvo[2] <- all(object$distribution == "gaussian") && (dim(object$H)[3] > 1) + tvo[2] <- gaussianmodel && (dim(object$H)[3] > 1) tvo[3] <- dim(object$T)[3] > 1 tvo[4] <- dim(object$R)[3] > 1 tvo[5] <- dim(object$Q)[3] > 1 tvn[1] <- dim(newdata$Z)[3] > 1 - tvn[2] <- all(object$distribution == "gaussian") && (dim(object$H)[3] > 1) + tvn[2] <- gaussianmodel && (dim(object$H)[3] > 1) tvn[3] <- dim(newdata$T)[3] > 1 tvn[4] <- dim(newdata$R)[3] > 1 tvn[5] <- dim(newdata$Q)[3] > 1 @@ -94,7 +117,7 @@ predict.SSModel <- function(object, newdata, interval = c("none", "confidence", n)) } - if (all(object$distribution == "gaussian") && (tvo[2] || tvn[2] || !identical(object$H, newdata$H))) { + if (gaussianmodel && (tvo[2] || tvn[2] || !identical(object$H, newdata$H))) { object$H <- array(data = c(array(object$H, dim = c(p, p, no)), array(newdata$H, dim = c(p, p, nn))), dim = c(p, p, n)) } else object$u <- array(data = c(array(object$u, dim = c(no, p)), array(newdata$u, dim = c(nn, p))), dim = c(n, p)) @@ -114,14 +137,30 @@ predict.SSModel <- function(object, newdata, interval = c("none", "confidence", dim = c(k, k, n)) } - if (tvo[5] || tvn[5] || !identical(object$Q, newdata$Q)) { - object$Q <- array(data = c(array(data = object$Q, dim = c(k, k, no)), array(data = newdata$Q, dim = c(k, k, nn))), - dim = c(k, k, n)) - } + } else { + if(!is.null(n.ahead) || !missing(n.ahead)){ + + tv <- logical(5) + tv[1] <- dim(object$Z)[3] > 1 + tv[2] <- gaussianmodel && (dim(object$H)[3] > 1) + tv[3] <- dim(object$T)[3] > 1 + tv[4] <- dim(object$R)[3] > 1 + tv[5] <- dim(object$Q)[3] > 1 + + if(!gaussianmodel) tvu<-any(c(apply(object$u,2,function(x) length(unique(x))>1))) else tvu<-FALSE + if(any(tv) || tvu) + stop("Model contains time varying system matrices, cannot use argument 'n.ahead'. Use 'newdata' instead.") + + timespan <- attr(object, "n") + 1:n.ahead + n<-attr(object, "n") <- attr(object, "n")+as.integer(n.ahead) + object$y<-window(object$y,end=end(object$y)+c(n.ahead,0),extend=TRUE) #!! + if(any(object$distribution!="gaussian")) object$u<-rbind(object$u,matrix(object$u[1,],nrow=n.ahead,ncol=ncol(object$u),byrow=TRUE)) + + } else timespan <- 1:attr(object, "n") - } else timespan <- 1:attr(object, "n") + } + - gaussianmodel <- all(object$distribution == "gaussian") if (!gaussianmodel && interval == "prediction") { if (type == "link") stop("Prediction intervals can only be computed at response scale.") diff --git a/R/subset.SSModel.R b/R/subset.SSModel.R index 705d8e4..160b83a 100644 --- a/R/subset.SSModel.R +++ b/R/subset.SSModel.R @@ -1,7 +1,7 @@ #' @method subset<- SSModel #' @S3method subset<- SSModel #' @rdname Extract.SSModel -`subset<-.SSModel` <- function(x, element, states, etas, series, times, value) { +`subset<-.SSModel` <- function(x, element, states, etas, series, times, ..., value) { # is.SSModel(x,return.logical=FALSE) element <- match.arg(arg = element, choices = c("y", "Z", "H", "T", "R", "Q", "a1", "P1", "P1inf", "u")) @@ -64,7 +64,7 @@ } #' @rdname Extract.SSModel #' @export -`subset<-` <- function(x, value,...) UseMethod("subset<-") +`subset<-` <- function(x, ..., value) UseMethod("subset<-") #' @method subset SSModel #' @S3method subset SSModel diff --git a/TODO b/TODO index b059006..805ee30 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,6 @@ TODO list * Add argument check.result for function ldl which warns if the decomposition was not successful (due to bad tolerance parameter). - * Rewrite residual computations using Fortran. - * Change tolerance criterion in approx.f95 from linear predictor based to deviance based. * Add more tests, at least for structural time series, ARIMA and mixed models and multiple tests for SSModel and auxiliary functions. * Study and possily implement marginal likelihood correction used in Francke et al. (2010) diff --git a/man/Extract.SSModel.Rd b/man/Extract.SSModel.Rd index 3fe7a57..eb5733a 100644 --- a/man/Extract.SSModel.Rd +++ b/man/Extract.SSModel.Rd @@ -6,11 +6,11 @@ \alias{subset<-.SSModel} \title{Extract or Replace Parts of a State Space Model} \usage{ -\method{[}{SSModel}(x, element, states, etas, series, times) <- value +\method{[}{SSModel}(x, element, states, etas, series, times, ...) <- value -\method{[}{SSModel}(x, element, states, etas, series, times) +\method{[}{SSModel}(x, element, states, etas, series, times, ...) -\method{subset}{SSModel}(x, element, states, etas, series, times) <- value +\method{subset}{SSModel}(x, element, states, etas, series, times, ...) <- value subset(x, ...) <- value diff --git a/man/logLik.SSModel.Rd b/man/logLik.SSModel.Rd index e39ce83..a7f69fb 100644 --- a/man/logLik.SSModel.Rd +++ b/man/logLik.SSModel.Rd @@ -5,7 +5,7 @@ \usage{ \method{logLik}{SSModel}(object, nsim = 0, antithetics = TRUE, theta, check.model = FALSE, transform = c("ldl", "augment"), maxiter = 50, - seed, convtol = 1e-15, ...) + seed, convtol = 1e-08, ...) } \arguments{ \item{object}{State space model of class \code{SSModel}.} diff --git a/man/predict.SSModel.Rd b/man/predict.SSModel.Rd index 3464903..d052275 100644 --- a/man/predict.SSModel.Rd +++ b/man/predict.SSModel.Rd @@ -38,8 +38,11 @@ Default is FALSE.} \item{prob}{if TRUE (default), the predictions in - binomial/poisson case are probabilities/rates instead of - counts.} + binomial case are probabilities instead of counts.} + + \item{maxiter}{The maximum number of iterations used in + approximation Default is 50. Only used for non-Gaussian + model.} \item{\dots}{Ignored.} } diff --git a/tests/arimatests.R b/tests/arimatests.R deleted file mode 100644 index 2a8799a..0000000 --- a/tests/arimatests.R +++ /dev/null @@ -1,33 +0,0 @@ -s <- 12 -phis <- 0.99 -phi1 <- 0.0001 -phi <- c(phi1,rep(0,s-2),phis,-phi1*phis) -theta <- 0.7 -out <- makeARIMA(phi,theta,NULL,SSinit="Ross") -min(eigen(out$Pn)$value) - -set.seed(1) - x <- arima.sim(100,model=list(ar=phi,ma=theta)) -KalmanLike(x,mod=out) -out<-arima(x,c(13,0,1) -model<-SSModel(x~-1+SSMarima(ar=phi,ma=theta),H=0) -logLik(model) -arima(x,order=c(1,0,1),seasonal=list(period=12,order=c(1,0,0)),include.mean=FALSE,init=c(phi1,theta,phis),method='ML', - SSinit="Ros") - -Q0ter <- function(phi,theta){ - p <- length(phi) - q <- length(theta) - r <- max(p,q+1) - T <- matrix(0,r,r) - if (p) T[1:p,1] <- phi - if (r) T[1:(r-1),2:r] <- diag(r-1) - V <- matrix(0,r,r) - ttheta <- c(1,theta) - V[1:(q+1),1:(q+1)] <- ttheta%x%t(ttheta) - V <- matrix(V,ncol=1) - S <- diag(r*r)-T%x%T - Q0 <- solve(S,V) - Q0 <- matrix(Q0,ncol=r) -} -microbenchmark(makeARIMA(phi,theta,NULL,SSinit="Ross")$Pn,SSMarima(ar=phi,ma=theta)$Q,Q0ter(phi,theta)) diff --git a/tests/structTest.R b/tests/structTest.R deleted file mode 100644 index 0326905..0000000 --- a/tests/structTest.R +++ /dev/null @@ -1,48 +0,0 @@ -context("Structural time series tests") - -test_that("SSModel works properly",{ - set.seed(123) - t12<-ts(cbind(t1=rnorm(100),t2=rnorm(100))) - t12[sample(size=50,1:200)]<-NA - expect_that( - model<-SSModel(t12~SSMcycle(period=10, type='common',Q=2) - +SSMcycle(period=10, type='distinct',P1=diag(c(1,1,2,2)),Q=diag(1:2)) - +SSMtrend(2,type="common",Q=diag(c(1,0.5))) - +SSMtrend(2,type="distinct",Q=list(diag(0.1,2),diag(0.01,2)),P1=diag(c(0.1,0.01,0.1,0.01))) - +SSMseasonal(period=4,type="common") - +SSMseasonal(period=4,type="distinct",Q=diag(c(2,3)),P1=diag(c(2,2,2,3,3,3))) - +SSMseasonal(period=5,type="common",sea.type="trig") - +SSMseasonal(period=5,type="distinct",sea.type="trig",Q=diag(c(0.1,0.2)), - P1=diag(rep(c(0.1,0.2),each=4))) - +SSMarima(ar=0.9,ma=0.2)+SSMregression(~-1+t2,index=1,Q=1) - ),not(gives_warning())) - expect_that(print(model),not(gives_warning())) - expect_that(logLik(model),not(gives_warning())) - expect_equivalent(logLik(model),-442.270604625564) - expect_that(out<-KFS(model,filtering=c("state","mean"),smoothing=c("state","mean","disturbance")), - not(gives_warning())) - expect_equal(out$d,11) - expect_equal(out$j,1) -}) - -test_that("StructTS and KFS give equivalent results",{ -require(graphics) -trees <- window(treering, start = 0) -fit <- StructTS(trees, type = "level") -# Construct model per StructTS: -model<-SSModel(trees~SSMtrend(degree=1,Q=fit$coef[1],P1=fit$model0$P,a1=fit$model0$a),H=fit$coef[2]) -out<-KFS(model) -expect_equivalent(c(fitted(fit)),coef(out,filtered=TRUE)[-1]) -expect_equivalent(fit$loglik,logLik(model)) - -fit2 <- StructTS(trees, type = "level",fixed=c(NA,1e-15)) -model<-SSModel(trees~SSMtrend(degree=1,Q=fit2$coef[1],P1=fit2$model0$P,a1=fit2$model0$a),H=fit2$coef[2]) -expect_less_than(abs(fit2$loglik-logLik(model)),1e-4) -fit2 <- StructTS(trees, type = "level",fixed=c(1e-15,NA)) -model<-SSModel(trees~SSMtrend(degree=1,Q=fit2$coef[1],P1=fit2$model0$P,a1=fit2$model0$a),H=fit2$coef[2]) -expect_less_than(abs(fit2$loglik-logLik(model)),1e-11) - -model<-SSModel(trees~SSMtrend(degree=1,Q=NA),H=NA) -fit2<-fitSSM(model,inits=c(0,0),method="BFGS") -expect_equivalent(c(fit2$model$H),fit$coef[2]) -expect_less_than(abs(c(fit2$model$Q)-fit$coef[1]),1e-9) diff --git a/tests/testthat/testBasics.R b/tests/testthat/testBasics.R new file mode 100644 index 0000000..e94a991 --- /dev/null +++ b/tests/testthat/testBasics.R @@ -0,0 +1,25 @@ +test_that("SSModel works properly",{ + set.seed(123) + d<-data.frame(x=rnorm(100)) + t12<-ts(cbind(t1=rnorm(100)+d$x,t2=rnorm(100))) + t12[sample(size=50,1:200)]<-NA + expect_that( + model<-SSModel(t12~SSMcycle(period=10, type='common',Q=2) + +SSMcycle(period=10, type='distinct',P1=diag(c(1,1,2,2)),Q=diag(1:2)) + +SSMtrend(2,type="common",Q=diag(c(1,0.5))) + +SSMtrend(2,type="distinct",Q=list(diag(0.1,2),diag(0.01,2)),P1=diag(c(0.1,0.01,0.1,0.01))) + +SSMseasonal(period=4,type="common") + +SSMseasonal(period=4,type="distinct",Q=diag(c(2,3)),P1=diag(c(2,2,2,3,3,3))) + +SSMseasonal(period=5,type="common",sea.type="trig") + +SSMseasonal(period=5,type="distinct",sea.type="trig",Q=diag(c(0.1,0.2)), + P1=diag(rep(c(0.1,0.2),each=4))) + +SSMarima(ar=0.9,ma=0.2)+SSMregression(~-1+x,index=1,Q=1,data=d) + ),not(gives_warning())) + expect_that(print(model),not(gives_warning())) + expect_that(logLik(model),not(gives_warning())) + expect_equivalent(logLik(model),-443.011122738032) + expect_that(out<-KFS(model,filtering=c("state","mean"),smoothing=c("state","mean","disturbance")), + not(gives_warning())) + expect_equal(out$d,11) + expect_equal(out$j,1) +}) \ No newline at end of file diff --git a/tests/testthat/glmfits.R b/tests/testthat/testGLM.R similarity index 99% rename from tests/testthat/glmfits.R rename to tests/testthat/testGLM.R index c1f3dc1..c8bbdac 100644 --- a/tests/testthat/glmfits.R +++ b/tests/testthat/testGLM.R @@ -1,5 +1,6 @@ context("KFAS and glm comparison") +require(MASS) # Test for Gaussian ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) @@ -61,7 +62,7 @@ while(abs(theta-theta0)/theta0>1e-7 && i