From d5989db6ed02757d058fc02cb6fad364bb8cde0d Mon Sep 17 00:00:00 2001 From: Jouni Helske Date: Sat, 28 Dec 2024 14:27:42 +0200 Subject: [PATCH] better handling of generic error from NLopt --- DESCRIPTION | 2 +- R/dnm_fanhmm.R | 30 ++++++++++--- R/dnm_mnhmm.R | 30 ++++++++++--- R/dnm_nhmm.R | 30 ++++++++++--- R/em_dnm_fanhmm.R | 30 ++++++++++--- R/em_dnm_mnhmm.R | 30 ++++++++++--- R/em_dnm_nhmm.R | 30 ++++++++++--- R/utilities.R | 4 +- src/fanhmm_EM.cpp | 49 ++++++++++++-------- src/mnhmm_EM.cpp | 111 +++++++++++++++++++++++++++++++--------------- src/nhmm_EM.cpp | 101 +++++++++++++++++++++++++---------------- 11 files changed, 319 insertions(+), 128 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5621403..13261e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,7 +66,7 @@ License: GPL (>= 2) Encoding: UTF-8 BugReports: https://github.com/helske/seqHMM/issues VignetteBuilder: knitr -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Config/testthat/edition: 3 SystemRequirements: GNU make Biarch: true diff --git a/R/dnm_fanhmm.R b/R/dnm_fanhmm.R index 613825f..ff40605 100644 --- a/R/dnm_fanhmm.R +++ b/R/dnm_fanhmm.R @@ -103,9 +103,18 @@ dnm_fanhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control_restart ) - if (fit$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(fit$solution)$gradient^2)) - if (grad_norm < 1e-6) fit$status <- 7 + if (fit$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(fit$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(fit$objective - objectivef(init)$objective) / + (abs(fit$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(fit$objective)) || + relative_change < control_restart$ftol_rel) { + fit$status <- 7 + } } p() fit @@ -149,9 +158,18 @@ dnm_fanhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control ) - if (out$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(out$solution)$gradient^2)) - if (grad_norm < 1e-6) out$status <- 6 + if (out$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(out$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(out$objective - objectivef(init)$objective) / + (abs(out$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(out$objective)) || + relative_change < control$ftol_rel) { + out$status <- 7 + } } if (out$status < 0) { warning_( diff --git a/R/dnm_mnhmm.R b/R/dnm_mnhmm.R index 283b2f5..c983b07 100644 --- a/R/dnm_mnhmm.R +++ b/R/dnm_mnhmm.R @@ -135,9 +135,18 @@ dnm_mnhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control_restart ) - if (fit$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(fit$solution)$gradient^2)) - if (grad_norm < 1e-6) fit$status <- 7 + if (fit$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(fit$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(fit$objective - objectivef(init)$objective) / + (abs(fit$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(fit$objective)) || + relative_change < control_restart$ftol_rel) { + fit$status <- 7 + } } p() fit @@ -169,9 +178,18 @@ dnm_mnhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control ) - if (out$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(out$solution)$gradient^2)) - if (grad_norm < 1e-6) out$status <- 7 + if (out$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(out$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(out$objective - objectivef(init)$objective) / + (abs(out$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(out$objective)) || + relative_change < control$ftol_rel) { + out$status <- 7 + } } if (out$status < 0) { warning_( diff --git a/R/dnm_nhmm.R b/R/dnm_nhmm.R index 67966e2..86a4c90 100644 --- a/R/dnm_nhmm.R +++ b/R/dnm_nhmm.R @@ -101,9 +101,18 @@ dnm_nhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control_restart ) - if (fit$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(fit$solution)$gradient^2)) - if (grad_norm < 1e-6) fit$status <- 7 + if (fit$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(fit$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(fit$objective - objectivef(init)$objective) / + (abs(fit$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(fit$objective)) || + relative_change < control_restart$ftol_rel) { + fit$status <- 7 + } } p() fit @@ -135,9 +144,18 @@ dnm_nhmm <- function(model, inits, init_sd, restarts, lambda, bound, control, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control ) - if (out$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(out$solution)$gradient^2)) - if (grad_norm < 1e-6) out$status <- 6 + if (out$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(out$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(out$objective - objectivef(init)$objective) / + (abs(out$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(out$objective)) || + relative_change < control$ftol_rel) { + out$status <- 7 + } } if (out$status < 0) { warning_( diff --git a/R/em_dnm_fanhmm.R b/R/em_dnm_fanhmm.R index 2ae4346..eb842d6 100644 --- a/R/em_dnm_fanhmm.R +++ b/R/em_dnm_fanhmm.R @@ -116,9 +116,18 @@ em_dnm_fanhmm <- function(model, inits, init_sd, restarts, lambda, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control_restart ) - if (fit$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(fit$solution)$gradient^2)) - if (grad_norm < 1e-6) fit$status <- 7 + if (fit$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(fit$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(fit$objective - objectivef(init)$objective) / + (abs(fit$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(fit$objective)) || + relative_change < control_restart$ftol_rel) { + fit$status <- 7 + } } p() fit @@ -197,9 +206,18 @@ em_dnm_fanhmm <- function(model, inits, init_sd, restarts, lambda, lb = -rep(bound, length(unlist(init))), ub = rep(bound, length(unlist(init))), opts = control ) - if (out$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(out$solution)$gradient^2)) - if (grad_norm < 1e-6) out$status <- 6 + if (out$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(out$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(out$objective - objectivef(init)$objective) / + (abs(out$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(out$objective)) || + relative_change < control$ftol_rel) { + out$status <- 7 + } } if (out$status < 0) { warning_( diff --git a/R/em_dnm_mnhmm.R b/R/em_dnm_mnhmm.R index 9ba52b2..7683afb 100644 --- a/R/em_dnm_mnhmm.R +++ b/R/em_dnm_mnhmm.R @@ -165,9 +165,18 @@ em_dnm_mnhmm <- function(model, inits, init_sd, restarts, lambda, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control_restart ) - if (fit$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(fit$solution)$gradient^2)) - if (grad_norm < 1e-6) fit$status <- 7 + if (fit$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(fit$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(fit$objective - objectivef(init)$objective) / + (abs(fit$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(fit$objective)) || + relative_change < control_restart$ftol_rel) { + fit$status <- 7 + } } p() fit @@ -255,9 +264,18 @@ em_dnm_mnhmm <- function(model, inits, init_sd, restarts, lambda, lb = -rep(bound, length(unlist(init))), ub = rep(bound, length(unlist(init))), opts = control ) - if (out$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(out$solution)$gradient^2)) - if (grad_norm < 1e-6) out$status <- 6 + if (out$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(out$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(out$objective - objectivef(init)$objective) / + (abs(out$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(out$objective)) || + relative_change < control$ftol_rel) { + out$status <- 7 + } } if (out$status < 0) { warning_( diff --git a/R/em_dnm_nhmm.R b/R/em_dnm_nhmm.R index 7fe39a1..539d55a 100644 --- a/R/em_dnm_nhmm.R +++ b/R/em_dnm_nhmm.R @@ -134,9 +134,18 @@ em_dnm_nhmm <- function(model, inits, init_sd, restarts, lambda, x0 = init, eval_f = objectivef, lb = -rep(bound, length(init)), ub = rep(bound, length(init)), opts = control_restart ) - if (fit$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(fit$solution)$gradient^2)) - if (grad_norm < 1e-6) fit$status <- 7 + if (fit$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(fit$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(fit$objective - objectivef(init)$objective) / + (abs(fit$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(fit$objective)) || + relative_change < control_restart$ftol_rel) { + fit$status <- 7 + } } p() fit @@ -218,9 +227,18 @@ em_dnm_nhmm <- function(model, inits, init_sd, restarts, lambda, lb = -rep(bound, length(unlist(init))), ub = rep(bound, length(unlist(init))), opts = control ) - if (out$status == -1 && need_grad) { - grad_norm <- sqrt(sum(objectivef(out$solution)$gradient^2)) - if (grad_norm < 1e-6) out$status <- 6 + if (out$status == -1) { + grad_norm <- if (need_grad) { + max(abs(objectivef(out$solution)$gradient)) + } else { + 1 + } + relative_change <- abs(out$objective - objectivef(init)$objective) / + (abs(out$objective) + 1e-12) + if ((grad_norm < 1e-8 && is.finite(out$objective)) || + relative_change < control$ftol_rel) { + out$status <- 7 + } } if (out$status < 0) { warning_( diff --git a/R/utilities.R b/R/utilities.R index fc6a73a..14741a5 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -76,7 +76,9 @@ return_msg <- function(code) { if (code == 7) { msg <- paste0( "NLopt terminated with generic error code -1. ", - "Gradient norm was less than 1e-6, so likely converged successfully." + "Maximum absolute value of gradient was less than 1e-6 or relative ", + "change of log-likelihood less than ftol_rel so likely converged ", + "successfully." ) } paste0(x, msg) diff --git a/src/fanhmm_EM.cpp b/src/fanhmm_EM.cpp index 82a2d05..7cc7c6e 100644 --- a/src/fanhmm_EM.cpp +++ b/src/fanhmm_EM.cpp @@ -103,21 +103,27 @@ void fanhmm_sc::mstep_A(const double ftol_abs, const double ftol_rel, nlopt_set_upper_bounds1(opt, bound); double minf; int return_code; - + double ll; + arma::vec grad(n); for (arma::uword s = 0; s < S; s++) { current_s = s; x = arma::join_cols( arma::vectorise(eta_A.slice(s)), arma::vectorise(rho_A(s)) ); + ll = objective_A(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - // nlopt_optimize can return generic failure code due to small gradients - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_A(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_A(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -249,20 +255,27 @@ void fanhmm_sc::mstep_B(const double ftol_abs, const double ftol_rel, nlopt_set_upper_bounds1(opt, bound); double minf; int return_code; + double ll; + arma::vec grad(n); for (arma::uword s = 0; s < S; s++) { current_s = s; x = arma::join_cols( arma::vectorise(eta_B.slice(s)), arma::vectorise(rho_B(s)) ); + ll = objective_B(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - // nlopt_optimize can return generic failure code due to small gradients - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_B(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_B(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -390,7 +403,7 @@ Rcpp::List EM_LBFGS_fanhmm_singlechannel( // Minimize obj(E_pi, E_A, E_B, eta_pi, eta_A, eta_B, x, X_A, X_B) // with respect to eta_pi, eta_A, eta_B model.mstep_pi( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -399,7 +412,7 @@ Rcpp::List EM_LBFGS_fanhmm_singlechannel( absolute_change, absolute_x_change, relative_x_change); } model.mstep_A( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -408,7 +421,7 @@ Rcpp::List EM_LBFGS_fanhmm_singlechannel( absolute_change, absolute_x_change, relative_x_change); } model.mstep_B( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { diff --git a/src/mnhmm_EM.cpp b/src/mnhmm_EM.cpp index 41b8534..55dca75 100644 --- a/src/mnhmm_EM.cpp +++ b/src/mnhmm_EM.cpp @@ -71,13 +71,22 @@ void mnhmm_base::mstep_omega(const double xtol_abs, const double ftol_abs, nlopt_set_lower_bounds1(opt, -bound); nlopt_set_upper_bounds1(opt, bound); double minf; + int return_code; + double ll; + arma::vec grad(eta_omega.n_elem); + ll = objective_omega(x, grad); mstep_iter = 0; - int return_code = nlopt_optimize(opt, x.memptr(), &minf); - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_omega(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_omega(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -118,8 +127,8 @@ double mnhmm_base::objective_pi(const arma::vec& x, arma::vec& grad) { return value + 0.5 * lambda * std::pow(arma::norm(x, 2), 2); } -void mnhmm_base::mstep_pi(const double xtol_abs, const double ftol_abs, - const double xtol_rel, const double ftol_rel, +void mnhmm_base::mstep_pi(const double ftol_abs, const double ftol_rel, + const double xtol_abs, const double xtol_rel, const arma::uword maxeval, const double bound, const arma::uword print_level) { mstep_return_code = 0; @@ -155,16 +164,24 @@ void mnhmm_base::mstep_pi(const double xtol_abs, const double ftol_abs, double minf; int return_code; int mstep_iter; + double ll; + arma::vec grad(eta_pi(0).n_elem); for (arma::uword d = 0; d < D; d++) { current_d = d; - arma::vec x(eta_pi(d).memptr(), eta_pi.n_elem, false, true); + arma::vec x(eta_pi(d).memptr(), eta_pi(d).n_elem, false, true); + ll = objective_pi(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_pi(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_pi(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -265,18 +282,26 @@ void mnhmm_base::mstep_A(const double ftol_abs, const double ftol_rel, double minf; int return_code; int mstep_iter; + double ll; + arma::vec grad(eta_A(0).slice(0).n_elem); for (arma::uword d = 0; d < D; d++) { current_d = d; for (arma::uword s = 0; s < S; s++) { current_s = s; arma::vec x(eta_A(d).slice(s).memptr(), eta_A(d).slice(s).n_elem, false, true); + ll = objective_A(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_A(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_A(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -385,18 +410,26 @@ void mnhmm_sc::mstep_B(const double ftol_abs, const double ftol_rel, double minf; int return_code; int mstep_iter; + double ll; + arma::vec grad(eta_B(0).slice(0).n_elem); for (arma::uword d = 0; d < D; d++) { current_d = d; for (arma::uword s = 0; s < S; s++) { current_s = s; arma::vec x(eta_B(d).slice(s).memptr(), eta_B(d).slice(s).n_elem, false, true); + ll = objective_B(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_B(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_B(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -499,6 +532,7 @@ void mnhmm_mc::mstep_B(const double ftol_abs, const double ftol_rel, double minf; int return_code; int mstep_iter; + double ll; for (arma::uword c = 0; c < C; c++) { arma::vec x(eta_B(c, 0).slice(0).n_elem); nlopt_opt opt = nlopt_create(NLOPT_LD_LBFGS, x.n_elem); @@ -511,18 +545,25 @@ void mnhmm_mc::mstep_B(const double ftol_abs, const double ftol_rel, nlopt_set_lower_bounds1(opt, -bound); nlopt_set_upper_bounds1(opt, bound); current_c = c; + arma::vec grad(eta_B(c, 0).slice(0).n_elem); for (arma::uword d = 0; d < D; d++) { current_d = d; for (arma::uword s = 0; s < S; s++) { current_s = s; arma::vec x(eta_B(c, d).slice(s).memptr(), eta_B(c, d).slice(s).n_elem, false, true); + ll = objective_B(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_B(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_B(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -735,7 +776,7 @@ Rcpp::List EM_LBFGS_mnhmm_singlechannel( penalty_term = 0.5 * lambda * std::pow(arma::norm(new_pars, 2), 2); ll_new -= penalty_term; ll_new /= model.n_obs; - relative_change = (ll_new - ll) / (std::abs(ll) + 1e-8); + relative_change = (ll_new - ll) / (std::abs(ll) + 1e-12); absolute_change = ll_new - ll; absolute_x_change = arma::max(arma::abs(new_pars - pars)); relative_x_change = arma::norm(new_pars - pars, 1) / arma::norm(pars, 1); @@ -985,7 +1026,7 @@ Rcpp::List EM_LBFGS_mnhmm_multichannel( penalty_term = 0.5 * lambda * std::pow(arma::norm(new_pars, 2), 2); ll_new -= penalty_term; ll_new /= model.n_obs; - relative_change = (ll_new - ll) / (std::abs(ll) + 1e-8); + relative_change = (ll_new - ll) / (std::abs(ll) + 1e-12); absolute_change = ll_new - ll; absolute_x_change = arma::max(arma::abs(new_pars - pars)); relative_x_change = arma::norm(new_pars - pars, 1) / arma::norm(pars, 1); diff --git a/src/nhmm_EM.cpp b/src/nhmm_EM.cpp index b7887fd..48ff3c9 100644 --- a/src/nhmm_EM.cpp +++ b/src/nhmm_EM.cpp @@ -36,8 +36,8 @@ double nhmm_base::objective_pi(const arma::vec& x, arma::vec& grad) { grad += lambda * x; return value + 0.5 * lambda * std::pow(arma::norm(x, 2), 2); } -void nhmm_base::mstep_pi(const double xtol_abs, const double ftol_abs, - const double xtol_rel, const double ftol_rel, +void nhmm_base::mstep_pi(const double ftol_abs, const double ftol_rel, + const double xtol_abs, const double xtol_rel, const arma::uword maxeval, const double bound, const arma::uword print_level) { mstep_return_code = 0; @@ -69,14 +69,22 @@ void nhmm_base::mstep_pi(const double xtol_abs, const double ftol_abs, nlopt_set_lower_bounds1(opt, -bound); nlopt_set_upper_bounds1(opt, bound); double minf; + int return_code; + double ll; + arma::vec grad(eta_pi.n_elem); + ll = objective_pi(x, grad); mstep_iter = 0; - int return_code = nlopt_optimize(opt, x.memptr(), &minf); - // nlopt_optimize can return generic failure code due to small gradients - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_pi(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + // nlopt_optimize can return generic failure code due to small gradients + if (return_code == -1) { + double ll_new = objective_pi(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -169,18 +177,23 @@ void nhmm_base::mstep_A(const double ftol_abs, const double ftol_rel, nlopt_set_upper_bounds1(opt, bound); double minf; int return_code; - + double ll; + arma::vec grad(eta_A.slice(0).n_elem); for (arma::uword s = 0; s < S; s++) { current_s = s; arma::vec x(eta_A.slice(s).memptr(), eta_A.slice(s).n_elem, false, true); + ll = objective_A(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - // nlopt_optimize can return generic failure code due to small gradients - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_A(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + if (return_code == -1) { + double ll_new = objective_A(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -236,7 +249,6 @@ double nhmm_sc::objective_B(const arma::vec& x, arma::vec& grad) { } } grad += lambda * x; - return value + 0.5 * lambda * std::pow(arma::norm(x, 2), 2); } void nhmm_sc::mstep_B(const double ftol_abs, const double ftol_rel, @@ -281,17 +293,25 @@ void nhmm_sc::mstep_B(const double ftol_abs, const double ftol_rel, nlopt_set_upper_bounds1(opt, bound); double minf; int return_code; + double ll; + arma::vec grad(eta_B.slice(0).n_elem); for (arma::uword s = 0; s < S; s++) { current_s = s; arma::vec x(eta_B.slice(s).memptr(), eta_B.slice(s).n_elem, false, true); + ll = objective_B(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); - // nlopt_optimize can return generic failure code due to small gradients - if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_B(x, grad); - if (arma::norm(grad) < 1e-6) { - return_code = 7; + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + if (return_code == -1) { + // nlopt_optimize can return generic failure code, check if still converged + // doesn't really matter if fails if at least one M-step is succesful (partial M-step) + double ll_new = objective_B(x, grad); + double relative_change = std::abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { + return_code = 7; + } } } if (print_level > 2) { @@ -387,7 +407,9 @@ void nhmm_mc::mstep_B(const double ftol_abs, const double ftol_rel, }; double minf; int return_code; + double ll; for (arma::uword c = 0; c < C; c++) { + arma::vec grad(eta_B(c).slice(0).n_elem); nlopt_opt opt = nlopt_create(NLOPT_LD_LBFGS, eta_B(c).slice(0).n_elem); nlopt_set_min_objective(opt, objective_B_wrapper, this); nlopt_set_xtol_abs1(opt, xtol_abs); @@ -401,13 +423,18 @@ void nhmm_mc::mstep_B(const double ftol_abs, const double ftol_rel, for (arma::uword s = 0; s < S; s++) { current_s = s; arma::vec x(eta_B(c).slice(s).memptr(), eta_B(c).slice(s).n_elem, false, true); + ll = objective_B(x, grad); mstep_iter = 0; - return_code = nlopt_optimize(opt, x.memptr(), &minf); + if (arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll)) { + return_code = 1; // already converged (L-BFGS gradient tolerance) + } else { + return_code = nlopt_optimize(opt, x.memptr(), &minf); + } // nlopt_optimize can return generic failure code due to small gradients if (return_code == -1) { - arma::vec grad(x.n_elem); - objective_B(x, grad); - if (arma::norm(grad) < 1e-6) { + double ll_new = objective_B(x, grad); + double relative_change = abs(ll_new - ll) / (std::abs(ll) + 1e-12); + if ((arma::norm(grad, "inf") < 1e-8 && std::isfinite(ll_new)) || relative_change < ftol_rel) { return_code = 7; } } @@ -524,7 +551,7 @@ Rcpp::List EM_LBFGS_nhmm_singlechannel( // Minimize obj(E_pi, E_A, E_B, eta_pi, eta_A, eta_B, x, X_A, X_B) // with respect to eta_pi, eta_A, eta_B model.mstep_pi( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -533,7 +560,7 @@ Rcpp::List EM_LBFGS_nhmm_singlechannel( absolute_change, absolute_x_change, relative_x_change); } model.mstep_A( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -542,7 +569,7 @@ Rcpp::List EM_LBFGS_nhmm_singlechannel( absolute_change, absolute_x_change, relative_x_change); } model.mstep_B( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -587,7 +614,7 @@ Rcpp::List EM_LBFGS_nhmm_singlechannel( ll_new -= penalty_term; ll_new /= model.n_obs; - relative_change = (ll_new - ll) / (std::abs(ll) + 1e-8); + relative_change = (ll_new - ll) / (std::abs(ll) + 1e-12); absolute_change = ll_new - ll; absolute_x_change = arma::max(arma::abs(pars_new - pars)); relative_x_change = arma::norm(pars_new - pars, 1) / arma::norm(pars, 1); @@ -737,7 +764,7 @@ Rcpp::List EM_LBFGS_nhmm_multichannel( // Minimize obj(E_pi, E_A, E_B, eta_pi, eta_A, eta_B, x, X_A, X_B) // with respect to eta_pi, eta_A, eta_B model.mstep_pi( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -746,7 +773,7 @@ Rcpp::List EM_LBFGS_nhmm_multichannel( absolute_change, absolute_x_change, relative_x_change); } model.mstep_A( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -755,7 +782,7 @@ Rcpp::List EM_LBFGS_nhmm_multichannel( absolute_change, absolute_x_change, relative_x_change); } model.mstep_B( - ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_abs_m, maxeval_m, bound, + ftol_abs_m, ftol_rel_m, xtol_abs_m, xtol_rel_m, maxeval_m, bound, print_level_m ); if (model.mstep_return_code != 0) { @@ -804,7 +831,7 @@ Rcpp::List EM_LBFGS_nhmm_multichannel( ll_new -= penalty_term; ll_new /= model.n_obs; - relative_change = (ll_new - ll) / (std::abs(ll) + 1e-8); + relative_change = (ll_new - ll) / (std::abs(ll) + 1e-12); absolute_change = ll_new - ll; absolute_x_change = arma::max(arma::abs(pars_new - pars)); relative_x_change = arma::norm(pars_new - pars, 1) / arma::norm(pars, 1);