diff --git a/numericalnim.nimble b/numericalnim.nimble
index 3aed9a0..59ddbec 100644
--- a/numericalnim.nimble
+++ b/numericalnim.nimble
@@ -1,5 +1,5 @@
 # Package Information
-version = "0.8.5"
+version = "0.8.6"
 author = "Hugo Granström"
 description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
 license = "MIT"
diff --git a/src/numericalnim/optimize.nim b/src/numericalnim/optimize.nim
index 1d45662..74b3b7e 100644
--- a/src/numericalnim/optimize.nim
+++ b/src/numericalnim/optimize.nim
@@ -1,6 +1,8 @@
 import std/[strformat, sequtils, math, deques]
 import arraymancer
-import ./differentiate
+import
+  ./differentiate,
+  ./utils
 
 when not defined(nimHasEffectsOf):
   {.pragma: effectsOf.}
@@ -126,9 +128,9 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision:
     raise newException(ArithmeticError, "Maximum iterations for Secant method exceeded")
   return xCurrent
 
-##############################
-## Multidimensional methods ##
-##############################
+# ######################## #
+# Multidimensional methods #
+# ######################## #
 type
   LineSearchCriterion* = enum
     Armijo, Wolfe, WolfeStrong, NoLineSearch
@@ -146,15 +148,28 @@ type
   LBFGSOptions*[U] = object
     savedIterations*: int
 
-proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a vanilla OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
-  result.lambda0 = lambda0
   result.fastMode = fastMode
   result.maxIterations = maxIterations
   result.lineSearchCriterion = lineSearchCriterion
 
 proc steepestDescentOptions*[U](tol: U = U(1e-6), alpha: U = U(0.001), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a Steepest Descent OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -162,6 +177,13 @@ proc steepestDescentOptions*[U](tol: U = U(1e-6), alpha: U = U(0.001), fastMode:
   result.lineSearchCriterion = lineSearchCriterion
 
 proc newtonOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a Newton OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -169,6 +191,13 @@ proc newtonOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false
   result.lineSearchCriterion = lineSearchCriterion
 
 proc bfgsOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
+  ## Returns a BFGS OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -176,6 +205,14 @@ proc bfgsOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false,
   result.lineSearchCriterion = lineSearchCriterion
 
 proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LBFGSOptions[U]] =
+  ## Returns an LBFGS OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
+  ## - savedIterations: Number of past iterations to save. A higher value gives better steps but makes each step slower.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -184,6 +221,14 @@ proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(
   result.algoOptions.savedIterations = savedIterations
 
 proc levmarqOptions*[U](lambda0: U = U(1), tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LevMarqOptions[U]] =
+  ## Returns a levmarq OptimOptions
+  ## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
+  ## - alpha: The step size.
+  ## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
+  ##   Else a more accurate but slower second order finite difference scheme will be used.
+  ## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
+  ## - lineSearchCriterion: Which line search method to use.
+  ## - lambda0: Starting value of the damping parameter.
   result.tol = tol
   result.alpha = alpha
   result.fastMode = fastMode
@@ -253,7 +298,15 @@ template analyticOrNumericGradient(analytic, f, x, options: untyped): untyped =
     analytic(x)
 
 proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = steepestDescentOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
-  ## Minimize scalar-valued function f.
+  ## Steepest descent method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `steepestDescentOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x0))
@@ -275,6 +328,15 @@ proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
   result = x
 
 proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = newtonOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+  ## Newton's method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `newtonOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x))
@@ -342,6 +404,15 @@ proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha:
   result = x
 
 proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+  ## BFGS (Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `bfgsOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   # Use gemm and gemv with preallocated Tensors and setting beta = 0
   var alpha = options.alpha
   var x = x0.clone()
@@ -421,7 +492,16 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: O
   #echo iters, " iterations done!"
   result = x
 
-proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
+  ## LBFGS (Limited-memory Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
+  ##
+  ## Inputs:
+  ## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
+  ## - options: Options object (see `lbfgsOptions` for constructing one)
+  ## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   var alpha = options.alpha
   var x = x0.clone()
   let xLen = x.shape[0]
@@ -429,7 +509,7 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int =
   var gradient = 0.01*analyticOrNumericGradient(analyticGradient, f, x0, options)
   var gradNorm = vectorNorm(gradient)
   var iters: int
-  #let m = 10 # number of past iterations to save
+  let m = options.algoOptions.savedIterations # number of past iterations to save
   var sk_queue = initDeque[Tensor[U]](m)
   var yk_queue = initDeque[Tensor[T]](m)
   # the problem is the first iteration as the gradient is huge and no adjustments are made
@@ -475,7 +555,20 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int =
   #echo iters, " iterations done!"
   result = x
 
-proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U] =
+proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U](), yError: Tensor[T] = ones_like(yData)): Tensor[U] =
+  ## Levenberg-Marquardt for non-linear least squares solving. In other words, it fits the parameters of a function to data samples.
+  ##
+  ## Inputs:
+  ## - f: The function you want to fit the data to. The first argument should be a 1D Tensor with the values of the parameters
+  ##   and the second argument is the value of the independent variable to evaluate the function at.
+  ## - params0: The starting guess for the parameter values as a 1D Tensor.
+  ## - yData: The measured values of the dependent variable as a 1D Tensor.
+  ## - xData: The values of the independent variable as a 1D Tensor.
+  ## - options: Object with all the options like `tol` and `lambda0` (see `levmarqOptions`).
+  ## - yError: The uncertainties of the `yData` as a 1D Tensor. Ideally these should be the 1σ standard deviations.
+  ##
+  ## Returns:
+  ## - The final solution for the parameters. Either because a (local) minimum was found or because the maximum number of iterations was reached.
   assert xData.rank == 1
   assert yData.rank == 1
   assert params0.rank == 1
@@ -486,8 +579,8 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te
 
   let residualFunc = # proc that returns the residual vector
     proc (params: Tensor[U]): Tensor[T] =
-      result = map2_inline(xData, yData):
-        f(params, x) - y
+      result = map3_inline(xData, yData, yError):
+        (f(params, x) - y) / z
 
   let errorFunc = # proc that returns the scalar error
     proc (params: Tensor[U]): T =
@@ -524,6 +617,51 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te
 
   result = params
 
+proc inv[T](t: Tensor[T]): Tensor[T] =
+  result = solve(t, eye[T](t.shape[0]))
+
+proc getDiag[T](t: Tensor[T]): Tensor[T] =
+  let n = t.shape[0]
+  result = newTensor[T](n)
+  for i in 0 ..< n:
+    result[i] = t[i,i]
+
+proc paramUncertainties*[U; T](params: Tensor[U], fitFunc: proc(params: Tensor[U], x: U): T, yData: Tensor[T], xData: Tensor[U], yError: Tensor[T], returnFullCov = false): Tensor[T] =
+  ## Returns the whole covariance matrix or only the diagonal elements for the parameters in `params`.
+  ##
+  ## Inputs:
+  ## - params: The parameters in a 1D Tensor that the uncertainties are wanted for.
+  ## - fitFunc: The function used for fitting the parameters (see `levmarq` for more).
+  ## - yData: The measured values of the dependent variable as a 1D Tensor.
+  ## - xData: The values of the independent variable as a 1D Tensor.
+  ## - yError: The uncertainties of the `yData` as a 1D Tensor. Ideally these should be the 1σ standard deviations.
+  ## - returnFullCov: If true, the full covariance matrix will be returned as a 2D Tensor, else only the diagonal elements will be returned as a 1D Tensor.
+  ##
+  ## Returns:
+  ##
+  ## The uncertainties of the parameters in the form of a covariance matrix (or only the diagonal elements).
+  ##
+  ## Note: it is the covariance that is returned, so if you want the standard deviation you have to
+  ## take the square root of it.
+  proc fError(params: Tensor[U]): T =
+    let yCurve = xData.map_inline:
+      fitFunc(params, x)
+    result = chi2(yData, yCurve, yError)
+
+  let dof = xData.size - params.size
+  let sigma2 = fError(params) / T(dof)
+  let H = tensorHessian(fError, params)
+  let cov = sigma2 * H.inv()
+
+  if returnFullCov:
+    result = cov
+  else:
+    result = cov.getDiag()
+
+
+
+
 when isMainModule:
   import benchy
 
diff --git a/src/numericalnim/utils.nim b/src/numericalnim/utils.nim
index d713e7c..9640821 100644
--- a/src/numericalnim/utils.nim
+++ b/src/numericalnim/utils.nim
@@ -303,6 +303,22 @@ proc hermiteInterpolate*[T](x: openArray[float], t: openArray[float],
     raise newException(ValueError, &"{a} not in interval {min(t)} - {max(t)}")
 
 
+
+proc chi2*[T](yData, yFit, yError: seq[T] or Tensor[T]): T =
+  when yData is Tensor:
+    assert yData.rank == 1
+    assert yFit.rank == 1
+    assert yError.rank == 1
+    assert yData.size == yFit.size
+    assert yFit.size == yError.size
+    let N = yData.size
+  else:
+    let N = yData.len
+  result = T(0)
+  for i in 0 ..< N:
+    let temp = (yData[i] - yFit[i]) / yError[i]
+    result += temp * temp
+
 proc delete*[T](s: var seq[T], idx: seq[int]) =
   ## Deletes the elements of seq s at indices idx.
   ## idx must not contain duplicates!
diff --git a/tests/test_optimize.nim b/tests/test_optimize.nim
index a9bcbaf..fa1222b 100644
--- a/tests/test_optimize.nim
+++ b/tests/test_optimize.nim
@@ -144,4 +144,19 @@ suite "Multi-dim":
     for x in abs(paramsSol - correctParams):
       check x < 1.3e-3
 
+  test "levmarq with yError":
+    let yError = ones_like(yData) * 1e-2
+    let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)
+    for x in abs(paramsSol - correctParams):
+      check x < 1.3e-3
+
+  test "paramUncertainties":
+    let yError = ones_like(yData) * 1e-2
+    let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)
+
+    let uncertainties = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()
+
+    for (unc, err) in zip(uncertainties, abs(paramsSol - correctParams)):
+      check abs(unc / err) in 0.79 .. 3.6
+
 
diff --git a/tests/test_utils.nim b/tests/test_utils.nim
index 8edd411..008a365 100644
--- a/tests/test_utils.nim
+++ b/tests/test_utils.nim
@@ -1,4 +1,4 @@
-import unittest, math, sequtils, algorithm
+import unittest, math, sequtils, algorithm, random
 import arraymancer
 import ./numericalnim
 
@@ -91,3 +91,26 @@ test "meshgrid":
   let grid = meshgrid(x, y, z)
   check grid == [[0, 2, 4], [1, 2, 4], [0, 3, 4], [1, 3, 4], [0, 2, 5], [1, 2, 5], [0, 3, 5], [1, 3, 5]].toTensor
 
+test "chi2 Tensor":
+  randomize(1337)
+  let N = 1000
+  let sigma = 1.23
+  let yMeasure = newSeqWith(N, gauss(0.0, sigma)).toTensor
+  let yCorrect = zeros[float](N)
+  let yError = ones[float](N) * sigma
+  let chi = chi2(yMeasure, yCorrect, yError)
+  # Check that the mean χ² is around 1
+  check chi / N.float in 0.90 .. 1.1
+
+test "chi2 Seq":
+  randomize(1337)
+  let N = 1000
+  let sigma = 1.23
+  let yMeasure = newSeqWith(N, gauss(0.0, sigma))
+  let yCorrect = newSeqWith(N, 0.0)
+  let yError = newSeqWith(N, sigma)
+  let chi = chi2(yMeasure, yCorrect, yError)
+  # Check that the mean χ² is around 1
+  check chi / N.float in 0.90 .. 1.1
+
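For orientation, here is a rough usage sketch (not part of the patch) of how the new `yError` argument, `paramUncertainties` and the `chi2` helper are meant to work together. The model `fitFunc`, the generated data, the noise level and the starting guess below are invented for illustration; only the `levmarq`, `paramUncertainties` and `chi2` calls themselves come from this diff.

# Illustrative sketch only: the model, data and parameter values are made up.
import std/[math, sequtils, random]
import arraymancer
import numericalnim

# Model to fit: f(params, x) = params[0] * exp(params[1] * x)
proc fitFunc(params: Tensor[float], x: float): float =
  params[0] * exp(params[1] * x)

randomize(42)
let xData = toSeq(0 ..< 20).mapIt(it.float / 19.0).toTensor
let noise = newSeqWith(20, gauss(0.0, 0.01)).toTensor
let yData = xData.map_inline(2.0 * exp(-0.5 * x)) + noise  # fake "measurements"
let yError = ones_like(yData) * 1e-2                       # assumed 1σ uncertainties
let params0 = [1.0, -0.1].toTensor                         # starting guess

# Fit the parameters; the residuals are divided by yError internally.
let paramsSol = levmarq(fitFunc, params0, xData, yData, yError = yError)

# 1σ parameter uncertainties = square root of the covariance diagonal.
let uncertainties = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()

# Reduced χ² of the fit (20 points - 2 parameters = 18 degrees of freedom).
let yFit = xData.map_inline(fitFunc(paramsSol, x))
echo paramsSol, " +- ", uncertainties
echo chi2(yData, yFit, yError) / 18.0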