Merge pull request #36 from SciNim/addCurveFitUncertainties
add chi2 + add uncertainties to levmarq
HugoGranstrom committed Jan 5, 2023
2 parents 9d2f6ab + fd4cd18 commit 334683a
Showing 5 changed files with 206 additions and 14 deletions.
2 changes: 1 addition & 1 deletion numericalnim.nimble
@@ -1,5 +1,5 @@
# Package Information
version = "0.8.5"
version = "0.8.6"
author = "Hugo Granström"
description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
license = "MIT"
162 changes: 150 additions & 12 deletions src/numericalnim/optimize.nim
@@ -1,6 +1,8 @@
import std/[strformat, sequtils, math, deques]
import arraymancer
import ./differentiate
import
./differentiate,
./utils

when not defined(nimHasEffectsOf):
{.pragma: effectsOf.}
@@ -126,9 +128,9 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision:
raise newException(ArithmeticError, "Maximum iterations for Secant method exceeded")
return xCurrent

##############################
## Multidimensional methods ##
##############################
# ######################## #
# Multidimensional methods #
# ######################## #

type LineSearchCriterion* = enum
Armijo, Wolfe, WolfeStrong, NoLineSearch
@@ -146,36 +148,71 @@
LBFGSOptions*[U] = object
savedIterations*: int

proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a vanilla OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.lambda0 = lambda0
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc steepestDescentOptions*[U](tol: U = U(1e-6), alpha: U = U(0.001), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a Steepest Descent OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc newtonOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a Newton OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc bfgsOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a BFGS OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LBFGSOptions[U]] =
## Returns a LBFGS OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
## - savedIterations: Number of past iterations to save. A higher value gives better steps at the cost of more memory and computation per step.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
@@ -184,6 +221,14 @@ proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(
result.algoOptions.savedIterations = savedIterations

proc levmarqOptions*[U](lambda0: U = U(1), tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LevMarqOptions[U]] =
## Returns a levmarq OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
## - lambda0: Starting value of the damping parameter.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
@@ -253,7 +298,15 @@ template analyticOrNumericGradient(analytic, f, x, options: untyped): untyped =
analytic(x)

proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = steepestDescentOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## Minimize scalar-valued function f.
## Steepest descent method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `steepestDescentOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
var alpha = options.alpha
var x = x0.clone()
var fNorm = abs(f(x0))
@@ -275,6 +328,15 @@ proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
result = x

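For orientation, here is a minimal usage sketch of the `steepestDescent` API documented above (`newton` below takes the same arguments, paired with `newtonOptions`). The quadratic objective, its gradient, the starting point, and the step size are illustrative choices, not part of this commit, and it is assumed that the top-level `numericalnim` module re-exports these procs as the tests do:

```nim
import std/math
import arraymancer
import numericalnim

proc f(x: Tensor[float]): float =
  # Illustrative convex quadratic with its minimum at (1, -2).
  (x[0] - 1.0)^2 + (x[1] + 2.0)^2

proc grad(x: Tensor[float]): Tensor[float] =
  # Analytic gradient of f; omit it to fall back on finite differences.
  [2.0 * (x[0] - 1.0), 2.0 * (x[1] + 2.0)].toTensor

let x0 = [0.0, 0.0].toTensor
let opts = steepestDescentOptions[float](alpha = 0.01)
let sol = steepestDescent(f, x0, options = opts, analyticGradient = grad)
echo sol  # expected to approach [1.0, -2.0]
```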
proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = newtonOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## Newton's method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `newtonOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
var alpha = options.alpha
var x = x0.clone()
var fNorm = abs(f(x))
@@ -342,6 +404,15 @@ proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha:
result = x

proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## BFGS (Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `bfgsOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
# Use gemm and gemv with preallocated Tensors and setting beta = 0
var alpha = options.alpha
var x = x0.clone()
@@ -421,15 +492,24 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: O
#echo iters, " iterations done!"
result = x

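A hedged sketch of calling `bfgs` with one of the options objects above and a Wolfe line search; the Rosenbrock objective, starting point, and tolerance are illustrative and not taken from this commit:

```nim
import std/math
import arraymancer
import numericalnim

proc rosenbrock(x: Tensor[float]): float =
  # Classic Rosenbrock function with its minimum at (1, 1).
  100.0 * (x[1] - x[0]^2)^2 + (1.0 - x[0])^2

let x0 = [-1.0, 2.0].toTensor
let opts = bfgsOptions[float](tol = 1e-8, lineSearchCriterion = Wolfe)
let sol = bfgs(rosenbrock, x0, options = opts)
echo sol  # expected to end up near [1.0, 1.0]
```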
proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## LBFGS (Limited-memory Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `lbfgsOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
var alpha = options.alpha
var x = x0.clone()
let xLen = x.shape[0]
var fNorm = abs(f(x))
var gradient = 0.01*analyticOrNumericGradient(analyticGradient, f, x0, options)
var gradNorm = vectorNorm(gradient)
var iters: int
#let m = 10 # number of past iterations to save
let m = options.algoOptions.savedIterations # number of past iterations to save
var sk_queue = initDeque[Tensor[U]](m)
var yk_queue = initDeque[Tensor[T]](m)
# the problem is the first iteration as the gradient is huge and no adjustments are made
@@ -475,7 +555,20 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int =
#echo iters, " iterations done!"
result = x

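Since this commit moves the history length of `lbfgs` from the removed `m` argument into the options object, here is a sketch of the updated call; the objective, starting point, and `savedIterations = 5` are illustrative:

```nim
import std/math
import arraymancer
import numericalnim

proc rosenbrock(x: Tensor[float]): float =
  # Classic Rosenbrock function with its minimum at (1, 1).
  100.0 * (x[1] - x[0]^2)^2 + (1.0 - x[0])^2

# savedIterations replaces the old `m` argument.
let opts = lbfgsOptions[float](savedIterations = 5)
let sol = lbfgs(rosenbrock, [-1.0, 2.0].toTensor, options = opts)
echo sol  # expected to end up near [1.0, 1.0]
```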
proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U] =
proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U](), yError: Tensor[T] = ones_like(yData)): Tensor[U] =
## Levenberg-Marquardt method for non-linear least squares problems. In essence it fits the parameters of a function to data samples.
##
## Input:
## - f: The function you want to fit the data to. The first argument should be a 1D Tensor with the values of the parameters
## and the second argument is the value of the independent variable to evaluate the function at.
## - params0: The starting guess for the parameter values as a 1D Tensor.
## - yData: The measured values of the dependent variable as 1D Tensor.
## - xData: The values of the independent variable as 1D Tensor.
## - options: Object with all the options like `tol` and `lambda0`. (see `levmarqOptions`)
## - yError: The uncertainties of the `yData` as 1D Tensor. Ideally these should be the 1σ standard deviations.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
assert xData.rank == 1
assert yData.rank == 1
assert params0.rank == 1
@@ -486,8 +579,8 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te

let residualFunc = # proc that returns the residual vector
proc (params: Tensor[U]): Tensor[T] =
result = map2_inline(xData, yData):
f(params, x) - y
result = map3_inline(xData, yData, yError):
(f(params, x) - y) / z

let errorFunc = # proc that returns the scalar error
proc (params: Tensor[U]): T =
@@ -524,6 +617,51 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te
result = params

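A hedged sketch of the updated `levmarq` signature with the new `yError` argument weighting the residuals; the exponential model, the synthetic noise-free data, and the 0.01 uncertainties are made up for illustration:

```nim
import std/math
import arraymancer
import numericalnim

proc fitFunc(params: Tensor[float], x: float): float =
  # Model y = a * exp(b * x) with params = [a, b].
  params[0] * exp(params[1] * x)

let xData = linspace(0.0, 1.0, 20)
let yData = xData.map_inline:
  2.0 * exp(-3.0 * x)  # synthetic "measurements" generated with a = 2, b = -3
let yError = ones_like(yData) * 0.01  # assumed 1σ uncertainties, used to weight the residuals

let paramsSol = levmarq(fitFunc, [1.0, -1.0].toTensor, xData, yData, yError = yError)
echo paramsSol  # expected to be close to [2.0, -3.0]
```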

proc inv[T](t: Tensor[T]): Tensor[T] =
result = solve(t, eye[T](t.shape[0]))

proc getDiag[T](t: Tensor[T]): Tensor[T] =
let n = t.shape[0]
result = newTensor[T](n)
for i in 0 ..< n:
result[i] = t[i,i]

proc paramUncertainties*[U; T](params: Tensor[U], fitFunc: proc(params: Tensor[U], x: U): T, yData: Tensor[T], xData: Tensor[U], yError: Tensor[T], returnFullCov = false): Tensor[T] =
## Returns the whole covariance matrix or only the diagonal elements for the parameters in `params`.
##
## Inputs:
## - params: The parameters in a 1D Tensor that the uncertainties are wanted for.
## - fitFunc: The function used for fitting the parameters. (see `levmarq` for more)
## - yData: The measured values of the dependent variable as 1D Tensor.
## - xData: The values of the independent variable as 1D Tensor.
## - yError: The uncertainties of the `yData` as 1D Tensor. Ideally these should be the 1σ standard deviation.
## - returnFullCov: If true, the full covariance matrix will be returned as a 2D Tensor, else only the diagonal elements will be returned as a 1D Tensor.
##
## Returns:
##
## The uncertainties of the parameters in the form of a covariance matrix (or only the diagonal elements).
##
## Note: it is the covariance that is returned, so if you want the standard deviation you have to
## take the square root of it.
proc fError(params: Tensor[U]): T =
let yCurve = xData.map_inline:
fitFunc(params, x)
result = chi2(yData, yCurve, yError)

let dof = xData.size - params.size
let sigma2 = fError(params) / T(dof)
let H = tensorHessian(fError, params)
let cov = sigma2 * H.inv()

if returnFullCov:
result = cov
else:
result = cov.getDiag()

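And a sketch of feeding a `levmarq` result into `paramUncertainties`; the model, the noise level, and the random seed are illustrative. As the note above says, the returned diagonal holds variances, so the square root gives the 1σ uncertainties:

```nim
import std/[math, random]
import arraymancer
import numericalnim

proc fitFunc(params: Tensor[float], x: float): float =
  # Model y = a * exp(b * x) with params = [a, b].
  params[0] * exp(params[1] * x)

randomize(42)
let xData = linspace(0.0, 1.0, 20)
let yData = xData.map_inline:
  2.0 * exp(-3.0 * x) + gauss(0.0, 0.01)  # synthetic data with 1σ = 0.01 noise
let yError = ones_like(yData) * 0.01

let paramsSol = levmarq(fitFunc, [1.0, -1.0].toTensor, xData, yData, yError = yError)
# Diagonal of the covariance matrix -> variances; sqrt -> standard deviations.
let sigmas = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()
echo sigmas
# Pass returnFullCov = true to get the full covariance matrix instead.
let cov = paramUncertainties(paramsSol, fitFunc, yData, xData, yError, returnFullCov = true)
```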





when isMainModule:
import benchy
16 changes: 16 additions & 0 deletions src/numericalnim/utils.nim
@@ -303,6 +303,22 @@ proc hermiteInterpolate*[T](x: openArray[float], t: openArray[float],
raise newException(ValueError, &"{a} not in interval {min(t)} - {max(t)}")



proc chi2*[T](yData, yFit, yError: seq[T] or Tensor[T]): T =
when yData is Tensor:
assert yData.rank == 1
assert yFit.rank == 1
assert yError.rank == 1
assert yData.size == yFit.size
assert yFit.size == yError.size
let N = yData.size
else:
let N = yData.len
result = T(0)
for i in 0 ..< N:
let temp = (yData[i] - yFit[i]) / yError[i]
result += temp * temp

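A tiny illustration of the new `chi2` helper with made-up numbers: two points, each exactly one standard deviation away from the fit, give χ² = 2.

```nim
import arraymancer
import numericalnim

let yData  = [1.0, 2.0].toTensor
let yFit   = [1.1, 1.9].toTensor
let yError = [0.1, 0.1].toTensor  # 1σ uncertainties
echo chi2(yData, yFit, yError)    # (-1.0)^2 + (1.0)^2 = 2.0
```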
proc delete*[T](s: var seq[T], idx: seq[int]) =
## Deletes the elements of seq s at indices idx.
## idx must not contain duplicates!
15 changes: 15 additions & 0 deletions tests/test_optimize.nim
@@ -144,4 +144,19 @@ suite "Multi-dim":
for x in abs(paramsSol - correctParams):
check x < 1.3e-3

test "levmarq with yError":
let yError = ones_like(yData) * 1e-2
let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)
for x in abs(paramsSol - correctParams):
check x < 1.3e-3

test "paramUncertainties":
let yError = ones_like(yData) * 1e-2
let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)

let uncertainties = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()

for (unc, err) in zip(uncertainties, abs(paramsSol - correctParams)):
check abs(unc / err) in 0.79 .. 3.6


25 changes: 24 additions & 1 deletion tests/test_utils.nim
@@ -1,4 +1,4 @@
import unittest, math, sequtils, algorithm
import unittest, math, sequtils, algorithm, random
import arraymancer
import ./numericalnim

@@ -91,3 +91,26 @@ test "meshgrid":
let grid = meshgrid(x, y, z)
check grid == [[0, 2, 4], [1, 2, 4], [0, 3, 4], [1, 3, 4], [0, 2, 5], [1, 2, 5], [0, 3, 5], [1, 3, 5]].toTensor

test "chi2 Tensor":
randomize(1337)
let N = 1000
let sigma = 1.23
let yMeasure = newSeqWith(N, gauss(0.0, sigma)).toTensor
let yCorrect = zeros[float](N)
let yError = ones[float](N) * sigma
let chi = chi2(yMeasure, yCorrect, yError)
# Check that the mean χ² is around 1
check chi / N.float in 0.90 .. 1.1

test "chi2 Seq":
randomize(1337)
let N = 1000
let sigma = 1.23
let yMeasure = newSeqWith(N, gauss(0.0, sigma))
let yCorrect = newSeqWith(N, 0.0)
let yError = newSeqWith(N, sigma)
let chi = chi2(yMeasure, yCorrect, yError)
# Check that the mean χ² is around 1
check chi / N.float in 0.90 .. 1.1

