Merge pull request #36 from SciNim/addCurveFitUncertainties
add chi2 + add uncertainties to levmarq
HugoGranstrom committed Jan 5, 2023
2 parents 9d2f6ab + fd4cd18 commit 334683a
Showing 5 changed files with 206 additions and 14 deletions.
2 changes: 1 addition & 1 deletion numericalnim.nimble
@@ -1,5 +1,5 @@
# Package Information
version = "0.8.5"
version = "0.8.6"
author = "Hugo Granström"
description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
license = "MIT"
162 changes: 150 additions & 12 deletions src/numericalnim/optimize.nim
@@ -1,6 +1,8 @@
import std/[strformat, sequtils, math, deques]
import arraymancer
import ./differentiate
import
./differentiate,
./utils

when not defined(nimHasEffectsOf):
{.pragma: effectsOf.}
@@ -126,9 +128,9 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision:
raise newException(ArithmeticError, "Maximum iterations for Secant method exceeded")
return xCurrent

##############################
## Multidimensional methods ##
##############################
# ######################## #
# Multidimensional methods #
# ######################## #

type LineSearchCriterion* = enum
Armijo, Wolfe, WolfeStrong, NoLineSearch
@@ -146,36 +148,71 @@
LBFGSOptions*[U] = object
savedIterations*: int

proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a vanilla OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.lambda0 = lambda0
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc steepestDescentOptions*[U](tol: U = U(1e-6), alpha: U = U(0.001), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a Steepest Descent OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc newtonOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a Newton OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc bfgsOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] =
## Returns a BFGS OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
result.maxIterations = maxIterations
result.lineSearchCriterion = lineSearchCriterion

proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LBFGSOptions[U]] =
## Returns a LBFGS OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
## - savedIterations: Number of past iterations to save. A higher value gives better steps at the cost of more memory and computation per step.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
@@ -184,6 +221,14 @@ proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(
result.algoOptions.savedIterations = savedIterations

proc levmarqOptions*[U](lambda0: U = U(1), tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LevMarqOptions[U]] =
## Returns a levmarq OptimOptions
## - tol: The tolerance used. This is the criterion for convergence: `gradNorm < tol*(1 + fNorm)`.
## - alpha: The step size.
## - fastMode: If true, a faster first order accurate finite difference approximation of the derivative will be used.
## Else a more accurate but slower second order finite difference scheme will be used.
## - maxIterations: The maximum number of iterations before returning if convergence hasn't been reached.
## - lineSearchCriterion: Which line search method to use.
## - lambda0: Starting value of the damping parameter.
result.tol = tol
result.alpha = alpha
result.fastMode = fastMode
@@ -253,7 +298,15 @@ template analyticOrNumericGradient(analytic, f, x, options: untyped): untyped =
analytic(x)

proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = steepestDescentOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## Minimize scalar-valued function f.
## Steepest descent method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `steepestDescentOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
var alpha = options.alpha
var x = x0.clone()
var fNorm = abs(f(x0))
@@ -275,6 +328,15 @@ proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
result = x

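For orientation, here is a minimal usage sketch of the `steepestDescent` API documented above (`newton` below takes the same arguments, paired with `newtonOptions`). The quadratic objective, its gradient, the starting point, and the step size are illustrative choices, not part of this commit, and it is assumed that the top-level `numericalnim` module re-exports these procs as the tests do:

```nim
import std/math
import arraymancer
import numericalnim

proc f(x: Tensor[float]): float =
  # Illustrative convex quadratic with its minimum at (1, -2).
  (x[0] - 1.0)^2 + (x[1] + 2.0)^2

proc grad(x: Tensor[float]): Tensor[float] =
  # Analytic gradient of f; omit it to fall back on finite differences.
  [2.0 * (x[0] - 1.0), 2.0 * (x[1] + 2.0)].toTensor

let x0 = [0.0, 0.0].toTensor
let opts = steepestDescentOptions[float](alpha = 0.01)
let sol = steepestDescent(f, x0, options = opts, analyticGradient = grad)
echo sol  # expected to approach [1.0, -2.0]
```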
proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = newtonOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## Newton's method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `newtonOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
var alpha = options.alpha
var x = x0.clone()
var fNorm = abs(f(x))
@@ -342,6 +404,15 @@ proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha:
result = x

proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## BFGS (Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `bfgsOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
# Use gemm and gemv with preallocated Tensors and setting beta = 0
var alpha = options.alpha
var x = x0.clone()
@@ -421,15 +492,24 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: O
#echo iters, " iterations done!"
result = x

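A hedged sketch of calling `bfgs` with one of the options objects above and a Wolfe line search; the Rosenbrock objective, starting point, and tolerance are illustrative and not taken from this commit:

```nim
import std/math
import arraymancer
import numericalnim

proc rosenbrock(x: Tensor[float]): float =
  # Classic Rosenbrock function with its minimum at (1, 1).
  100.0 * (x[1] - x[0]^2)^2 + (1.0 - x[0])^2

let x0 = [-1.0, 2.0].toTensor
let opts = bfgsOptions[float](tol = 1e-8, lineSearchCriterion = Wolfe)
let sol = bfgs(rosenbrock, x0, options = opts)
echo sol  # expected to end up near [1.0, 1.0]
```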
proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] =
## LBFGS (Limited-memory Broyden–Fletcher–Goldfarb–Shanno) method for optimization.
##
## Inputs:
## - f: The function to optimize. It should take as input a 1D Tensor of the input variables and return a scalar.
## - options: Options object (see `lbfgsOptions` for constructing one)
## - analyticGradient: The analytic gradient of `f` taking in and returning a 1D Tensor. If not provided, a finite difference approximation will be performed instead.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
var alpha = options.alpha
var x = x0.clone()
let xLen = x.shape[0]
var fNorm = abs(f(x))
var gradient = 0.01*analyticOrNumericGradient(analyticGradient, f, x0, options)
var gradNorm = vectorNorm(gradient)
var iters: int
#let m = 10 # number of past iterations to save
let m = options.algoOptions.savedIterations # number of past iterations to save
var sk_queue = initDeque[Tensor[U]](m)
var yk_queue = initDeque[Tensor[T]](m)
# the problem is the first iteration as the gradient is huge and no adjustments are made
@@ -475,7 +555,20 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int =
#echo iters, " iterations done!"
result = x

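Since this commit moves the history length of `lbfgs` from the removed `m` argument into the options object, here is a sketch of the updated call; the objective, starting point, and `savedIterations = 5` are illustrative:

```nim
import std/math
import arraymancer
import numericalnim

proc rosenbrock(x: Tensor[float]): float =
  # Classic Rosenbrock function with its minimum at (1, 1).
  100.0 * (x[1] - x[0]^2)^2 + (1.0 - x[0])^2

# savedIterations replaces the old `m` argument.
let opts = lbfgsOptions[float](savedIterations = 5)
let sol = lbfgs(rosenbrock, [-1.0, 2.0].toTensor, options = opts)
echo sol  # expected to end up near [1.0, 1.0]
```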
proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U] =
proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U](), yError: Tensor[T] = ones_like(yData)): Tensor[U] =
## Levenberg-Marquardt method for non-linear least squares problems. In essence it fits the parameters of a function to data samples.
##
## Input:
## - f: The function you want to fit the data to. The first argument should be a 1D Tensor with the values of the parameters
## and the second argument is the value of the independent variable to evaluate the function at.
## - params0: The starting guess for the parameter values as a 1D Tensor.
## - yData: The measured values of the dependent variable as 1D Tensor.
## - xData: The values of the independent variable as 1D Tensor.
## - options: Object with all the options like `tol` and `lambda0`. (see `levmarqOptions`)
## - yError: The uncertainties of the `yData` as 1D Tensor. Ideally these should be the 1σ standard deviations.
##
## Returns:
## - The final solution for the parameters, either because a (local) minimum was found or because the maximum number of iterations was reached.
assert xData.rank == 1
assert yData.rank == 1
assert params0.rank == 1
@@ -486,8 +579,8 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te

let residualFunc = # proc that returns the residual vector
proc (params: Tensor[U]): Tensor[T] =
result = map2_inline(xData, yData):
f(params, x) - y
result = map3_inline(xData, yData, yError):
(f(params, x) - y) / z

let errorFunc = # proc that returns the scalar error
proc (params: Tensor[U]): T =
@@ -524,6 +617,51 @@ proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Te
result = params

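A hedged sketch of the updated `levmarq` signature with the new `yError` argument weighting the residuals; the exponential model, the synthetic noise-free data, and the 0.01 uncertainties are made up for illustration:

```nim
import std/math
import arraymancer
import numericalnim

proc fitFunc(params: Tensor[float], x: float): float =
  # Model y = a * exp(b * x) with params = [a, b].
  params[0] * exp(params[1] * x)

let xData = linspace(0.0, 1.0, 20)
let yData = xData.map_inline:
  2.0 * exp(-3.0 * x)  # synthetic "measurements" generated with a = 2, b = -3
let yError = ones_like(yData) * 0.01  # assumed 1σ uncertainties, used to weight the residuals

let paramsSol = levmarq(fitFunc, [1.0, -1.0].toTensor, xData, yData, yError = yError)
echo paramsSol  # expected to be close to [2.0, -3.0]
```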

proc inv[T](t: Tensor[T]): Tensor[T] =
result = solve(t, eye[T](t.shape[0]))

proc getDiag[T](t: Tensor[T]): Tensor[T] =
let n = t.shape[0]
result = newTensor[T](n)
for i in 0 ..< n:
result[i] = t[i,i]

proc paramUncertainties*[U; T](params: Tensor[U], fitFunc: proc(params: Tensor[U], x: U): T, yData: Tensor[T], xData: Tensor[U], yError: Tensor[T], returnFullCov = false): Tensor[T] =
## Returns the whole covariance matrix or only the diagonal elements for the parameters in `params`.
##
## Inputs:
## - params: The parameters in a 1D Tensor that the uncertainties are wanted for.
## - fitFunc: The function used for fitting the parameters. (see `levmarq` for more)
## - yData: The measured values of the dependent variable as 1D Tensor.
## - xData: The values of the independent variable as 1D Tensor.
## - yError: The uncertainties of the `yData` as 1D Tensor. Ideally these should be the 1σ standard deviation.
## - returnFullCov: If true, the full covariance matrix will be returned as a 2D Tensor, else only the diagonal elements will be returned as a 1D Tensor.
##
## Returns:
##
## The uncertainties of the parameters in the form of a covariance matrix (or only the diagonal elements).
##
## Note: it is the covariance that is returned, so if you want the standard deviation you have to
## take the square root of it.
proc fError(params: Tensor[U]): T =
let yCurve = xData.map_inline:
fitFunc(params, x)
result = chi2(yData, yCurve, yError)

let dof = xData.size - params.size
let sigma2 = fError(params) / T(dof)
let H = tensorHessian(fError, params)
let cov = sigma2 * H.inv()

if returnFullCov:
result = cov
else:
result = cov.getDiag()

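And a sketch of feeding a `levmarq` result into `paramUncertainties`; the model, the noise level, and the random seed are illustrative. As the note above says, the returned diagonal holds variances, so the square root gives the 1σ uncertainties:

```nim
import std/[math, random]
import arraymancer
import numericalnim

proc fitFunc(params: Tensor[float], x: float): float =
  # Model y = a * exp(b * x) with params = [a, b].
  params[0] * exp(params[1] * x)

randomize(42)
let xData = linspace(0.0, 1.0, 20)
let yData = xData.map_inline:
  2.0 * exp(-3.0 * x) + gauss(0.0, 0.01)  # synthetic data with 1σ = 0.01 noise
let yError = ones_like(yData) * 0.01

let paramsSol = levmarq(fitFunc, [1.0, -1.0].toTensor, xData, yData, yError = yError)
# Diagonal of the covariance matrix -> variances; sqrt -> standard deviations.
let sigmas = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()
echo sigmas
# Pass returnFullCov = true to get the full covariance matrix instead.
let cov = paramUncertainties(paramsSol, fitFunc, yData, xData, yError, returnFullCov = true)
```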





when isMainModule:
import benchy
16 changes: 16 additions & 0 deletions src/numericalnim/utils.nim
@@ -303,6 +303,22 @@ proc hermiteInterpolate*[T](x: openArray[float], t: openArray[float],
raise newException(ValueError, &"{a} not in interval {min(t)} - {max(t)}")



proc chi2*[T](yData, yFit, yError: seq[T] or Tensor[T]): T =
when yData is Tensor:
assert yData.rank == 1
assert yFit.rank == 1
assert yError.rank == 1
assert yData.size == yFit.size
assert yFit.size == yError.size
let N = yData.size
else:
let N = yData.len
result = T(0)
for i in 0 ..< N:
let temp = (yData[i] - yFit[i]) / yError[i]
result += temp * temp

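A tiny illustration of the new `chi2` helper with made-up numbers: two points, each exactly one standard deviation away from the fit, give χ² = 2.

```nim
import arraymancer
import numericalnim

let yData  = [1.0, 2.0].toTensor
let yFit   = [1.1, 1.9].toTensor
let yError = [0.1, 0.1].toTensor  # 1σ uncertainties
echo chi2(yData, yFit, yError)    # (-1.0)^2 + (1.0)^2 = 2.0
```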
proc delete*[T](s: var seq[T], idx: seq[int]) =
## Deletes the elements of seq s at indices idx.
## idx must not contain duplicates!
15 changes: 15 additions & 0 deletions tests/test_optimize.nim
@@ -144,4 +144,19 @@ suite "Multi-dim":
for x in abs(paramsSol - correctParams):
check x < 1.3e-3

test "levmarq with yError":
let yError = ones_like(yData) * 1e-2
let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)
for x in abs(paramsSol - correctParams):
check x < 1.3e-3

test "paramUncertainties":
let yError = ones_like(yData) * 1e-2
let paramsSol = levmarq(fitFunc, params0, xData, yData, yError=yError)

let uncertainties = paramUncertainties(paramsSol, fitFunc, yData, xData, yError).sqrt()

for (unc, err) in zip(uncertainties, abs(paramsSol - correctParams)):
check abs(unc / err) in 0.79 .. 3.6


25 changes: 24 additions & 1 deletion tests/test_utils.nim
@@ -1,4 +1,4 @@
import unittest, math, sequtils, algorithm
import unittest, math, sequtils, algorithm, random
import arraymancer
import ./numericalnim

@@ -91,3 +91,26 @@ test "meshgrid":
let grid = meshgrid(x, y, z)
check grid == [[0, 2, 4], [1, 2, 4], [0, 3, 4], [1, 3, 4], [0, 2, 5], [1, 2, 5], [0, 3, 5], [1, 3, 5]].toTensor

test "chi2 Tensor":
randomize(1337)
let N = 1000
let sigma = 1.23
let yMeasure = newSeqWith(N, gauss(0.0, sigma)).toTensor
let yCorrect = zeros[float](N)
let yError = ones[float](N) * sigma
let chi = chi2(yMeasure, yCorrect, yError)
# Check that the mean χ² is around 1
check chi / N.float in 0.90 .. 1.1

test "chi2 Seq":
randomize(1337)
let N = 1000
let sigma = 1.23
let yMeasure = newSeqWith(N, gauss(0.0, sigma))
let yCorrect = newSeqWith(N, 0.0)
let yError = newSeqWith(N, sigma)
let chi = chi2(yMeasure, yCorrect, yError)
# Check that the mean χ² is around 1
check chi / N.float in 0.90 .. 1.1

