From 3608e1f9f8a512542c99aaa556e8095b4d663edf Mon Sep 17 00:00:00 2001 From: KathiRath Date: Thu, 18 Feb 2021 16:36:06 +0100 Subject: [PATCH] Submitted version (SympGPR) --- .gitignore | 13 + python/01_pendulum/explicit/Makefile | 13 + python/01_pendulum/explicit/func_expl.py | 144 + python/01_pendulum/explicit/init_func.py | 81 + python/01_pendulum/explicit/kernels.py | 15 + python/01_pendulum/explicit/kernels_sum.f90 | 208 + python/01_pendulum/explicit/main.py | 159 + python/01_pendulum/implicit/Makefile | 13 + python/01_pendulum/implicit/func.py | 199 + python/01_pendulum/implicit/init_func.py | 81 + python/01_pendulum/implicit/kernels.f90 | 231 + python/01_pendulum/implicit/kernels.py | 15 + python/01_pendulum/implicit/main.py | 196 + python/02_pert_pendulum/Makefile | 13 + python/02_pert_pendulum/calc_poincare.py | 111 + python/02_pert_pendulum/func.py | 272 + python/02_pert_pendulum/init_func.py | 81 + python/02_pert_pendulum/kernels.f90 | 231 + python/02_pert_pendulum/main.py | 111 + python/03_henon_heiles/Makefile | 4 + python/03_henon_heiles/func.py | 259 + python/03_henon_heiles/henon.f90 | 89 + python/03_henon_heiles/henon_mod.f90 | 15 + python/03_henon_heiles/init_func.py | 80 + python/03_henon_heiles/kernels_sq.f90 | 217 + python/03_henon_heiles/main.py | 227 + python/03_henon_heiles/make_henon.mk | 9 + python/03_henon_heiles/make_kernels.mk | 10 + python/03_henon_heiles/test_henon.py | 19 + python/03_henon_heiles/vode/ChangeRecord.txt | 356 + python/03_henon_heiles/vode/dvode_f90_m.f90 | 18960 ++++++++++++++++ python/03_henon_heiles/vode/example1.f90 | 117 + python/03_henon_heiles/vode/example2.f90 | 118 + python/03_henon_heiles/vode/quickstart.tex | 95 + .../03_henon_heiles/vode/vode_f90_license.txt | 40 + python/04_standard_map/Makefile | 13 + python/04_standard_map/func.py | 286 + python/04_standard_map/init_func.py | 81 + python/04_standard_map/kernels.f90 | 231 + python/04_standard_map/kernels.py | 15 + .../kernels_expl_per_q_sq_p.f90 | 206 + python/04_standard_map/main.py | 210 + python/05_tokamak/SplittedSympGPR/Makefile | 3 + .../SplittedSympGPR/calc_fieldlines.py | 128 + python/05_tokamak/SplittedSympGPR/common.py | 153 + .../05_tokamak/SplittedSympGPR/field_test.py | 47 + .../05_tokamak/SplittedSympGPR/fieldlines.f90 | 174 + .../SplittedSympGPR/fieldlines_fast.py | 54 + .../SplittedSympGPR/fieldlines_mid.py | 67 + .../SplittedSympGPR/fieldlines_rk.py | 77 + python/05_tokamak/SplittedSympGPR/func.py | 315 + .../05_tokamak/SplittedSympGPR/init_func.py | 81 + python/05_tokamak/SplittedSympGPR/kernels.f90 | 231 + python/05_tokamak/SplittedSympGPR/main.py | 135 + .../SplittedSympGPR/make_fieldlines.mk | 13 + .../SplittedSympGPR/make_kernels.mk | 10 + python/05_tokamak/SplittedSympGPR/minpack.f90 | 6036 +++++ python/05_tokamak/SplittedSympGPR/plotting.py | 63 + python/05_tokamak/SympGPR/Makefile | 3 + python/05_tokamak/SympGPR/calc_fieldlines.py | 128 + python/05_tokamak/SympGPR/common.py | 153 + python/05_tokamak/SympGPR/field_test.py | 47 + python/05_tokamak/SympGPR/fieldlines.f90 | 174 + python/05_tokamak/SympGPR/fieldlines_fast.py | 54 + python/05_tokamak/SympGPR/fieldlines_mid.py | 67 + python/05_tokamak/SympGPR/fieldlines_rk.py | 77 + python/05_tokamak/SympGPR/func.py | 246 + python/05_tokamak/SympGPR/init_func.py | 81 + python/05_tokamak/SympGPR/kernels.f90 | 231 + python/05_tokamak/SympGPR/kernels.py | 15 + python/05_tokamak/SympGPR/main.py | 114 + python/05_tokamak/SympGPR/make_fieldlines.mk | 13 + python/05_tokamak/SympGPR/make_kernels.mk | 10 + python/05_tokamak/SympGPR/minpack.f90 | 6036 +++++ python/05_tokamak/SympGPR/plotting.py | 63 + 75 files changed, 38943 insertions(+) create mode 100644 .gitignore create mode 100644 python/01_pendulum/explicit/Makefile create mode 100644 python/01_pendulum/explicit/func_expl.py create mode 100644 python/01_pendulum/explicit/init_func.py create mode 100644 python/01_pendulum/explicit/kernels.py create mode 100644 python/01_pendulum/explicit/kernels_sum.f90 create mode 100644 python/01_pendulum/explicit/main.py create mode 100644 python/01_pendulum/implicit/Makefile create mode 100644 python/01_pendulum/implicit/func.py create mode 100644 python/01_pendulum/implicit/init_func.py create mode 100644 python/01_pendulum/implicit/kernels.f90 create mode 100644 python/01_pendulum/implicit/kernels.py create mode 100644 python/01_pendulum/implicit/main.py create mode 100644 python/02_pert_pendulum/Makefile create mode 100644 python/02_pert_pendulum/calc_poincare.py create mode 100644 python/02_pert_pendulum/func.py create mode 100644 python/02_pert_pendulum/init_func.py create mode 100644 python/02_pert_pendulum/kernels.f90 create mode 100644 python/02_pert_pendulum/main.py create mode 100644 python/03_henon_heiles/Makefile create mode 100644 python/03_henon_heiles/func.py create mode 100644 python/03_henon_heiles/henon.f90 create mode 100644 python/03_henon_heiles/henon_mod.f90 create mode 100644 python/03_henon_heiles/init_func.py create mode 100644 python/03_henon_heiles/kernels_sq.f90 create mode 100644 python/03_henon_heiles/main.py create mode 100644 python/03_henon_heiles/make_henon.mk create mode 100644 python/03_henon_heiles/make_kernels.mk create mode 100644 python/03_henon_heiles/test_henon.py create mode 100644 python/03_henon_heiles/vode/ChangeRecord.txt create mode 100644 python/03_henon_heiles/vode/dvode_f90_m.f90 create mode 100644 python/03_henon_heiles/vode/example1.f90 create mode 100644 python/03_henon_heiles/vode/example2.f90 create mode 100644 python/03_henon_heiles/vode/quickstart.tex create mode 100644 python/03_henon_heiles/vode/vode_f90_license.txt create mode 100644 python/04_standard_map/Makefile create mode 100644 python/04_standard_map/func.py create mode 100644 python/04_standard_map/init_func.py create mode 100644 python/04_standard_map/kernels.f90 create mode 100644 python/04_standard_map/kernels.py create mode 100644 python/04_standard_map/kernels_expl_per_q_sq_p.f90 create mode 100644 python/04_standard_map/main.py create mode 100644 python/05_tokamak/SplittedSympGPR/Makefile create mode 100644 python/05_tokamak/SplittedSympGPR/calc_fieldlines.py create mode 100644 python/05_tokamak/SplittedSympGPR/common.py create mode 100644 python/05_tokamak/SplittedSympGPR/field_test.py create mode 100644 python/05_tokamak/SplittedSympGPR/fieldlines.f90 create mode 100644 python/05_tokamak/SplittedSympGPR/fieldlines_fast.py create mode 100644 python/05_tokamak/SplittedSympGPR/fieldlines_mid.py create mode 100644 python/05_tokamak/SplittedSympGPR/fieldlines_rk.py create mode 100644 python/05_tokamak/SplittedSympGPR/func.py create mode 100644 python/05_tokamak/SplittedSympGPR/init_func.py create mode 100644 python/05_tokamak/SplittedSympGPR/kernels.f90 create mode 100644 python/05_tokamak/SplittedSympGPR/main.py create mode 100644 python/05_tokamak/SplittedSympGPR/make_fieldlines.mk create mode 100644 python/05_tokamak/SplittedSympGPR/make_kernels.mk create mode 100644 python/05_tokamak/SplittedSympGPR/minpack.f90 create mode 100644 python/05_tokamak/SplittedSympGPR/plotting.py create mode 100644 python/05_tokamak/SympGPR/Makefile create mode 100644 python/05_tokamak/SympGPR/calc_fieldlines.py create mode 100644 python/05_tokamak/SympGPR/common.py create mode 100644 python/05_tokamak/SympGPR/field_test.py create mode 100644 python/05_tokamak/SympGPR/fieldlines.f90 create mode 100644 python/05_tokamak/SympGPR/fieldlines_fast.py create mode 100644 python/05_tokamak/SympGPR/fieldlines_mid.py create mode 100644 python/05_tokamak/SympGPR/fieldlines_rk.py create mode 100644 python/05_tokamak/SympGPR/func.py create mode 100644 python/05_tokamak/SympGPR/init_func.py create mode 100644 python/05_tokamak/SympGPR/kernels.f90 create mode 100644 python/05_tokamak/SympGPR/kernels.py create mode 100644 python/05_tokamak/SympGPR/main.py create mode 100644 python/05_tokamak/SympGPR/make_fieldlines.mk create mode 100644 python/05_tokamak/SympGPR/make_kernels.mk create mode 100644 python/05_tokamak/SympGPR/minpack.f90 create mode 100644 python/05_tokamak/SympGPR/plotting.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8948c47 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +__pycache__ +*.dll +*.so +*.pyc +*.pyd +*~ +*.mod +outcmaes +*.o +*.pyf +*.dylib +*.npz +*.png \ No newline at end of file diff --git a/python/01_pendulum/explicit/Makefile b/python/01_pendulum/explicit/Makefile new file mode 100644 index 0000000..b817385 --- /dev/null +++ b/python/01_pendulum/explicit/Makefile @@ -0,0 +1,13 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels_sum + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py + +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll $(NAME).f90 diff --git a/python/01_pendulum/explicit/func_expl.py b/python/01_pendulum/explicit/func_expl.py new file mode 100644 index 0000000..9b94f3e --- /dev/null +++ b/python/01_pendulum/explicit/func_expl.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:47:00 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.linalg import solve_triangular +import scipy +from scipy.integrate import solve_ivp + + +from kernels_sum import * + + +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def dkdx(x, y, x0, y0, l): + return dkdx_num(x,y,x0,y0,l[0], l[1]) + +def dkdy(x, y, x0, y0, l): + return dkdy_num(x,y,x0,y0,l[0], l[1]) + +def dkdx0(x, y, x0, y0, l): + return dkdx0_num(x,y,x0,y0,l[0], l[1]) + +def dkdy0(x, y, x0, y0, l): + return dkdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + + +def intode(y, t, dtsymp): + ys = np.zeros([2, len(t)]) + ys[0,0] = y[0] + ys[1,0] = y[1] + + for kt in range(1,len(t)): + ys[1, kt]=ys[1, kt-1] - dtsymp*(np.sin(ys[0, kt-1] + np.pi)) + ys[0, kt]=ys[0, kt-1] + dtsymp*ys[1, kt] + return ys.T + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def energy(x, U0): + return x[1]**2/2 + U0*(1 + np.cos(x[0])) + + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +# negative log-posterior +def nll_chol(hyp, x, y): + K = np.empty((len(x), len(x))) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(len(x))) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + + +def calcQ(x,y, xtrain, l, Kyinv, ztrain, Ntest): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = qGP[1] + + return f, qGP[0] + +def calcP(x,y, l, xtrain, ztrain, Kyinv, Ntest): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + res = -pGP[0] + return res + +def applymap(l, Q0map, P0map, xtrain, ztrain, Kyinv, Ntest, nm): + pmap = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + for i in range(0,nm-1): + for k in range(0, Ntest): + pmap[i+1, k] = pmap[i, k] + calcP(qmap[i,k], pmap[i, k], l, xtrain, ztrain, Kyinv, Ntest) + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + qmap[i+1, k], dpmap = calcQ(qmap[i,k], pmap[i+1,k],xtrain, l, Kyinv, ztrain, Ntest) + qmap[i+1, k] = np.mod(qmap[i+1,k] + qmap[i, k], 2.0*np.pi) + return qmap, pmap + +def dydt_ivp(t, y): + ydot = np.zeros([2]) + ydot[0] = y[1] # dx/dt + ydot[1] = -np.sin(y[0]+np.pi)# - eps*np.sin(y_[0] - om*(t))#- 0.5*np.pi) # dpx/dt + return ydot + +def integrate_pendulum(q0, p0, t): + ysint = np.zeros([len(t), 2, len(q0)]) # initial values for y + ysint = [] + for ik in range(len(q0)): + res_int = solve_ivp(dydt_ivp, [t[0], t[-1]], np.array((q0[ik], p0[ik])), t_eval = t, method='LSODA', rtol = 1e-13, atol = 1e-16) + temp = res_int.y + ysint.append(temp) + return ysint + diff --git a/python/01_pendulum/explicit/init_func.py b/python/01_pendulum/explicit/init_func.py new file mode 100644 index 0000000..cf60597 --- /dev/null +++ b/python/01_pendulum/explicit/init_func.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +kern_sqexp_sin = exp(-sin(0.5*x)**2/(2.0*l**2)) + +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern_x = kern_sqexp_sin.subs(x, xa-xb).subs(l, lx) +kern = (kern_x+kern_y).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels_sum", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/01_pendulum/explicit/kernels.py b/python/01_pendulum/explicit/kernels.py new file mode 100644 index 0000000..8b9329f --- /dev/null +++ b/python/01_pendulum/explicit/kernels.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Aug 9 22:30:35 2020 + +@author: manal +""" + +def __bootstrap__(): + global __bootstrap__, __loader__, __file__ + import sys, pkg_resources, imp + __file__ = pkg_resources.resource_filename(__name__,'kernels_sum.cpython-36m-x86_64-linux-gnu.so') + __loader__ = None; del __bootstrap__, __loader__ + imp.load_dynamic(__name__,__file__) +__bootstrap__() \ No newline at end of file diff --git a/python/01_pendulum/explicit/kernels_sum.f90 b/python/01_pendulum/explicit/kernels_sum.f90 new file mode 100644 index 0000000..76eacee --- /dev/null +++ b/python/01_pendulum/explicit/kernels_sum.f90 @@ -0,0 +1,208 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp((-0.5d0*y_a**2 + 1.0d0*y_a*y_b - 0.5d0*y_b**2)/ly**2) + & + exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -0.5d0*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)*sin( & + 0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp((-0.5d0*y_a**2 + 1.0d0*y_a*y_b - 0.5d0 & + *y_b**2)/ly**2)/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 0.5d0*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)*sin( & + 0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp((-0.5d0*y_a**2 + 1.0d0*y_a*y_b - 0.5d0 & + *y_b**2)/ly**2)/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = (0.25d0*lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - 0.25d0*sin( & + 0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp( & + -0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp((-0.5d0*y_a**2 + 1.0d0 & + *y_a*y_b - 0.5d0*y_b**2)/ly**2)/ly**4 +end function +INTEGER*4 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = 0 +end function +INTEGER*4 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0 +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - 1.0d0*(y_a - y_b)**2)*(y_a - y_b)*exp(( & + -0.5d0*y_a**2 + 1.0d0*y_a*y_b - 0.5d0*y_b**2)/ly**2)/ly**6 +end function +INTEGER*4 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0 +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)*sin( & + 0.5d0*x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = (y_a**2 - 2.0d0*y_a*y_b + y_b**2)*exp((-0.5d0*y_a**2 + 1.0d0 & + *y_a*y_b - 0.5d0*y_b**2)/ly**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-0.5d0*lx**4*cos(1.0d0*x_a - 1.0d0*x_b) + lx**2*( & + 0.75d0*cos(1.0d0*x_a - 1.0d0*x_b) + 0.5d0)*sin(0.5d0*x_a - 0.5d0* & + x_b)**2 - 0.25d0*sin(0.5d0*x_a - 0.5d0*x_b)**4*cos(0.5d0*x_a - & + 0.5d0*x_b)**2)*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/lx & + **7 +end function +INTEGER*4 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 0 +end function +INTEGER*4 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = 0 +end function +INTEGER*4 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0 +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + ly**2*(1.0d0*y_a**2 - 2.0d0*y_a*y_b + & + 1.0d0*y_b**2 + 4.0d0*(y_a - y_b)**2) + (y_a - y_b)**2*(-1.0d0*y_a & + **2 + 2.0d0*y_a*y_b - 1.0d0*y_b**2))*exp((-0.5d0*y_a**2 + 1.0d0* & + y_a*y_b - 0.5d0*y_b**2)/ly**2)/ly**7 +end function +INTEGER*4 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = 0 +end function diff --git a/python/01_pendulum/explicit/main.py b/python/01_pendulum/explicit/main.py new file mode 100644 index 0000000..25c4c01 --- /dev/null +++ b/python/01_pendulum/explicit/main.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:45:03 2020 + +@author: Katharina Rath +""" +#%% +import numpy as np +from scipy.optimize import minimize +from func_expl import (integrate_pendulum, build_K, energy, + applymap, nll_chol) +from sklearn.metrics import mean_squared_error +import ghalton +import random +import tkinter +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt + +#%% init parameters +Nm = 70 #mapping time (Nm = 70 for Fig. 4) +N = 20 #training data (N = 20 for Fig. 4) +U0 = 1 +nm = 1000 # how often the map should be applied +dtsymp = 0.001 #integration step for producing training data +sig2_n = 1e-10 #noise**2 in observations +Ntest = 15 # number of testpoints + +#define boundaries for training data +qmin = 0.0 +qmax = 2*np.pi +pmin = -3 +pmax = 3 + +#define boundaries for test data +qminmap = np.array(np.pi - 2.8) +qmaxmap = np.array(np.pi + 1.5) +pminmap = np.array(-2.3) +pmaxmap = np.array(1.8) + +#set initial conditions (q0, p0) randomly for test data +random.seed(1) +q0map = np.linspace(qminmap,qmaxmap,Ntest) +p0map = np.linspace(pminmap,pmaxmap,Ntest) +q0map = random.sample(list(q0map), Ntest) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map) +P0map = np.array(p0map) + + +#%% calculate training data + +# specify initial conditions (q0, p0), sampled from Halton sequence +seq = ghalton.Halton(2) +samples = seq.get(N)*np.array([qmax-qmin, pmax-pmin]) + np.array([qmin, pmin]) + +q = samples[:,0] +p = samples[:,1] + +t0 = 0.0 # starting time +t1 = dtsymp*Nm # end time +t = np.linspace(t0,t1,Nm) # integration points in time + +#integrate pendulum to provide data +ysint = integrate_pendulum(q, p, t) + +#set training data +# xtrain = (q, P), ztrain = grad F = (p-P, Q-q) +P = np.empty((N)) +Q = np.empty((N)) +for ik in range(0, N): + P[ik] = ysint[ik][1,-1] + Q[ik] = ysint[ik][0,-1] + +zqtrain = Q - q +zptrain = p - P + +xtrain = np.hstack((q, P)) +ztrain = np.concatenate((zptrain, zqtrain)) + +#%% +# set up GP +# as indicated in Algorithm 2: explicit symplectic GP map +# Step 1: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) according to Eq. 41 +# hyperparameter optimization for lengthscales (lq, lp) and GP fitting +log10l0 = np.array((-1., -1.), dtype=float) +sig = 2*np.amax(np.abs(ztrain))**2 + +def nll_transform(log10hyp, sig, sig2n, x, y): + hyp = 10**log10hyp + return nll_chol(np.hstack((hyp, sig, [sig2n])), x, y) + +res = minimize(nll_transform, log10l0, args = (sig, sig2_n, xtrain, ztrain.T.flatten()), method='L-BFGS-B') +sol1 = 10**res.x +l = [np.abs(sol1[0]), np.abs(sol1[1])] +print('Optimized lengthscales for mixed expl. GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1])) +hyp = np.hstack((l, sig)) +K = np.empty((2*N, 2*N)) +build_K(xtrain,xtrain, hyp, K) +Kyinv = np.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + +#%% training error +Eftrain = K.dot(Kyinv.dot(ztrain)) +outtrain = mean_squared_error(ztrain, Eftrain) +print('training error', "{:.1e}".format(outtrain)) + +#%% Map application +qmap, pmap = applymap( + hyp, Q0map, P0map, xtrain, ztrain, Kyinv, Ntest, nm) +#calculate energy +H = energy([qmap, pmap], U0) + +#%% Quality of map +t0 = 0.0 # starting time +t1 = dtsymp*Nm*(nm+20) # end time +t = np.linspace(t0,t1,Nm*(nm+20)) # integration points in time + +#integrate test data approx. one bounce period +yinttest = np.array(integrate_pendulum(qmap[0,:], pmap[0,:], t)).T +yinttest[:,0,:] = np.mod(yinttest[:,0,:], 2*np.pi) + +#%% plot results + +plt.rc('xtick',labelsize=20) +plt.rc('ytick',labelsize=20) +plt.figure(figsize = [10, 3]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(qmap[:,i], pmap[:,i], 'k^', markersize = 0.5) +plt.ylim([-2.8, 2.8]) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,2) +for i in range(0, Ntest): + plt.plot(yinttest[:,0,i], yinttest[:,1,i], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.ylim([-2.8, 2.8]) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,3) +for i in range(0, Ntest): + plt.plot(yinttest[:,0,i], yinttest[:,1,i], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) + plt.plot(qmap[:,i], pmap[:,i], 'k^', markersize = 0.5) +plt.ylim([-2.8, 2.8]) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.figure() +for i in range(0, qmap.shape[1]): + plt.plot(H[:,i]/(np.mean(H[:,i]))) +plt.xlabel('t', fontsize = 20) +plt.ylabel(r'H/$\bar{H}$', fontsize = 20) +plt.title('Energy Expl. SympGPR') +plt.tight_layout() +plt.show(block=True) \ No newline at end of file diff --git a/python/01_pendulum/implicit/Makefile b/python/01_pendulum/implicit/Makefile new file mode 100644 index 0000000..e4e38fc --- /dev/null +++ b/python/01_pendulum/implicit/Makefile @@ -0,0 +1,13 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py + +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll $(NAME).f90 diff --git a/python/01_pendulum/implicit/func.py b/python/01_pendulum/implicit/func.py new file mode 100644 index 0000000..4b0847d --- /dev/null +++ b/python/01_pendulum/implicit/func.py @@ -0,0 +1,199 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:47:00 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import newton +from scipy.linalg import solve_triangular +import scipy +from sklearn.metrics import mean_squared_error + +from scipy.integrate import solve_ivp + +from kernels import * + +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + +def intode(y, t, dtsymp): + ys = np.zeros([2, len(t)]) + ys[0,0] = y[0] + ys[1,0] = y[1] + + for kt in range(1,len(t)): + ys[1, kt]=ys[1, kt-1] - dtsymp*(np.sin(ys[0, kt-1] + np.pi)) + ys[0, kt]=ys[0, kt-1] + dtsymp*ys[1, kt] + return ys.T + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def buildKreg(xin, x0in, hyp, K): + # set up "usual" covariance matrix for GP on regular grid (q,p) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0] + N0 = K.shape[1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = f_kern( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + + + +def gpsolve(Ky, ft): + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_triangular( + L.T, solve_triangular(L, ft, lower=True, check_finite=False), + lower=False, check_finite=False) + + return L, alpha + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +# negative log-posterior +def nll_chol(hyp, x, y, N, buildK = build_K): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +def energy(x, U0): + return x[1]**2/2 + U0*(1 + np.cos(x[0])) + +def guessP(x, y, hypp, xtrainp, ztrainp, Kyinvp, N): + Ntest = 1 + Kstar = np.empty((Ntest, int(len(xtrainp)/2))) + buildKreg(np.hstack((x,y)), xtrainp, hypp, Kstar) + Ef = Kstar.dot(Kyinvp.dot(ztrainp)) + return Ef + +def calcQ(x,y, xtrain, l, Kyinv, ztrain): + # get \Delta q from GP on mixed grid. + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + dq = qGP[1] + return dq + +def Pnewton(P, x, y, l, xtrain, Kyinv, ztrain): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x, P)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = pGP[0] - y + P + return f + +def calcP(x,y, l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest): + # as P is given in an implicit relation, use newton to solve for P (Eq. (42)) + # use the GP on regular grid (q,p) for a first guess for P + pgss = guessP([x], [y], hypp, xtrainp, ztrainp, Kyinvp, Ntest) + res, r = newton(Pnewton, pgss, full_output=True, maxiter=5, disp=False, + args = (np.array([x]), np.array([y]), l, xtrain, Kyinv, ztrain)) + return res + +def applymap(nm, Ntest, l, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv): + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + # set new P including Newton for implicit Eq (42) + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest) + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q (Eq. (43)) + qmap[i+1, k] = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + qmap[i+1, k] = np.mod(qmap[i+1,k] + qmap[i, k], 2.0*np.pi) + return qmap, pmap + +def dydt_ivp(t, y): + ydot = np.zeros([2]) + ydot[0] = y[1] + ydot[1] = -np.sin(y[0]+np.pi) + return ydot + +def integrate_pendulum(q0, p0, t): + ysint = [] + for ik in range(len(q0)): + res_int = solve_ivp(dydt_ivp, [t[0], t[-1]], np.array((q0[ik], p0[ik])),t_eval = t, method='LSODA', rtol = 1e-13, atol = 1e-16) + temp = res_int.y + ysint.append(temp) + return ysint + + +def quality(qmap, pmap, H, ysint, Ntest, Nm): + #geom. distance + gd = np.zeros([Ntest]) + for lk in range(0,Ntest): + gd[lk] = mean_squared_error(([qmap[1, lk], pmap[1, lk]]),ysint[Nm, :, lk]) + stdgd = np.std(gd[:]) + # Energy oscillation + Eosc = np.zeros([Ntest]) + for lk in range(0, Ntest): + Eosc[lk] = np.std(H[:,lk])/np.mean(H[:,lk]) + return Eosc, gd, stdgd + + +def symplEuler_pendulum(q0, p0, t, dt): + #yint = np.zeros([len(t), 2, len(q0)]) # initial values for y + ysint = np.zeros([len(t), 2, len(q0)]) # initial values for y + + for k in range(len(q0)): + # yint[:, :, k] = spint.odeint(dydt, [q0[k], p0[k]], t) + ysint[:,:, k] = intode([q0[k], p0[k]], t, dt) + # ysint[:,0, k] = np.mod(ysint[:,0, k], 2*np.pi) + return ysint \ No newline at end of file diff --git a/python/01_pendulum/implicit/init_func.py b/python/01_pendulum/implicit/init_func.py new file mode 100644 index 0000000..15ce043 --- /dev/null +++ b/python/01_pendulum/implicit/init_func.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +kern_sqexp_sin = exp(-sin(0.5*x)**2/(2.0*l**2)) + +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern_x = kern_sqexp_sin.subs(x, xa-xb).subs(l, lx) +kern = (kern_x*kern_y).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/01_pendulum/implicit/kernels.f90 b/python/01_pendulum/implicit/kernels.f90 new file mode 100644 index 0000000..a6a970b --- /dev/null +++ b/python/01_pendulum/implicit/kernels.f90 @@ -0,0 +1,231 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0 & + *x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = 0.25d0*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - sin(0.5d0*x_a & + - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp(0.5d0*(-lx**2* & + (y_a - y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly** & + 2))/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly & + **4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = -0.5d0*(y_a - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly & + **2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - & + 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx**2*ly**2) +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0.25d0*(y_a - y_b)*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - & + sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)** & + 2)/(lx**2*ly**2))/(lx**4*ly**2) +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))/ly**6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0.5d0*(1.0d0*ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2 & + *(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly & + **2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx** & + 2*ly**4) +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a & + - 0.5d0*x_b)**2/lx**2)*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = 1.0d0*(y_a - y_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-0.5d0*lx**4*cos(1.0d0*x_a - 1.0d0*x_b) + lx**2*( & + 0.75d0*cos(1.0d0*x_a - 1.0d0*x_b) + 0.5d0)*sin(0.5d0*x_a - 0.5d0* & + x_b)**2 - 0.25d0*sin(0.5d0*x_a - 0.5d0*x_b)**4*cos(0.5d0*x_a - & + 0.5d0*x_b)**2)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2*(y_a & + - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))* & + sin(0.5d0*x_a - 0.5d0*x_b)**2/(lx**3*ly**4) +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = (lx**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2)*(y_a - & + y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - & + 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/(lx**5*ly**2) +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0.25d0*(y_a - y_b)**2*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b & + ) - sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)* & + exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0* & + x_b)**2)/(lx**2*ly**2))/(lx**4*ly**3) +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + 5.0d0*ly**2*(y_a - y_b)**2 - 1.0d0*( & + y_a - y_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = (ly**2 - 0.5d0*(y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b) & + /(lx**2*ly**5) +end function diff --git a/python/01_pendulum/implicit/kernels.py b/python/01_pendulum/implicit/kernels.py new file mode 100644 index 0000000..06f1f13 --- /dev/null +++ b/python/01_pendulum/implicit/kernels.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Aug 9 22:30:35 2020 + +@author: manal +""" + +def __bootstrap__(): + global __bootstrap__, __loader__, __file__ + import sys, pkg_resources, imp + __file__ = pkg_resources.resource_filename(__name__,'kernels.cpython-38-x86_64-linux-gnu.so') + __loader__ = None; del __bootstrap__, __loader__ + imp.load_dynamic(__name__,__file__) +__bootstrap__() \ No newline at end of file diff --git a/python/01_pendulum/implicit/main.py b/python/01_pendulum/implicit/main.py new file mode 100644 index 0000000..aa2d948 --- /dev/null +++ b/python/01_pendulum/implicit/main.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 29 11:42:52 2020 + +@author: Katharina Rath +""" + +import numpy as np +import random +import ghalton +from scipy.optimize import minimize +from func import (integrate_pendulum, build_K, buildKreg, applymap, nll_chol, energy, quality) +import tkinter +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from sklearn.metrics import mean_squared_error +import scipy + +#%% init parameters +Nm = 200 #mapping time (Nm = 200 for Fig. 3; Nm = 900 for Fig. 5) +N = 20 #training data (N = 20 for Fig. 3; N = 30 for Fig. 5) +U0 = 1 +nm = 2000 # how often the map should be applied +dtsymp = 0.001 #integration step for producing training data +sig2_n = 1e-12 #noise**2 in observations +Ntest = 15 # number of testpoints + +#define boundaries for training data +qmin = 0.0 +qmax = 2*np.pi +pmin = -3.0 +pmax = 3.0 + +#define boundaries for test data +qminmap = np.array(np.pi - 2.8) +qmaxmap = np.array(np.pi + 1.5) +pminmap = np.array(-2.3) +pmaxmap = np.array(1.8) + +#set initial conditions (q0, p0) randomly for test data +random.seed(1) +q0map = np.linspace(qminmap,qmaxmap,Ntest) +p0map = np.linspace(pminmap,pmaxmap,Ntest) +q0map = random.sample(list(q0map), Ntest) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map) +P0map = np.array(p0map) + +#%% calculate training data + +# specify initial conditions (q0, p0), sampled from Halton sequence +seq = ghalton.Halton(2) +samples = seq.get(N)*np.array([qmax-qmin, pmax-pmin]) + np.array([qmin, pmin]) + +q = samples[:,0] +p = samples[:,1] + +t0 = 0.0 # starting time +t1 = dtsymp*Nm # end time +t = np.linspace(t0,t1,Nm) # integration points in time + +#integrate pendulum to provide data +ysint = integrate_pendulum(q, p, t) + +#set training data +# xtrain = (q, P), ztrain = grad F = (p-P, Q-q) +P = np.empty((N)) +Q = np.empty((N)) +for ik in range(0, N): + P[ik] = ysint[ik][1,-1] + Q[ik] = ysint[ik][0,-1] + +zqtrain = Q - q +zptrain = p - P + +xtrain = np.hstack((q, P)) +ztrain = np.concatenate((zptrain, zqtrain)) + + +#%% set up GP +# as indicated in Algorithm 1: Semi-implicit symplectic GP map +#hyperparameter optimization of length scales (lq, lp) +log10l0 = np.array((-1, -1), dtype = float) + +# Step 1: Usual GP regression of P over (q,p) +#fit GP + hyperparameter optimization to have a first guess for newton for P +xtrainp = np.hstack((q, p)) +ztrainp = P + +sigp = 2*np.amax(np.abs(ztrainp))**2 + +def nll_transform2(log10hyp, sig, sig2n, x, y, N): + hyp = 10**log10hyp + builder = lambda x, x0, hyp, K: buildKreg(x, x0, hyp, K, N) + return nll_chol(np.hstack((hyp, sig, [sig2n])), x, y, N, buildK=builder) +res = minimize(nll_transform2, np.array((log10l0)), args = (sigp, sig2_n, xtrainp, ztrainp.T.flatten(), N), method='L-BFGS-B', bounds = ((-10, 1), (-10, 1))) + + +lp = 10**res.x +hypp = np.hstack((lp, sigp)) +print('Optimized lengthscales for regular GP: lq =', "{:.2f}".format(lp[0]), 'lp = ', "{:.2f}".format(lp[1])) +# build K and its inverse +Kp = np.zeros((N, N)) +buildKreg(xtrainp, xtrainp, hypp, Kp) +Kyinvp = scipy.linalg.inv(Kp + sig2_n*np.eye(Kp.shape[0])) + +# Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) according to Eq. 41 +# hyperparameter optimization for lengthscales (lq, lp) and GP fitting +sig = 2*np.amax(np.abs(ztrain))**2 + +def nll_transform(log10hyp, sig, sig2n, x, y, N): + hyp = 10**log10hyp + builder = lambda x, x0, hyp, K: build_K(x, x0, hyp, K) + return nll_chol(np.hstack((hyp, sig, [sig2n])), x, y, N, buildK = builder) + +#log 10 -> BFGS +res = minimize(nll_transform, np.array((log10l0)), args = (sig, sig2_n, xtrain, ztrain.T.flatten(), 2*N), method='L-BFGS-B', bounds = ((-10, 1), (-10, 1))) +sol1 = 10**res.x +l = [np.abs(sol1[0]), np.abs(sol1[1])] +print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1])) +#%% +#build K(x,x') and regularized inverse with sig2_n +# K(x,x') corresponds to L(q,P,q',P') given in Eq. (38) +hyp = np.hstack((l, sig)) +K = np.empty((2*N, 2*N)) +build_K(xtrain, xtrain, hyp, K) +Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + +# caluate training error +Eftrain = K.dot(Kyinv.dot(ztrain)) +outtrain = mean_squared_error(ztrain, Eftrain) +print('training error', "{:.1e}".format(outtrain)) + +#%% Application of symplectic map +qmap, pmap = applymap( + nm, Ntest, hyp, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv) + +#calculate energy +H = energy([qmap, pmap], U0) + +#%% Quality of map +t0 = 0.0 # starting time +t1 = dtsymp*Nm*(nm+20) # end time +t = np.linspace(t0,t1,Nm*(nm+20)) # integration points in time + +#integrate test data +yinttest = np.array(integrate_pendulum(qmap[0,:], pmap[0,:], t)).T +yinttest[:,0,:] = np.mod(yinttest[:,0,:], 2*np.pi) +#%% plot results + +plt.rc('xtick',labelsize=20) +plt.rc('ytick',labelsize=20) + +plt.figure(figsize = [10,3]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(qmap[:,i], pmap[:,i], 'k^', markersize = 0.5) +plt.ylim([-2.8, 2.8]) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,2) +for i in range(0, Ntest): + plt.plot(yinttest[:,0,i], yinttest[:,1,i], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.ylim([-2.8, 2.8]) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,3) +for i in range(0, Ntest): + plt.plot(yinttest[:,0,i], yinttest[:,1,i], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) + plt.plot(qmap[:,i], pmap[:,i], 'k^', markersize = 0.5) +plt.ylim([-2.8, 2.8]) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() +#%% +plt.figure() +for i in range(0, qmap.shape[1]): + plt.plot(np.linspace(0, nm*dtsymp*Nm, nm), H[:,i]/np.mean(H[:,i])) +plt.xlabel('t') +# plt.ylim([0.5, 1.5]) +plt.ylabel(r'H/$\bar{H}$') +plt.title('energyGP') + +#%% +#calculate quality criteria as indicated in Eq. (40) and (41) +Eosc, gd, stdgd = quality(qmap, pmap, H, yinttest, Ntest, Nm) + +print('Geometric distance: ', "{:.1e}".format(np.mean(gd)), u"\u00B1", "{:.1e}".format(stdgd)) +print('Energy oscillation: ', "{:.1e}".format(np.mean(Eosc))) + +plt.show(block=True) \ No newline at end of file diff --git a/python/02_pert_pendulum/Makefile b/python/02_pert_pendulum/Makefile new file mode 100644 index 0000000..e4e38fc --- /dev/null +++ b/python/02_pert_pendulum/Makefile @@ -0,0 +1,13 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py + +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll $(NAME).f90 diff --git a/python/02_pert_pendulum/calc_poincare.py b/python/02_pert_pendulum/calc_poincare.py new file mode 100644 index 0000000..889c0bc --- /dev/null +++ b/python/02_pert_pendulum/calc_poincare.py @@ -0,0 +1,111 @@ +import numpy as np +import ghalton +import time +nm = 200 #map applications +e = 0.5 +om = 0.5 +def zdot(t, z): #perturbed pendulum + x = z[:,0:1] + y = z[:,1:] + xdot=e*(0.3*x*np.sin(2*t) + 0.7*x*np.sin(3*t))+y + ydot=-e*(0.3*y*np.sin(2*t) + 0.7*y*np.sin(3*t)) - om**2*np.sin(x) + return np.hstack([xdot,ydot]) + +#%% +def gen_samples_circle(origin, radius, n_samples): + s_radius = .5*radius**2 + seq = ghalton.Halton(2) + samp = seq.get(n_samples)*np.array([s_radius, 2*np.pi]) + s = samp[:,0] + theta = samp[:,1] + x = origin[0]+np.sqrt(2*s)*np.cos(theta) + y = origin[1]+np.sqrt(2*s)*np.sin(theta) + return np.hstack([x.reshape(n_samples,1),y.reshape(n_samples,1)]) + +def gen_samples_pmap(origin,r1,nics,n_iterations,eps): + rkstep=1500 + latent_samples = gen_samples_circle(origin, r1,nics) + sample = latent_samples + out = np.zeros([nics,2]) + for i in range(n_iterations): + tempbin,sample = rk_pmap(sample,eps,rkstep) + out[:,:] = sample[:,:] + return [out,latent_samples] + + +#%% +def rk_pmap(z,eps,n_rk_steps = 100): + dphi = 2*np.pi/n_rk_steps + phi_current = 0 + z_current = 1.0*z + out = np.zeros([N, 2, n_rk_steps]) + for i in range(n_rk_steps): + k1 = zdot(phi_current, z_current) + k2 = zdot(phi_current + .5*dphi,z_current + .5*dphi*k1) + k3 = zdot(phi_current + .5*dphi,z_current + .5*dphi*k2) + k4 = zdot(phi_current + dphi, z_current + dphi * k3) + z_current = z_current + (1.0/6.0) * dphi * (k1 + 2*k2 + 2*k3 + k4) + phi_current = phi_current + dphi + #print(phi_current) + out[:,:,i] = z_current + return out, z_current + +N = 55 +radius=0.9 +[labels_raw, data_raw] = gen_samples_pmap([0,0], radius, N, 1, e) +rr=(labels_raw[:,0])**2+labels_raw[:,1]**2 +ind=np.argwhere(rr<=radius**2) +n_data=len(ind) +data=np.zeros([n_data,2]) +labels=np.zeros([n_data,2]) +data[:,0:1]=data_raw[ind,0] +data[:,1:2]=data_raw[ind,1] +labels[:,0:1]=labels_raw[ind,0] +labels[:,1:2]=labels_raw[ind,1] + +q = data[:, 0] + np.pi +p = data[:, 1] +Q = labels[:,0] + np.pi +P = labels[:,1] + +zqtrain = Q - q +zptrain = p - P + +xtrain = q.flatten() +ytrain = P.flatten() +xtrain = np.hstack((q, P)).T +ztrain1 = zptrain.flatten() +ztrain2 = zqtrain.flatten() +ztrain = np.concatenate((ztrain1, ztrain2)) + +N = n_data +print('Training data: ', N) + +#%% + +## Test data for perturbed pendulum +nics = 20 +Ntest = int(nics+nics//2) +xic = np.linspace(0.05,0.7,nics).reshape([nics,1]) +yic = 0.0*np.ones([nics,1]) +yic2 = np.linspace(0.3,0.6,nics//2).reshape([nics//2,1]) +xic2 = 0.0*np.ones([nics//2,1]) +zic = np.hstack([np.vstack([xic,xic2]),np.vstack([yic,yic2])]) +qs = zic[:,0] +ps = zic[:,1] +#%% +start = time.time() +yinttest = np.zeros([2,Ntest,nm]) +for i in range(Ntest): + temp = np.array([zic[i]]) + yinttest[:,i,0] = temp + for k in range(nm-1): + tempbin, temp = rk_pmap(temp, e, 100) + yinttest[:,i,k+1] = temp +end = time.time() +print('Time needed RK: ', end-start) + +# transform by + np.pi for symplectic GP map +qs = zic[:,0]+np.pi +ps = zic[:,1] +yinttest[0] = yinttest[0]+np.pi \ No newline at end of file diff --git a/python/02_pert_pendulum/func.py b/python/02_pert_pendulum/func.py new file mode 100644 index 0000000..511525e --- /dev/null +++ b/python/02_pert_pendulum/func.py @@ -0,0 +1,272 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:47:00 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import newton +from scipy.linalg import solve_triangular +import scipy +from sklearn.metrics import mean_squared_error + + +from kernels import * + +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def buildKreg(xin, x0in, hyp, K): + # set up "usual" covariance matrix for GP on regular grid (q,p) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0] + N0 = K.shape[1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = f_kern( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def build_dKreg(xin, x0in, hyp): + + l = hyp[:-1] + sig = hyp[-1] + N = len(xin)//2 + N0 = len(x0in)//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + Kp = np.empty((N, N0)) + + dK = [] + for k in range(N): + for lk in range(N0): + Kp[k,lk] = sig*dkdlx_num( + x0[lk], y0[lk], x[k], y[k], l[0], l[1]) + + dK.append(Kp.copy()) + + for k in range(N): + for lk in range(N0): + Kp[k,lk] = sig*dkdly_num( + x0[lk], y0[lk], x[k], y[k], l[0], l[1]) + + dK.append(Kp.copy()) + return dK + +def build_dK(xin, x0in, hyp): + # set up covariance matrix + N = len(xin)//2 + N0 = len(x0in)//2 + l = hyp[:-1] + sig = hyp[-1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + k11 = np.empty((N0, N)) + k12 = np.empty((N0, N)) + k21 = np.empty((N0, N)) + k22 = np.empty((N0, N)) + + dK = [] + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + #dK[:,:] = sig*dK + return dK + + +def nll_grad_reg(hyp, x, y, N): + K = np.empty((N, N)) + buildKreg(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Kyinv = np.linalg.inv(Ky) # invert GP matrix + alpha = Kyinv.dot(y) + nlp_val = 0.5*y.T.dot(alpha) + 0.5*np.linalg.slogdet(Ky)[1] + dK = build_dKreg(x, x, hyp[:-1]) + + nlp_grad = np.array([ + -0.5*alpha.T.dot(dK[0].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[0])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[1])) + ]) + + return nlp_val, nlp_grad + +def nll_grad(hyp, x, y, N): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Kyinv = np.linalg.inv(Ky) # invert GP matrix + alpha = Kyinv.dot(y) + nlp_val = 0.5*y.T.dot(alpha) + 0.5*np.linalg.slogdet(Ky)[1] + dK = build_dK(x, x, hyp[:-1]) + + nlp_grad = np.array([ + -0.5*alpha.T.dot(dK[0].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[0])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[1])) + ]) + + return nlp_val, nlp_grad + + +def gpsolve(Ky, ft): + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_triangular( + L.T, solve_triangular(L, ft, lower=True, check_finite=False), + lower=False, check_finite=False) + + return L, alpha + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +# negative log-posterior +def nll_chol(hyp, x, y, N, buildK = build_K): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True, check_finite = False) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +def guessP(x, y, hypp, xtrainp, ztrainp, Kyinvp, N): + Ntest = 1 + Kstar = np.empty((Ntest, int(len(xtrainp)/2))) + buildKreg(np.hstack((x,y)), xtrainp, hypp, Kstar) + Ef = Kstar.dot(Kyinvp.dot(ztrainp)) + return Ef + +def calcQ(x,y, xtrain, l, Kyinv, ztrain): + # get \Delta q from GP on mixed grid. + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + dq = qGP[1] + return dq + +def Pnewton(P, x, y, l, xtrain, Kyinv, ztrain): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x, P)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = pGP[0] - y + P + return f + +def calcP(x,y, l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest): + # as P is given in an implicit relation, use newton to solve for P (Eq. (42)) + # use the GP on regular grid (q,p) for a first guess for P + pgss = guessP([x], [y], hypp, xtrainp, ztrainp, Kyinvp, Ntest) + res, r = newton(Pnewton, pgss, full_output=True, maxiter=5, disp=False, + args = (np.array([x]), np.array([y]), l, xtrain, Kyinv, ztrain)) + return res + +def applymap(nm, Ntest, l, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv): + # Application of symplectic map + + pmap = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + # set new P including Newton for implicit Eq (42) + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest) + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q (Eq. (43)) + qmap[i+1, k] = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + qmap[i+1, k] = np.mod(qmap[i+1,k] + qmap[i, k], 2.0*np.pi) + return qmap, pmap + +def quality(qmap, pmap, H, ysint, Ntest): + #geom. distance + gd = np.zeros([Ntest]) + for lk in range(0,Ntest): + gd[lk] = mean_squared_error(([qmap[1, lk], pmap[1, lk]]),ysint[:, lk, 1]) + stdgd = np.std(gd[:]) + # Energy oscillation + Eosc = np.zeros([Ntest]) + for lk in range(0, Ntest): + Eosc[lk] = np.std(H[:,lk])/np.mean(H[:,lk]) + return Eosc, gd, stdgd + + diff --git a/python/02_pert_pendulum/init_func.py b/python/02_pert_pendulum/init_func.py new file mode 100644 index 0000000..b429bad --- /dev/null +++ b/python/02_pert_pendulum/init_func.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +kern_sqexp_sin = exp(-sin(0.5*x)**2/(2.0*l**2)) +# +kern_x = kern_sqexp_sin.subs(x, xa-xb).subs(l, lx) +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern = ((kern_x)*(kern_y)).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/02_pert_pendulum/kernels.f90 b/python/02_pert_pendulum/kernels.f90 new file mode 100644 index 0000000..a6a970b --- /dev/null +++ b/python/02_pert_pendulum/kernels.f90 @@ -0,0 +1,231 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0 & + *x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = 0.25d0*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - sin(0.5d0*x_a & + - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp(0.5d0*(-lx**2* & + (y_a - y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly** & + 2))/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly & + **4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = -0.5d0*(y_a - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly & + **2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - & + 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx**2*ly**2) +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0.25d0*(y_a - y_b)*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - & + sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)** & + 2)/(lx**2*ly**2))/(lx**4*ly**2) +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))/ly**6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0.5d0*(1.0d0*ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2 & + *(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly & + **2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx** & + 2*ly**4) +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a & + - 0.5d0*x_b)**2/lx**2)*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = 1.0d0*(y_a - y_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-0.5d0*lx**4*cos(1.0d0*x_a - 1.0d0*x_b) + lx**2*( & + 0.75d0*cos(1.0d0*x_a - 1.0d0*x_b) + 0.5d0)*sin(0.5d0*x_a - 0.5d0* & + x_b)**2 - 0.25d0*sin(0.5d0*x_a - 0.5d0*x_b)**4*cos(0.5d0*x_a - & + 0.5d0*x_b)**2)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2*(y_a & + - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))* & + sin(0.5d0*x_a - 0.5d0*x_b)**2/(lx**3*ly**4) +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = (lx**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2)*(y_a - & + y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - & + 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/(lx**5*ly**2) +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0.25d0*(y_a - y_b)**2*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b & + ) - sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)* & + exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0* & + x_b)**2)/(lx**2*ly**2))/(lx**4*ly**3) +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + 5.0d0*ly**2*(y_a - y_b)**2 - 1.0d0*( & + y_a - y_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = (ly**2 - 0.5d0*(y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b) & + /(lx**2*ly**5) +end function diff --git a/python/02_pert_pendulum/main.py b/python/02_pert_pendulum/main.py new file mode 100644 index 0000000..3ad0a5b --- /dev/null +++ b/python/02_pert_pendulum/main.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 29 11:42:52 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import minimize +from func import (build_K, buildKreg, applymap, nll_chol, nll_grad, nll_grad_reg, quality) +import tkinter +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from sklearn.metrics import mean_squared_error +import scipy +import time +from calc_poincare import ztrain, xtrain, q, p, P, Q, N, qs, ps, Ntest, yinttest, nm +#%% init parameters +U0 = 1 +sig2_n = 1e-12 #noise**2 in observations + +#%% set up GP +# as indicated in Algorithm 1: Semi-implicit symplectic GP map +#hyperparameter optimization of length scales (lq, lp) +lp0 = np.array((0.5, 0.5), dtype = float) + +# Step 1: Usual GP regression of P over (q,p) +#fit GP + hyperparameter optimization to have a first guess for newton for P +xtrainp = np.hstack((q, p)).T +ztrainp = P + +sigp = 2*np.amax(np.abs(ztrainp))**2 + +def nll_transform2(log10hyp, sig, sig2n, x, y, N): + hyp = log10hyp + return nll_grad_reg(np.hstack((hyp, sig, [sig2n])), x, y, N) +res = minimize(nll_transform2, np.array((lp0)), args = (sigp, sig2_n, xtrainp, ztrainp.T.flatten(), N), method='L-BFGS-B', jac = True) + +lp = res.x +hypp = np.hstack((lp, sigp)) +print('Optimized lengthscales for regular GP: lq =', "{:.2f}".format(lp[0]), 'lp = ', "{:.2f}".format(lp[1])) + +# build K and its inverse +Kp = np.zeros((N, N)) +buildKreg(xtrainp, xtrainp, hypp, Kp) +Kyinvp = scipy.linalg.inv(Kp + sig2_n*np.eye(Kp.shape[0])) +#%% +# Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) according to Eq. 41 +# hyperparameter optimization for lengthscales (lq, lp) and GP fitting +l0 = np.array((1,1), dtype = float) +sig = 2*np.amax(np.abs(ztrain))**2 +def nll_transform_grad(log10hyp, sig, sig2n, x, y, N): + hyp = log10hyp + return nll_grad(np.hstack((hyp, sig, [sig2n])), x, y, N) + + +res = minimize(nll_transform_grad, np.array((l0)), args = (sig, sig2_n, xtrain, ztrain.T.flatten(), 2*N), method='L-BFGS-B',jac=True) +sol1 = res.x +l = [np.abs(sol1[0]), np.abs(sol1[1])] +print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1])) +print('Opt. Success', res.success) +print('NLL', res.fun) + +#%% +#build K(x,x') and regularized inverse with sig2_n +# K(x,x') corresponds to L(q,P,q',P') given in Eq. (38) +hyp = np.hstack((l, sig)) +K = np.empty((2*N, 2*N)) +build_K(xtrain, xtrain, hyp, K) +Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + +# caluate training error +Eftrain = K.dot(Kyinv.dot(ztrain)) +outtrain = mean_squared_error(ztrain, Eftrain) +print('training error', "{:.1e}".format(outtrain)) + + +#%% Application of symplectic map + +start = time.time() +qmap, pmap = applymap( + nm, Ntest, hyp, hypp, qs, ps, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv) +end = time.time() +print('Time needed SGPR: ', end-start) + + +#%% plot results +plt.figure(figsize = [10,3]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(qmap[:,i], pmap[:,i], 'k^', markersize = 0.5) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,2) +for i in range(0, Ntest): + plt.plot(yinttest[0,i,:], yinttest[1,i,:], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,3) +for i in range(0, Ntest): + plt.plot(yinttest[0,i,:], yinttest[1,i,:], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) + plt.plot(qmap[:,i], pmap[:,i], 'k^', markersize = 0.5) +plt.xlabel('$q$', fontsize = 20) +plt.ylabel('$p$', fontsize = 20) +plt.tight_layout() +plt.show(block = True) \ No newline at end of file diff --git a/python/03_henon_heiles/Makefile b/python/03_henon_heiles/Makefile new file mode 100644 index 0000000..fc7ad38 --- /dev/null +++ b/python/03_henon_heiles/Makefile @@ -0,0 +1,4 @@ +include *.mk $(bar) + +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll \ No newline at end of file diff --git a/python/03_henon_heiles/func.py b/python/03_henon_heiles/func.py new file mode 100644 index 0000000..7cc6daf --- /dev/null +++ b/python/03_henon_heiles/func.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:47:00 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import newton +from scipy.linalg import solve_triangular +import scipy +from sklearn.metrics import mean_squared_error + +from kernels_sq import * + +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def buildKreg(xin, x0in, hyp, K): + # set up "usual" covariance matrix for GP on regular grid (q,p) + # print(hyp) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0] + N0 = K.shape[1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = f_kern( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def build_dK(xin, x0in, hyp): + # set up covariance matrix + N = len(xin)//2 + N0 = len(x0in)//2 + l = hyp[:-1] + sig = hyp[-1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + k11 = np.empty((N0, N)) + k12 = np.empty((N0, N)) + k21 = np.empty((N0, N)) + k22 = np.empty((N0, N)) + + dK = [] + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N): + for lk in range(N0): + k11[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + k21[k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + k12[k,lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + k22[k,lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + #dK[:,:] = sig*dK + return dK + +def gpsolve(Ky, ft): + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_triangular( + L.T, solve_triangular(L, ft, lower=True, check_finite=False), + lower=False, check_finite=False) + + return L, alpha + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +def nll_chol_reg(hyp, x, y, N): + K = np.empty((N, N)) + buildKreg(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret +# negative log-posterior +def nll_chol(hyp, x, y, N, buildK = build_K): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +def nll_grad(hyp, x, y, N): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Kyinv = np.linalg.inv(Ky) # invert GP matrix + + # nlp_val = 0.5*y.T.dot(alpha) + 0.5*np.linalg.slogdet(Ky)[1] + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + nlp_val = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + dK = build_dK(x, x, hyp[:-1]) + alpha = Kyinv.dot(y) + # Rasmussen (5.9) + nlp_grad = np.array([ + -0.5*alpha.T.dot(dK[0].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[0])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[1])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[2])) + ]) + # aaKyinv = alpha.T.dot(alpha)-Kyinv + # nlp_grad = np.array([ + # -0.5*np.trace(aaKyinv.dot(dK[0])), + # -0.5*np.trace(aaKyinv.dot(dK[1])), + # -0.5*np.trace(aaKyinv.dot(dK[2])) + # ]) + return nlp_val, nlp_grad + +def guessP(x, y, hypp, xtrainp, ztrainp, Kyinvp, N): + Ntest = 1 + Kstar = np.empty((Ntest, int(len(xtrainp)/2))) + buildKreg(np.hstack((x,y)), xtrainp, hypp, Kstar) + Ef = Kstar.dot(Kyinvp.dot(ztrainp)) + return Ef + +def calcQ(x,y, xtrain, l, Kyinv, ztrain): + # get \Delta q from GP on mixed grid. + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + dq = qGP[1] + return dq + +def Pnewton(P, x, y, l, xtrain, Kyinv, ztrain): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x, P)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = pGP[0] - y + P + # print(pGP[0]) + return f + +def calcP(x,y, l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest): + # as P is given in an implicit relation, use newton to solve for P (Eq. (42)) + # use the GP on regular grid (q,p) for a first guess for P + pgss = guessP([x], [y], hypp, xtrainp, ztrainp, Kyinvp, Ntest) + res, r = newton(Pnewton, pgss, full_output=True, maxiter=10, disp=True, + args = (np.array([x]), np.array ([y]), l, xtrain, Kyinv, ztrain)) + return res + +def applymap_henon(nm, Ntest, l, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv): + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + # set new P including Newton for implicit Eq. (42) + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest) + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q (Eq. (43)) + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + qmap[i+1, k] = dqmap + qmap[i, k] + return qmap, pmap + + + +def quality(qmap, pmap, H, ysint, Ntest, Nm): + #geom. distance + gd = np.zeros([Ntest]) + for lk in range(0,Ntest): + gd[lk] = mean_squared_error(([qmap[1, lk], pmap[1, lk]]),ysint[Nm, :, lk]) + stdgd = np.std(gd[:]) + # Energy oscillation + Eosc = np.zeros([Ntest]) + for lk in range(0, Ntest): + Eosc[lk] = np.std(H[:,lk])/np.mean(H[:,lk]) + return Eosc, gd, stdgd diff --git a/python/03_henon_heiles/henon.f90 b/python/03_henon_heiles/henon.f90 new file mode 100644 index 0000000..dfb6307 --- /dev/null +++ b/python/03_henon_heiles/henon.f90 @@ -0,0 +1,89 @@ +program main + use henon, only: n, ncut + implicit none + + real(8) :: z0(n) ! Initial conditions + real(8) :: tcut(ncut), zcut(n, ncut) ! Cut time and phase-position + integer :: icut ! Cut count + + z0(1) = 0d0 + z0(2) = 0.2d0 + z0(3) = 0.1d0 + z0(4) = 0.1d0 + + call integrate(z0, tcut, zcut, icut) +end program main + + +subroutine tstep(n, t, z, dzdt) + use henon, only: w1, w2, lam ! read-only + implicit none + + integer, intent(in) :: n + real(8), intent(in) :: t + real(8), intent(in) :: z(n) + real(8), intent(out) :: dzdt(n) + + dzdt(3) = -w1 * z(1) - 2*lam*z(1)*z(2) + dzdt(4) = -w2 * z(2) - lam*(z(1)**2 - z(2)**2) + dzdt(1) = w1 * z(3) + dzdt(2) = w2 * z(4) +end subroutine tstep + + +subroutine fcut(n, t, z, ng, gout) + implicit none +! For finding roots for Poincare cuts + + integer, intent(in) :: n, ng + real(8), intent(in) :: t, z(n) + real(8), intent(out) :: gout(ng) + + gout(1) = z(1) ! Find cut z(1) == 0 +end subroutine fcut + + +subroutine integrate(z0, tcut, zcut, icut) + use henon, only: n, ncut, tmax, tmacro ! read-only + use dvode_f90_m, only: vode_opts, set_normal_opts, dvode_f90 + implicit none + + external tstep, fcut + + real(8), intent(in) :: z0(n) ! Initial conditions + real(8), intent(out) :: tcut(ncut), zcut(n, ncut) ! Saving cuts + + integer, intent(out) :: icut ! Cut counter + real(8) :: t1, t2, z(n) ! Current timestep + + ! Options for VODE + real(8) :: atol(n), rtol, rstats(22) + integer(4) :: itask, istate, istats(31), nevents + type (vode_opts) :: options + + ! Set VODE options + rtol = 1d-12 + atol = 1d-13 + itask = 1 + istate = 1 + nevents = 1 ! One event for z(1) == 0 + options = set_normal_opts(abserr_vector=atol, relerr=rtol, nevents=nevents) + + z = z0 + icut = 0 + + do + t1 = t2 + t2 = t1 + tmacro + ! g_fcn specifies events + call dvode_f90(tstep, n, z, t1, t2, itask, istate, options, g_fcn = fcut) + + if (istate == 3 .and. z(3) > 0) then + icut = icut + 1 + if (icut > ncut) exit + tcut(icut) = t2 + zcut(:, icut) = z + end if + if (t2 > tmax) exit + end do +end subroutine integrate diff --git a/python/03_henon_heiles/henon_mod.f90 b/python/03_henon_heiles/henon_mod.f90 new file mode 100644 index 0000000..4ca5a52 --- /dev/null +++ b/python/03_henon_heiles/henon_mod.f90 @@ -0,0 +1,15 @@ +module henon + implicit none + save + + integer, parameter :: n = 4 ! Number of phase-space dimensions + integer, parameter :: ncut = 5024 ! Maximum number of cuts + + real(8) :: E_bound = 1d0/12d0 + real(8) :: lam = 1d0 + real(8) :: w1 = 1d0 + real(8) :: w2 = 1d0 + + real(8) :: tmacro = 1d0 ! Macrosteps for integration + real(8) :: tmax = 200d0 ! Maximum integration time +end diff --git a/python/03_henon_heiles/init_func.py b/python/03_henon_heiles/init_func.py new file mode 100644 index 0000000..8e259b3 --- /dev/null +++ b/python/03_henon_heiles/init_func.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +# +kern_x = kern_sqexp.subs(x, xa-xb).subs(l, lx) +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern = ((kern_x)*(kern_y)).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels_sq", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/03_henon_heiles/kernels_sq.f90 b/python/03_henon_heiles/kernels_sq.f90 new file mode 100644 index 0000000..755b2d2 --- /dev/null +++ b/python/03_henon_heiles/kernels_sq.f90 @@ -0,0 +1,217 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*(x_a - x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = 1.0d0*(-x_a + x_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2*( & + x_a - x_b)**2)/(lx**2*ly**2))/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2*( & + x_a - x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 1.0d0*(x_a - x_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2*( & + x_a - x_b)**2)/(lx**2*ly**2))/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2*( & + x_a - x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = 1.0d0*(lx**2 - (x_a - x_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*(x_a - x_b)**2)/(lx**2*ly**2))/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*(x_a - x_b)**2)/(lx**2*ly**2))/ly**4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = -1.0d0*(x_a - x_b)*(y_a - y_b)*exp(-0.5d0*(lx**2*(y_a - & + y_b)**2 + ly**2*(x_a - x_b)**2)/(lx**2*ly**2))/(lx**2*ly**2) +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = (lx**2 - 1.0d0*(x_a - x_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/(lx**2*ly**2))/(lx & + **4*ly**2) +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/(lx**2*ly**2))/ly** & + 6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = (1.0d0*ly**2 - (y_a - y_b)**2)*(x_a - x_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/(lx**2*ly**2))/(lx & + **2*ly**4) +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*(x_a - x_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *(x_a - x_b)**2/lx**2)/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = 1.0d0*(y_a - y_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *(x_a - x_b)**2/lx**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-2.0d0*lx**4 + 5.0d0*lx**2*(x_a - x_b)**2 - 1.0d0*( & + x_a - x_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*(x_a - & + x_b)**2)/(lx**2*ly**2))/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*(x_a - x_b)**2*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/(lx**2*ly**2 & + ))/(lx**3*ly**4) +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = (2.0d0*lx**2 - 1.0d0*(x_a - x_b)**2)*(x_a - x_b)*(y_a & + - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/( & + lx**2*ly**2))/(lx**5*ly**2) +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 1.0d0*(lx**2 - (x_a - x_b)**2)*(y_a - y_b)**2*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/(lx**2*ly**2 & + ))/(lx**4*ly**3) +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + 5.0d0*ly**2*(y_a - y_b)**2 - 1.0d0*( & + y_a - y_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*(x_a - & + x_b)**2)/(lx**2*ly**2))/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = (2.0d0*ly**2 - 1.0d0*(y_a - y_b)**2)*(x_a - x_b)*(y_a & + - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*(x_a - x_b)**2)/( & + lx**2*ly**2))/(lx**2*ly**5) +end function diff --git a/python/03_henon_heiles/main.py b/python/03_henon_heiles/main.py new file mode 100644 index 0000000..c2bb3f5 --- /dev/null +++ b/python/03_henon_heiles/main.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 29 11:42:52 2020 + +@author: Katharina Rath +""" + +import numpy as np +import random +import ghalton +from scipy.optimize import minimize +from func import (build_K, buildKreg, applymap_henon, nll_chol, nll_grad) +import tkinter +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from sklearn.metrics import mean_squared_error +import scipy +from mpl_toolkits.mplot3d import Axes3D +import henon +#%% init parameters +N = 55 #training data +nm = 500 #map application +sig2_n = 1e-12 #noise**2 in observations +Ntest = 37 # number of testpoints + +#%% calculate training data +E_bound = 0.01 +lam = 1.0 +def energy(x): + H = 1/2*(x[0]**2+x[2]**2) + 1/2*(x[1]**2+x[3]**2) + lam*(x[0]**2*x[1] - x[1]**3/3) + return H +def ebound(x): + Eb = 0.5*x[1]**2+0.5*x[0]**2-lam*1/3*x[0]**3 + return Eb +def calc_qdot(x): + return np.sqrt(2*E_bound-x[1]**2-x[0]**2+lam*2/3*x[0]**3) + +seq = ghalton.Halton(2) +samples_all = seq.get(2*N)*np.array([0.3, 0.3]) + np.array([-0.15, -0.15]) + +samples = [] +for i in range(0, len(samples_all)): + if ebound(samples_all[i, :]) < E_bound: + samples.append(samples_all[i,:]) + +print('N =', N) +samples = np.array(samples)[0:N] +qdot = calc_qdot(samples[0:N].T) +samples = np.stack((np.zeros([N]),samples[0:N,0], qdot, samples[0:N,1])).T + +#%% +t0 = 0.0 # starting time +t1 = 1000 # end time + +# Setting module options +henon.henon.lam = lam +henon.henon.tmax = t1 + +# Tracing orbits for training data +out = [] +fout = [] +tout = [] +plt.figure() +for ipart in range(N): + z0 = samples[ipart, :] + tcut, zcut, icut = henon.integrate(z0) + out.append([tcut[:icut], zcut[:,:icut]]) + fout.append(zcut[:,:icut]) + tout.append(tcut[:icut]) + plt.plot(out[-1][1][1,:], out[-1][1][3,:], ',') + + +#%% cut list to minimum number of poincare sections +lenp1 = nm +for i in range(0,N): + lenp = fout[i].shape[1] + if lenp < lenp1: + lenp1 = lenp + +f = np.zeros((N, 4, lenp1)) +tev = np.zeros((N, lenp1)) +for i in range(0,N): + f[i,:,:] = fout[i][:, 0:lenp1] + tev[i, :] = tout[i][0:lenp1] + +#set training data +# xtrain = q, ytrain = P, ztrain = grad F = (p-P, Q-q) +# rescale data for better hyperparameter optimization +q = f[:, 1, 0]*1e2 +p = f[:, 3, 0]*1e2 +Q = f[:, 1, 1]*1e2 +P = f[:, 3, 1]*1e2 + +zqtrain = Q - q +zptrain = p - P + +ztrain = np.hstack((zptrain, zqtrain)) +xtrain = np.hstack((q, P)) + +zqtrain = Q - q +zptrain = p - P + +xtrain = np.hstack((q, P)) +ztrain = np.concatenate((zptrain, zqtrain)) + +random.seed(1) +q0map = np.linspace(-0.1,0.1,Ntest) +p0map = np.linspace(-0.1,0.1,Ntest) +q0map = random.sample(list(q0map), Ntest) +random.seed(0) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map)*1e2 +P0map = np.array(p0map)*1e2 +test_samples = np.zeros([Ntest,2]) +test_samples[:,0] = np.array(q0map) +test_samples[:,1] = np.array(p0map) + +#%% set up GP +# as indicated in Algorithm 1: Semi-implicit symplectic GP map +#hyperparameter optimization of length scales (lq, lp) + +log10l0 = np.array((0,0), dtype = float) + +# Step 1: Usual GP regression of P over (q,p) +#fit GP + hyperparameter optimization to have a first guess for newton for P +xtrainp = np.hstack((q, p)) +ztrainp = P-p + +sigp = 2*np.amax(np.abs(ztrainp))**2 + +def nll_transform2(log10hyp, sig, sig2n, x, y, N): + hyp = 10**log10hyp + builder = lambda x, x0, hyp, K: buildKreg(x, x0, hyp, K, N) + return nll_chol(np.hstack((hyp, sig, [sig2n])), x, y, N, buildK=builder) +res = minimize(nll_transform2, np.array((log10l0)), args = (sigp, sig2_n, xtrainp, ztrainp.T.flatten(), N), method='L-BFGS-B', bounds = ((-10, 1), (-10, 1))) +print(res.success) + +lp = 10**res.x +hypp = np.hstack((lp, sigp)) +print('Optimized lengthscales for regular GP: lq =', "{:.2f}".format(lp[0]), 'lp = ', "{:.2f}".format(lp[1])) +# build K and its inverse +Kp = np.zeros((N, N)) +buildKreg(xtrainp, xtrainp, hypp, Kp) +Kyinvp = scipy.linalg.inv(Kp + sig2_n*np.eye(Kp.shape[0])) +#%% +# Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) according to Eq. 41 +# hyperparameter optimization for lengthscales (lq, lp) and GP fitting +sig = 2*np.amax(np.abs(ztrain))**2 +log10l0 = np.array((0,0), dtype = float) +def nll_transform_grad(log10hyp, sig, sig2n, x, y, N): + hyp = 10**log10hyp + # hyp = log10hyp + out = nll_grad(np.hstack((hyp, sig, [sig2n])), x, y, N) + return out[0] + +#log 10 -> BFGS +res = minimize(nll_transform_grad, np.array((-1,-1)), args = (sig, sig2_n, xtrain, ztrain.T.flatten(), 2*N), method='L-BFGS-B', tol= 1e-8, bounds = ((-2, 2), (-2, 2)))#, +sol1 = 10**res.x +l = [np.abs(sol1[0]), np.abs(sol1[1])] +print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1]), 'sig = ', "{:.2f}".format(sig)) + +#%% +#build K(x,x') and regularized inverse with sig2_n +# K(x,x') corresponds to L(q,P,q',P') +hyp = np.hstack((l, sig)) +K = np.empty((2*N, 2*N)) +build_K(xtrain, xtrain, hyp, K) +Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + +# caluate training error +Eftrain = K.dot(Kyinv.dot(ztrain)) +outtrain = mean_squared_error(ztrain, Eftrain) +print('training error', "{:.1e}".format(outtrain)) + +#%% Application of symplectic map +qmap, pmap = applymap_henon( + nm, Ntest, hyp, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv) + +# Tracing orbits for reference plot +henon.henon.tmax = 5000 +fqdot = calc_qdot(test_samples.T) +testsamples = np.stack((np.zeros(Q0map.shape), test_samples[:,0], fqdot, test_samples[:,1])).T +foutmap = [] +for ipart in range(0,Ntest): + z0 = testsamples[ipart, :] + tcut, zcut, icut = henon.integrate(z0) + foutmap.append(zcut[:,:icut]) + +#%% +lenp1 = nm +for i in range(0,Ntest): + lenp = foutmap[i].shape[1] + if lenp < lenp1: + lenp1 = lenp + +fintmap = np.zeros((Ntest, 4, lenp1)) +for i in range(0,Ntest): + fintmap[i,:,:] = foutmap[i][:, 0:lenp1] + +#%% plot results + +plt.rc('xtick',labelsize=15) +plt.rc('ytick',labelsize=15) + +plt.figure(figsize = [10,3]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(1e-2*qmap[:,i], 1e-2*pmap[:,i], 'k^', label = 'GP', markersize = 0.5) +plt.xlabel(r"$q_2$", fontsize = 20) +plt.ylabel(r"$p_2$", fontsize = 20) +plt.tight_layout() +plt.subplot(1,3,2) +for i in range(0, Ntest): + plt.plot(fintmap[i, 1,:], fintmap[i, 3,:], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.xlabel(r"$q_2$", fontsize = 20) +plt.ylabel(r"$p_2$", fontsize = 20) +plt.tight_layout() +plt.subplot(1,3,3) +for i in range(0, Ntest): + plt.plot(fintmap[i, 1,:], fintmap[i, 3,:], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) + plt.plot(1e-2*qmap[:,i], 1e-2*pmap[:,i], 'k^', label = 'GP', markersize = 0.5) +plt.xlabel(r"$q_2$", fontsize = 20) +plt.ylabel(r"$p_2$", fontsize = 20) +plt.tight_layout() + +plt.show(block = True) \ No newline at end of file diff --git a/python/03_henon_heiles/make_henon.mk b/python/03_henon_heiles/make_henon.mk new file mode 100644 index 0000000..395547d --- /dev/null +++ b/python/03_henon_heiles/make_henon.mk @@ -0,0 +1,9 @@ +sources = vode/dvode_f90_m.f90 henon_mod.f90 henon.f90 + +all: henon.*.so + +henon.*.so: henon.pyf + python3 -m numpy.f2py -m henon -c henon.pyf $(sources) + +henon.pyf: henon_mod.f90 henon.f90 + python3 -m numpy.f2py henon_mod.f90 henon.f90 -m henon -h henon.pyf --overwrite-signature \ No newline at end of file diff --git a/python/03_henon_heiles/make_kernels.mk b/python/03_henon_heiles/make_kernels.mk new file mode 100644 index 0000000..7bd1f44 --- /dev/null +++ b/python/03_henon_heiles/make_kernels.mk @@ -0,0 +1,10 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels_sq + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py \ No newline at end of file diff --git a/python/03_henon_heiles/test_henon.py b/python/03_henon_heiles/test_henon.py new file mode 100644 index 0000000..bf1daca --- /dev/null +++ b/python/03_henon_heiles/test_henon.py @@ -0,0 +1,19 @@ +import numpy as np +import matplotlib.pyplot as plt +import henon + +N = 10 + +# Setting module options +henon.henon.lam = 0.5 +henon.henon.tmax = 1000.0 + +out = [] + +# Tracing orbits +for ipart in range(N): + z0 = np.random.rand(4) - 0.5 + z0[0] = 0.0 + tcut, zcut, icut = henon.integrate(z0) + out.append([tcut[:icut], zcut[:,:icut]]) + plt.plot(out[-1][1][1,:], out[-1][1][3,:], '.') diff --git a/python/03_henon_heiles/vode/ChangeRecord.txt b/python/03_henon_heiles/vode/ChangeRecord.txt new file mode 100644 index 0000000..a53458f --- /dev/null +++ b/python/03_henon_heiles/vode/ChangeRecord.txt @@ -0,0 +1,356 @@ +!_______________________________________________________________________ +! DVODE_F90 Conversion Change Record +! ST 08/23/13 +! Add argument INTENT to example1.f90 and example2.f90 +! Revise quickstart instructions +! ST 06/15/04: +! Convert test program dtest.f to dtest.f90 +! Use unix2dos on vode.f +! Use Metcalf converter to produce dvode_1.f90 from vode.f +! Change some of the Metcalf formatting +! Trim trailing blanks to produce dvode_2.f90 +! ST 06/16/04: +! Dimension RPAR, IPAR, ATOL, ITOL as(*) or (1) in the documentation +! prologue and in dtest.f to produce dvode_3.f90 +! Convert DVNORM to a subroutine DVNORMST to produce dvode_4.f90 +! Convert IXSAV to a subroutine IXSAVST to produce dvode_5.f90 +! Replace DUMACH and DUMSET by F90 intrinsic calculation to +! produce dvode_6.f90 +! Convert IUMACH to subroutine IUMACHST to produce dvode_.7f90 +! Convert function DDOT to subroutine DDOTST +! Convert function IDAMAX to subroutine IDAMAXST to produce +! dvode_.9f90 +! Note: DVNORM, IXSAV, IUMACH, DDOT, IDAMAX later changed +! back to original forms (see 06/23/04 below) +! Convert dvode_9.f90 to a module DVODE_M.F90=dvode_10.f90 +! Convert dvode_10.f90 to dvode_11.f90 with dynamic storage +! allocation for the nonuser portions of the RWORK and IWORK +! arrays. +! Deallocate work arrays at the start of new problems (ISTATE=1) +! to produce dvode_12.f90 +! ST 06/17/04: +! Remove LRWUSER and LIWUSER from call list. Set internally to +! 22 and 30, respectively. +! Set lengths LRW and LIW of the allocatable works arrays +! according to value of MF and MAXORD and verify that the +! lengths of the arrays allocated are identical to +! original minimum sizes in dvode.f. Inserted IMPLICIT NONE +! and necessary declarations in all routines to produce +! dvode13.f90 +! Get rid of warning messages for conditional arithmetic +! tests on equality to produce dvode14.f90 +! Replace LE, GE, LT, GT, NE, EQ with <=, >=, <, >, /=, == +! to produce dvode15.f90 +! Miscellaneous cleanup to produce dvode16.f90 +! ST 06/18/04: +! Move DATA constants to PRIVATE section and eliminated the +! corresponding SAVE statements to produce dvode17.f90 +! Move the DVOD02 common block to PRIVATE section to +! produce dvode18.f90 +! Move the DVOD01 common block to PRIVATE section to +! produce dvode19.f90 +! Modify DVSRCO to reflect above changes to produce dvode20.f90 +! Add interface DVINDY_USER to allow the user to interpolate +! the derivative to produce dvode21.f90 +! ST 06/19/04: +! Add SET_OPTS function and GET_STATS subroutine. +! Add PUBLIC data type VODE_OPTS +! Remove MF, ITOL, ATOL, RTOL, IOPT, RUSER, IUSER from dvode +! argument list (passed via fields in OPTS) +! Add OPTS to dvode call list to produce dvode22.f90 +! ST 06/20/04: +! Add INTERFACE VODE_F90 to allow OPTIONS and JAC to be +! OPTIONAL, to produce dvode_f90_m23.f90 +! ST 06/21/04 +! Add short summary of usage for DVODE_F90 +! Eliminate RPAR and IPAR +! Modify prologue example programs to reflect changes +! ST 06/22/04 +! Make necessary changes to accommodate ISTATE=3 calls +! to produce dvode_f90_m24.f90 ** not yet tested ** +! ST 06/23/04 +! Reactivate DVNLSD in its original form (passed as argument) +! to produce (v25) +! Reactivate DVNORM in its original form (as a function) (v26) +! Reactivate DDOT in its original form (as a function) (v27) +! Reactivate IDAMAX in its original form (as a function) (v28) +! Reactivate IXSAV and IUMACH in their original form (as +! functions) to produce dvode_f90_m29.f90 +! Convert new PRINT statements to (terminal) XERRDV error +! messages +! Debug INDEX = 3 changes to produce dvode_f90_m29.f90 +! (test program = check3.f90) +! ST 06/23/04 +! Change name of DYINDY to DVINDY_CORE and DYINDY_USER to +! DVINDY to produce dvode_f90_m31.f90 +! ST 06/29/04 +! Add routine SET_IAJA to calculate sparse pointer arrays +! to produce dvode_f90_m32.f90 +! ST 07/02/04 +! Rewrite SET_OPTS to produce dvode_f90_m34.f90 +! Test various input options in demo34.f90 +! GB 07/05/04 +! Change AE/RE options to ABSERR/RELERR +! ST 07/18/04 +! Finish first stab at implementing MA28 +! ST 07/26/04 +! Finish second stab at implementing MA28 and MOSS +! ST 07/27/04 +! Finish third stab at implementing MA28 and MOSS +! ST 07/28/04 +! Test sparse option with the column humidification problem +! ST 07/29/04 +! Adapt LCHECK and LROOTS subroutines from LSODAR and add +! to DVODE_F90 +! ST 07/31/04 +! Make JAC and GFUN optional; the VODE_F90 interface calls +! DVODE in one of four ways depending on presence of +! JAC and/or GFUN +! Error if SET_OPTS not called before DVODE_F90 called +! for the first time +! ST 07/31/04 +! First stab at root finding +! ST 08/02/04 +! Finish implementation of LSODAR root finding and adapt +! LSODAR demo problems for vode_f90 solution +! ST 08/05/04 +! Implement temporary changes for optional enforcement of +! nonnegativity constraints on the solution +! ST 08/06/04 +! Replace some labelled DO's and some loops +! Modify calls to DVODE_F90 in the test programs and +! and verify they work correctly +! First cut at printing warning message when |y| < abserr +! ST 08/07/04 +! Implement nonnegativity option in SET_OPTS and remove +! temporary coding from DVODE_F90 +! ST 08/08/04 +! Remove temporary subroutine DVSTEPS (copy of DVSTEP for +! sparse solutions) +! Remove dummy PSOL from calls lists for DVSTEP, DVNLSD, +! and DVNLSS +! ST 08/08/04 +! Add short summary of typical OPTIONS settings +! Run source thru a spell checker +! Take another whack at the documentation prologue for DVODE_F90 +! ST 08/11/04 +! Solve Robertson problem with nonnegativity, dense and sparse tests +! Change how sparse pointers arrays determined via derivative +! calls in DVPREPS to be the same as elsewhere +! Add easy to use version of SET_OPTS named mortal_opts +! Take another whack at the documentation prologue for DVODE_F90 +! ST 08/12/04 +! Add argument Called_From_Where to CHECK_STAT to indicate where +! a storage allocation/deallocation occurred +! Add subroutine RELEASE_ARRAYS which may be called to deallocate +! the arrays allocated by DVODE_F90 and to determine how much +! storage was used +! ST 08/13/04 +! With the nonnegativity option, project both y and y' +! Whack on documentation prologues +! Temporary subroutine CHECK_MESSAGES to write all error messages +! Renumber error messages +! ST 08/16/04 +! Cleaner stab at nonnegativity +! Temporarily change DVINDY/DVINDY_CORE not to enforce negativity +! ST 08/17/04 +! Add DVINDY_BNDS for use by the user callable DVINDY to enforce +! nonnegativity +! Add INTENT attribute to all subroutine and function arguments +! ST 08/18/04 +! Reorganize type declarations +! Make cosmetic header changes +! Eliminate NMES from XERRDV and change name to XERRDV +! ST 09/05/04 +! Stopped being stupid and got nonnegativity to work +! ST 09/07/04 +! Arbitrary bounds enforcement +! ST 10/26/04 +! Get rid of some compiler warning messages about tests of equality +! ST 11/03/04 +! Fix an problem in DVROOTS involving JROOT +! ST 11/11/04 +! Remove mortal_opts function +! ST 12/25/04-12/28/04 +! Implement HSL MA48 +! ST 01/05/05 +! Separate MA28 and MA48 versions +! ST 01/09/05 +! MA28 Hollerith formats to strings +! MA28 lower case +! 0 default for lp/mp in MA28 +! ST 01/11/05 +! Single precision version of MA28 +! Save TSAVE in blocks G,H in DVODE +! Change DOUBLE PRECISION to REAL(KIND=WP) +! Single precision version SVODE_F90 of DVODE_F90 +! D* to E* in SVODE_F90, *=0,...,9,-,+ +! Some D's replaced by d's to make global replacements possible +! D to S in BLAS routines and rest of SVODE_F90 +! ST 01/11/05 +! Change definition of UMAX in single precision version +! ST 02/09/05 +! Reorganize documentation prologue +! ST 03/30/05 +! Implement Alan's fix for root too close to X2 in root finder +! ST 05/11/05 +! Implement Alan's fix for testing for equality of T in +! DVINDY_CORE and DVINDY_BNDS +! Implement Alan's fix for nearby zeros in DVCHECK +! ST 05/14/05 +! Change distribution file to constitute only one F90 module +! Miscellaneous cosmetic changes +! Replace several decimal values for numbers by named variables +! Delete unused LINPACK function DNRM2 +! ST 05/15/05 +! Determine the working precision and define UMAX for MA28 +! accordingly in SET_OPTS +! Miscellaneous cosmetic changes +! Convert case in MA28 +! Modify single precision demo programs to call DVODE +! than SVODE +! ST 05/16/05 +! Define arithmetic constants using _WP extension +! ST 06/03/05 +! Change name of NULL to MYNULL in MA28 routines +! Delete unnecessary commented out statements +! ST 06/04/05 +! Remove ACOR, SAVF, EWT from the RWORK array +! Eliminate LACOR, LSAVF, LEWT pointers +! ST 06/05/05 +! Remove WM from the RWORK array +! Use WM1, WM2 instead of WM(1), WM(2) +! ST 06/06/05 +! Make changes necessary to allow the maximum order to be +! reduced at output points (save YH, WM and restore) +! ST 06/07/05 +! Make cosmetic changes +! ST 06/08/05 +! Make cosmetic changes +! Add directions for building LAPACK-based and MA48-based +! versions +! ST 06/11/05 +! Standardize declaration and polish with nag +! Cosmetic changes +! ST 06/12/05 +! Cosmetic changes +! ST 06/13/05 +! Replace USE_LINPACK flag by USE_LAPACK flag +! Remove check on sizes of ISTAT and RSTAT arrays in GET_STATS +! Add explicit declarations of public and private subroutines +! and functions +! Add the user callable subroutines to the generic interfaces +! block +! ST 06/14/05 +! Cosmetic changes +! ST 07/21/05 +! Implement option to increase the elbow room in the MA28 +! work arrays +! Implement option to invoke MC19 sparse scaling of numerical +! Jacobians +! Clean up the 2d diurnal kinetics program +! ST 07/25/05 +! Implement MA28_EPS threshold option for numerical singularity +! ST 07/26/05 +! Implement MA28_MESSAGES option to control MA28 printing +! ST 07/30/05 +! Implement sparsity recalculation if a structurally singular +! matrix is encountered +! ST 07/31/05 +! Implement MA28_RPS option to force calculation of a new pivot +! sequence if MA28BD encounters a singular matrix +! ST 08/02/05 +! Add SET_NORMAL_OPTS for normal usage +! Add SET_OPTS_2 for changing HMAX, HMIN, and MXSTEP +! ST 08/03/05 +! Change how swag storage increment for sparse Jacobians determined +! Change length of ISTATS to 31 +! Change sparse statistics available in GET_STATS +! ST 08/10/05 +! Add SUB_DIAGONALS and SUP_DIAGONALS option for banded systems +! Add BGROUP to compute sparse column grouping for banded +! systems with known diagonals and verify it's results +! match those of DGROUP +! ST 08/11/05 +! Add subroutine BANDED_IAJA to compute the sparse matrix +! descriptor arrays for a banded matrix with known +! diagonals +! Make miscellaneous cosmetic changes +! ST 08/12/05 +! One at a time column version of BANDED_IAJA +! Modify DVJAC to handle banded systems using diagonal information +! ST 08/16/05 +! Work on documentation for SET_OPTS +! ST 08/17/05 +! Use DTEMP rather than SAVF in DVPREPS +! ST 08/18/05 +! Add CONSTANT_JACOBIAN option +! ST 08/23/05 +! Add SET_INTERMEDIATE_OPTS and work on the documentation for each +! of SET_NORMAL_OPTS, SET_INTERMEDIATE_OPTS, and SET_OPTS +! ST 08/24/05 +! Add warning if default error tolerances used +! Work on OPTs documentation +! ST 08/25/05 +! Make miscellaneous cosmetic changes +! ST 08/28/05 +! Work on the MA48 based sparse solution option +! ST 08/29/05 +! Fixes in the MA48 version +! ST 09/05/05 +! Put back the IMPLICIT NONE statements that nag tools deleted +! ST 09/06/05 +! Do not allow default error tolerances to be used +! ST 09/08/05 +! Add the USE_FAST_FACTOR flag in MA48 version +! ST 09/11/05 +! Add debug prints +! ST 09/14/05 +! Delete the first two slots in the WM Jacobian array +! ST 09/18/05 +! Incorporate option to use JACSP for sparse Jacobians +! ST 09-19-05 +! Change name of DSM to DVDSM +! Do not allow NEQ to be reduced +! Add flags for consecutive error test failures and for consecutive +! corrector failures. +! ST 09-20-05 +! Modify JACSP to produce JACSPDV +! Incorporate option to use JACSPDV for dense Jacobians +! Incorporate option to use JACSPDV for banded Jacobians +! Incorporate option to use JACSPDV for sparse Jacobians +! ST 09-21-05 +! Supply YSCALE to JACSPDV +! ST 10-04-05 +! Make a few changes to introductory comments +! ST 12-05 +! Add subroutine interfaces +! Change some MA28 work arrays to two-dimensional and reference +! differently to satisfy the Salford ftn95 compiler +! Modify demo programs to make Salford happy +! ST 01-01-08 +! Fixed bug found by Richard Cox (restore NZ when sparse +! analytic Jacobian used) +! End DVODE_F90 Conversion Change Record +!_______________________________________________________________________ +! f77 REVISION HISTORY(YYYYMMDD) +! 19890615 Date Written. Initial release. +! 19890922 Added interrupt/restart ability, minor changes throughout. +! 19910228 Minor revisions in line format, prologue, etc. +! 19920227 Modifications by D. Pang: +! (1) Applied subgennam to get generic intrinsic names. +! (2) Changed intrinsic names to generic in comments. +! (3) Added *DECK lines before each routine. +! 19920721 Names of routines and labelled common blocks changed, so as +! to be unique in combined single/real(wp) code (ACH) +! 19920722 Minor revisions to prologue (ACH). +! 19920831 Conversion to real(wp) done (ACH). +! 19921106 Fixed minor bug: ETAQ,ETAQM1 in DVSTEP SAVE statement (ACH) +! 19921118 Changed LUNSAV/MFLGSV to IXSAV(ACH). +! 19941222 Removed MF overwrite; attached sign to H in est. second +! deriv. in DVHIN; misc. comment changes throughout (ACH). +! 19970515 Minor corrections to comments in prologue, DVJAC(ACH). +! 19981111 Corrected Block B by adding final line, GOTO 200 (ACH). +! 20020430 Various upgrades (ACH): Use ODEPACK error handler package. +! Replaced D1MACH by DUMACH. Various changes to main +! prologue and other routine prologues. diff --git a/python/03_henon_heiles/vode/dvode_f90_m.f90 b/python/03_henon_heiles/vode/dvode_f90_m.f90 new file mode 100644 index 0000000..b88146b --- /dev/null +++ b/python/03_henon_heiles/vode/dvode_f90_m.f90 @@ -0,0 +1,18960 @@ +! If your usage of this software requires a license, the following +! BSD-like license is applicable. If it is not appropriate for your +! usage, please contact the authors. +! +! Copyright (c) 2013 +! G.D. Byrne Illinois Institute of Technology, Chicago, ILL +! S. Thompson, Radford University, Radford, VA +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! Redistributions of source code must retain the above copyright +! notice, this list of conditions, and the following disclaimer. +! +! Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer +! in the documentation and/or other materials provided with the +! distribution. +! +! Neither the name of the organizations nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. + + MODULE DVODE_F90_M + +! This version is the August 2013 release. +! Last change: 08/23/13 +! _____________________________________________________________________ +! Working Precision + IMPLICIT NONE +! Define the working precision for DVODE_F90. Change D0 to E0 in the +! next statement to convert to single precision. + INTEGER, PARAMETER, PRIVATE :: WP = KIND(1.0D0) +! ______________________________________________________________________ +! Overview + +! The f77 ordinary differential equation solver VODE.f is applicable to +! nonstiff systems of odes and to stiff systems having dense or banded +! Jacobians. DVODE_F90 is a Fortran 90 extension of VODE.f. While +! retaining all of the features available in VODE.f, we have +! incorporated several new options in DVODE_F90 including: +! 1. the ability to solve stiff systems with sparse Jacobians +! 2. internal management of storage and work arrays +! 3. specification of options via optional keywords +! 4. the ability to perform root finding or "event detection" +! 5. various new diagnostic and warning messages +! 6. the ability to impose solution bounds +! 7. several specialized options for dealing with sparsity +! ______________________________________________________________________ +! Version Information + +! This is DVODE_F90, the double precision FORTRAN 90 extension of the +! f77 DVODE.f ordinary differential equation solver. This version uses +! MA28 for sparse Jacobians. This file and related information can be +! obtained at the following support page: +! +! http://www.radford.edu/~thompson/vodef90web/ +! +! We are indebted to Richard Cox (ORNL) for providing us with his +! implementation of MA28 in LSOD28.f (a variant of Alan Hindmarsh's +! lsodes.f). We are indebted to Alan Hindmarsh for numerous contributions. +! In particular, we borrowed liberally from the f77 solvers VODE.f, +! LSODAR.f, and LSODES.f while developing DVODE_F90. We are indebted +! to Doug Salane for providing us with his JACSP Jacobian routines. +! +! If you find a bug or encounter a problem with DVODE_F90, please +! contact one of us: +! G.D. Byrne (gbyrne@wi.rr.com) +! S. Thompson (thompson@radford.edu) +! A set of quick start instructions is provided below. +! ______________________________________________________________________ +! Note on F90/F95 Compilers + +! To date we have used DVODE_F90 successfully with all F90/F95 compilers +! to which we have access. In particular, we have used it with the Lahey +! F90 and Lahey-Fujitsu F95 compilers, the Compaq Visual F90 compiler, +! the g90 compiler, and the INTEL, Portland, Salford, and SUN compilers. +! It should be noted that compilers such as Salford's FTN95 complain +! about uninitialized arrays passed as subroutine arguments and the use of +! slices of two dimensional arrays as one dimensional vectors, and will +! not run using the strictest compiler options. It is perfectly safe to +! use the /-CHECK compiler option to avoid these FTN95 runtime checks. +! DVODE_F90 does not use any variable for numerical purposes until it +! has been assigned an appropriate value. +! ______________________________________________________________________ +! Quick Start Instructions + +! (1) Compile this file. Then compile, link, and execute the program +! example1.f90. The output is written to the file example1.dat. +! Verify that the last line of the output is the string +! 'No errors occurred.' +! (2) Repeat this process for the program example2.f90. +! +! Other test programs you may wish to run to verify your installation +! of DVODE_F90 are: +! +! (3) Run the test programs nonstiffoptions.f90 and stiffoptions.f90 +! and verify that the last line in the output files produced is +! 'No errors occurred.' They solve the problems in the Toronto +! test suites using several different error tolerances and various +! solution options. Note that stiffoptions.f90 takes several +! minutes to run because it performs several thousand separate +! integrations. +! (4) Locate the file robertson.f90 in the demo programs and look at +! how options are set using SET_OPTS, how DVODE_F90 is called to +! obtain the solution at desired output times, and how the +! derivative and Jacobian routines are supplied. Note too the +! manner in which the solution is constrained to be nonnegative. +! (5) Locate demoharmonic.f90 and look at how root finding options +! are set and how the event residual routine is supplied to +! DVODE_F90. +! (6) The other demo programs available from the DVODE_F90 support +! page illustrate various other solution options available in +! DVODE_F90. The demo programs may be obtained from +! +! http://www.radford.edu/~thompson/vodef90web/index.html +! ______________________________________________________________________ +! DVODE_F90 Full Documentation Prologue + +! Section 1. Setting Options in DVODE_F90 +! Section 2. Calling DVODE_F90 +! Section 3. Choosing Error Tolerances +! Section 4. Choosing the Method of Integration +! Section 5. Interpolation of the Solution and Derivatives +! Section 6. Handling Events (Root Finding) +! Section 7. Gathering Integration Statistics +! Section 8. Determining Jacobian Sparsity Structure Arrays +! Section 9. Original DVODE Documentation Prologue +! Section 10. Example Usage + +! Note: Search on the string 'Section' to locate these sections. You +! may wish to refer to the support page which has the sections broken +! into smaller pieces. +! ______________________________________________________________________ +! Section 1. Setting Options in DVODE_F90 +! +! You can use any of three options routines: +! +! SET_NORMAL_OPTS +! SET_INTERMEDIATE_OPTS +! SET_OPTS + +! OPTIONS = SET_NORMAL_OPTS(DENSE_J, BANDED_J, SPARSE_J, & +! USER_SUPPLIED_JACOBIAN, LOWER_BANDWIDTH, UPPER_BANDWIDTH, & +! RELERR, ABSERR, ABSERR_VECTOR, NEVENTS) + +! OPTIONS = SET_INTERMEDIATE_OPTS(DENSE_J, BANDED_J, SPARSE_J, & +! USER_SUPPLIED_JACOBIAN,LOWER_BANDWIDTH, UPPER_BANDWIDTH, & +! RELERR, ABSERR, ABSERR_VECTOR,TCRIT, H0, HMAX, HMIN, MAXORD, & +! MXSTEP, MXHNIL, NZSWAG, USER_SUPPLIED_SPARSITY, MA28_RPS, & +! NEVENTS, CONSTRAINED, CLOWER, CUPPER, CHANGE_ONLY_f77_OPTIONS) & + +! OPTIONS = SET_OPTS(METHOD_FLAG, DENSE_J, BANDED_J, SPARSE_J, & +! USER_SUPPLIED_JACOBIAN, SAVE_JACOBIAN, CONSTANT_JACOBIAN, & +! LOWER_BANDWIDTH, UPPER_BANDWIDTH, SUB_DIAGONALS, SUP_DIAGONALS, & +! RELERR, RELERR_VECTOR, ABSERR, ABSERR_VECTOR, TCRIT, H0, HMAX, & +! HMIN, MAXORD, MXSTEP, MXHNIL, YMAGWARN, SETH, UPIVOT, NZSWAG, & +! USER_SUPPLIED_SPARSITY, NEVENTS, CONSTRAINED, CLOWER, CUPPER, & +! MA28_ELBOW_ROOM, MC19_SCALING, MA28_MESSAGES, MA28_EPS, & +! MA28_RPS, CHANGE_ONLY_f77_OPTIONS, JACOBIAN_BY_JACSP) + +! Please refer to the documentation prologue for each of these functions +! to see what options may be used with each. Note that input to each is +! via keyword and all variables except error tolerances are optional. +! Defaults are used for unspecified options. If an option is available +! in SET_NORMAL OPTS, it is available and has the same meaning in +! SET_INTERMEDIATE_OPTS and SET_OPTS. Similarly, if an option is available +! in SET_INTERMEDIATE_OPTS, it is available and has the same meaning in +! SET_OPTS. + +! The first two functions are provided merely for convenience. +! SET_NORMAL_OPTS is available simply to relieve you of reading the +! documentation for SET_OPTS and to use default values for all but +! the most common options. SET_INTERMEDIATE_OPTS is available to allow +! you more control of the integration while still using default values +! for less commonly used options. SET_OPTS allows you to specify any +! of the options available in DVODE_F90. + +! Roughly, SET_NORMAL_OPTS is intended to provide for dense, banded, +! and numerical sparse Jacobians without the need to specify other +! specialized options. SET_INTERMEDIATE_OPTIONS is intended to allow +! more general sparse Jacobian options. SET_OPTS is intended to provide +! access to all options in DVODE_F90. + +! Please note that SET_INTERMEDIATE_OPTS can be invoked using the same +! arguments as SET_NORMAL_OPTS; and SET_OPTS can be invoked using the +! same arguments as either SET_NORMAL_OPTS or SET_INTERMEDIATE_OPTS. +! If you wish you can simply delete SET_NORMAL_OPTS as well as +! SET_INTERMEDIATE_OPTS and use only SET_OPTS for all problems. If you +! do so, you need only include the options that you wish to use when +! you invoke SET_OPTIONS. + +! In the following description any reference to SET_OPTS applies equally +! to SET_NORMAL_OPTS and SET_INTERMEDIATE OPTS. + +! Before calling DVODE_F90 for the first time, SET_OPTS must be invoked. +! Typically, SET_OPTS is called once to set the desired integration +! options and parameters. DVODE_F90 is then called in an output loop to +! obtain the solution for the desired times. A detailed description of +! the DVODE_F90 arguments is given in a section below. Detailed descriptions +! of the options available via SET_OPTS are given in the documentation prologue. +! Although each option available in the f77 version of DVODE as well as +! several additional ones are available in DVODE_F90 via SET_OPTS, +! several of the available options are not relevant for most problems +! and need not be specified. Refer to the accompanying demonstration +! programs for specific examples of each usage. Note that after any call +! to DVODE_F90, you may call GET_STATS to gather relevant integration +! statistics. After your problem has completed, you may call +! RELEASE_ARRAYS to deallocate any internal arrays allocated by +! DVODE_F90 and to determine how much storage was used by DVODE_F90. +! +! To communicate with DVODE_F90 you will need to include the following +! statement in your calling program: +! USE DVODE_F90_M +! and include the following statement in your type declarations section: +! TYPE(VODE_OPTS) :: OPTIONS +! Below are brief summaries of typical uses of SET_OPTS. +! Nonstiff Problems: +! OPTIONS = SET_OPTS(RELERR=RTOL,ABSERR=ATOL) +! The above use of SET_OPTS will integrate your system of odes +! using the nonstiff Adams methods while using a relative error +! tolerance of RTOL and an absolute error tolerance of ATOL. +! Your subsequent call to DVODE_F90 might look like: +! CALL DVODE_F90(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS) +! OPTIONS = SET_OPTS(RELERR=RTOL,ABSERR=ATOL,NEVENTS=NG) +! If you wish to do root finding, SET_OPTS can be used as above. +! Here, NEVENTS is the desired number of root finding functions. +! Your subsequent call to DVODE_F90 might look like: +! CALL DVODE_F90(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,G_FCN=G) +! Here F is the name of your derivative subroutine and G is the +! name of your subroutine to evaluate residuals of the root +! finding functions. +! OPTIONS = SET_OPTS(RELERR=RTOL,ABSERR_VECTOR=ATOL) +! This use of SET_OPTS indicates that a scalar relative error +! tolerance and a vector of absolute error tolerances will be +! used. +! Stiff Problems, internally generated dense Jacobian: +! OPTIONS = SET_OPTS(DENSE_J=.TRUE.,RELERR=RTOL,ABSERR=ATOL) +! This use of DENSE_J=.TRUE. indicates that DVODE_F90 will +! use the stiffly stable BDF methods and will approximate +! the Jacobian, considered to be a dense matrix, using +! finite differences. Your subsequent call to DVODE_F90 +! might look like: +! CALL DVODE_F90(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS) +! OPTIONS = SET_OPTS(DENSE_J=.TRUE.,ABSERR=ATOL,RELERR=RTOL, & +! USER_SUPPLIED_JACOBIAN=.TRUE.) +! If you know the Jacobian and wish to supply subroutine JAC +! as described in the documentation for DVODE_F90, the options +! call could look like the above. +! Your subsequent call to DVODE_F90 might look like: +! CALL DVODE_F90(F1,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,J_FCN=JAC) +! Here, JAC is the name of the subroutine that you provide to +! evaluate the known Jacobian. +! Stiff Problems, internally generated banded Jacobian: +! OPTIONS = SET_OPTS(BANDED_J=.TRUE.,RELERR=RTOL,ABSERR=ATOL, & +! LOWER_BANDWIDTH=ML,UPPER_BANDWIDTH=MU) +! This use of BANDED_J=.TRUE. indicates that DVODE_F90 will +! use the stiffly stable BDF methods and will approximate the +! Jacobian, considered to be a banded matrix, using finite +! differences. Here ML is the lower bandwidth of the Jacobian +! and ML is the upper bandwidth of the Jacobian. +! Stiff Problems, internally generated sparse Jacobian: +! OPTIONS = SET_OPTS(SPARSE_J=.TRUE.,ABSERR=ATOL,RELERR=RTOL) +! This use of SET_OPTS indicates that the Jacobian is a sparse +! matrix. Its structure will be approximated internally by +! making calls to your derivative routine. If you know the +! structure before hand, you may provide it directly in a +! variety of ways as described in the documentation prologue +! for SET_OPTS. In addition, several other options related +! to sparsity are available. +! More complicated common usage: +! Suppose you need to solve a stiff problem with a sparse Jacobian. +! After some time, the structure of the Jacobian changes and you +! wish to have DVODE_F90 recalculate the structure before continuing +! the integration. Suppose that initially you want to use an absolute +! error tolerance of 1.0D-5 and that when the Jacobian structure is +! changed you wish to reduce the error tolerance 1.0D-7. Your calls +! might look like this. +! RTOL = ... +! ATOL = 1.0D-5 +! OPTIONS = SET_OPTS(SPARSE_J=.TRUE.,ABSERR=ATOL,RELERR=RTOL) +! Output loop: +! CALL DVODE_F90(FCN,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS) +! At desired time: +! ISTATE = 3 +! ATOL = 1.0D-7 +! OPTIONS = SET_OPTS(SPARSE_J=.TRUE.,ABSERR=ATOL,RELERR=RTOL) +! End of output loop + +! In the following we have summarized and described how some of the demonstration +! programs set options and call DVODE_F90. In each case the necessary parameters +! are defined before invoking SET_OPTS. The call to DVODE_F90 is in a loop in +! which the output time is successively updated. The actual programs are available +! from the web support page +! +! http://www.radford. edu/~thompson/vodef90web/index.html/ +! +! Problem Summary +! +! Problem NEQ Jacobian Features Illustrated +! +! Prologue Example 1 3 Dense Basic +! +! Prologue Example 2 3 Dense Root finding +! +! Robertson 3 Dense Solution bounds +! +! Harmonic Oscillator 4 Nonstiff Root finding +! +! Flow Equations 5-1800 Sparse Automatic determination +! of sparsity arrays +! +! Diurnal Kinetics 50-5000 Sparse or banded Sparsity options +! +! Options Used and DVODE_F90 Call +! +! Prologue Example 1 +! +! OPTIONS = SET_NORMAL_OPTS(DENSE_J=.TRUE., ABSERR_VECTOR=ATOL, RELERR=RTOL, & +! USER_SUPPLIED_JACOBIAN=.TRUE.) +! CALL DVODE_F90(FEX,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,J_FCN=JEX) +! +! The problem consists of a stiff system of NEQ=3 equations. The dense +! Jacobian option (DENSE_J) is used. A vector ATOL(*) of error tolerances +! is used. A scalar relative error tolerance RTOL is used. Subroutine JEX +! is provided to evaluate the analytical Jacobian. If the last argument +! J_FCN=JEX is omitted (as in Example 2), a numerical Jacobian will +! be used. +! +! Prologue Example 2 +! +! OPTIONS = SET_NORMAL_OPTS(DENSE_J=.TRUE., RELERR=RTOL, ABSERR_VECTOR=ATOL, & +! NEVENTS=NG) +! CALL DVODE_F90(FEX,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,G_FCN=GEX) +! +! The system in Example 1 is used to illustrate root finding. It is +! desired to locate the times at which two of the solution components +! attain prescribed values. NEVENTS=2 informs the solver that two such +! functions are used. Subroutine GEX is used to calculate the residuals +! for these two functions. A dense numerical Jacobian is used. +! +! Robertson Problem +! +! OPTIONS = SET_INTERMEDIATE_OPTS(DENSE_J=.TRUE., RELERR_VECTOR=RTOL, & +! ABSERR_VECTOR=ABSERR_TOLERANCES, CONSTRAINED=BOUNDED_COMPONENTS, & +! CLOWER=LOWER_BOUNDS, CUPPER=UPPER_BOUNDS) +! +! CALL DVODE_F90(DERIVS,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,J_FCN=JACD) +! The system used in Examples 1 and 2 is solved over a much larger +! time interval. The solution is constrained to be nonegative. This +! is done by prescribing the components to be constrained (BOUNDED_COMPONENTS). +! Artificially large values are used to impose upper bounds (UPPER_BOUNDS) +! and lower bounds of zero are used to force a nonnegative solution. +! +! Harmonic Oscillator Problem +! +! OPTIONS = SET_NORMAL_OPTS(RELERR=RTOL, ABSERR=ATOL, NEVENTS=NG) +! CALL DVODE_F90(DERIVS,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,G_FCN=GEVENTS) +! +! A nonstiff system of NEQ=4 equations is solved. The nonstiff option is +! used because neither DENSE_ nor BANDED_J nor SPARSE_J is present. It is +! desired to find the times at which Y(2) or Y(3) is equal to 0. Residuals +! for the two corresponding event functions are calculated in subroutine +! GEVENTS. +! +! Flow Equations Problem +! +! OPTIONS = SET_OPTS(SPARSE_J=SPARSE, ABSERR=ATOL(1), RELERR=RTOL(1), & +! MXSTEP=100000, NZSWAG=20000) +! CALL DVODE_F90(FCN,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS) +! +! This is a stiff system of equations resulting a method of lines +! discretization. The Jacobian is sparse. Scalar absolute and relative +! error tolerances are used. The Jacobian structure and a numerical +! Jacobian are used. The solver is limited to a maximum of MXSTEP steps. +! NZSWAG is the amount by which allocated array sizes will be increased. +! The accompanying test program may be used to illutrate several other +! solution options. +! +! Diurnal Kinetics Problem +! +! OPTIONS = SET_OPTS(SPARSE_J=SPARSE, BANDED_J=BANDED, DENSE_J=DENSE, & +! ABSERR_VECTOR=ATOL(1:NEQ), RELERR=RTOL(1), MXSTEP=100000, & +! NZSWAG=50000, HMAX=MAXH, LOWER_BANDWIDTH=ML, UPPER_BANDWIDTH=MU, & +! MA28_ELBOW_ROOM=10, MC19_SCALING=.TRUE., MA28_MESSAGES=.FALSE., & +! MA28_EPS=1.0D-4, MA28_RPS=.TRUE., & +! USER_SUPPLIED_SPARSITY=SUPPLY_STRUCTURE) +! CALL USERSETS_IAJA(IA, IADIM, JA, JADIM) +! CALL DVODE_F90(FCN, NEQ, Y, T, TOUT, ITASK, ISTATE, OPTIONS) +! +! This problem can be used to illustrate most solution options. Here, dense, +! banded, or sparse Jacobians are used depending on the values of the first +! three parameters. A vector error tolerance is used and a scalar relative +! error tolerance is used. If a banded solution is desired, it is necessary +! to supply the bandwidths ML and MU. If a sparse solution is desired, +! several special options are used. The most important one is MA28_RPS to +! force the solver to update the partial pivoting sequence when singular +! iteration matrices are encountered. The sparsity pattern is determined +! numerically if SUPPLY_STRUCTURE is FALSE. Otherwise the user will supply +! the pattern by calling subroutine USERSETS_IAJA. +! ______________________________________________________________________ +! Section 2. Calling DVODE_F90 +! +! DVODE_F90 solves the initial value problem for stiff or nonstiff +! systems of first order ODEs, +! dy/dt = f(t,y), or, in component form, +! dy(i)/dt = f(i) = f(i,t,y(1),y(2),...,y(NEQ)) (i = 1,...,NEQ). +! DVODE_F90 is a package based on the EPISODE and EPISODEB packages, +! and on the ODEPACK user interface standard. It was developed from +! the f77 solver DVODE developed by Brown, Byrne, and Hindmarsh. +! DVODE_F90 also provides for the solution of sparse systems in a +! fashion similar to LSODES and LSOD28. Currently, MA28 is used +! to perform the necessary sparse linear algebra. DVODE_F90 also +! contains the provision to do root finding in a fashion similar +! to the LSODAR solver from ODEPACK. + +! Communication between the user and the DVODE_F90 package, for normal +! situations, is summarized here. This summary describes only a subset +! of the full set of options available. See the full description for +! details, including optional communication, nonstandard options, and +! instructions for special situations. +! CALL DVODE_F90(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,J_FCN=JAC,G_FCN=GEX) +! The arguments in the call list to DVODE_F90 have the following +! meanings. +! F = The name of the user-supplied subroutine defining the +! ODE system. The system must be put in the first-order +! form dy/dt = f(t,y), where f is a vector-valued function +! of the scalar t and the vector y. Subroutine F is to +! compute the function f. It is to have the form +! SUBROUTINE F(NEQ,T,Y,YDOT) +! DOUBLE PRECISION T,Y(NEQ),YDOT(NEQ) +! where NEQ, T, and Y are input, and the array YDOT = f(t,y) +! is output. Y and YDOT are arrays of length NEQ. +! Subroutine F should not alter Y(1),...,Y(NEQ). +! If F (and JAC) are not contained in a module available to +! your calling program, you must declare F to be EXTERNAL +! in the calling program. +! NEQ = The size of the ODE system (number of first order +! ordinary differential equations). +! Y = A double precision array for the vector of dependent variables, +! of length NEQ or more. Used for both input and output on the +! first call (ISTATE = 1), and only for output on other calls. +! On the first call, Y must contain the vector of initial +! values. In the output, Y contains the computed solution +! evaluated at T. +! T = The independent variable. In the input, T is used only on +! the first call, as the initial point of the integration. +! In the output, after each call, T is the value at which a +! computed solution Y is evaluated (usually the same as TOUT). +! On an error return, T is the farthest point reached. +! TOUT = The next value of t at which a computed solution is desired. +! TOUT is Used only for input. When starting the problem +! (ISTATE = 1), TOUT may be equal to T for one call, then +! should not equal T for the next call. For the initial T, +! an input value of TOUT unequal to T is used in order to +! determine the direction of the integration (i.e. the +! algebraic sign of the step sizes) and the rough scale +! of the problem. Integration in either direction (forward +! or backward in t) is permitted. If ITASK = 2 or 5 (one-step +! modes), TOUT is ignored after the first call (i.e. the +! first call with TOUT \= T). Otherwise, TOUT is required +! on every call. If ITASK = 1, 3, or 4, the values of TOUT +! need not be monotone, but a value of TOUT which backs up +! is limited to the current internal t interval, whose +! endpoints are TCUR - HU and TCUR. (Refer to the description +! of GET_STATS for a description of TCUR and HU.) +! ITASK = An index specifying the task to be performed. +! Input only. ITASK has the following values and meanings. +! 1 means normal computation of output values of y(t) at +! t = TOUT (by overshooting and interpolating). +! 2 means take one step only and return. +! 3 means stop at the first internal mesh point at or +! beyond t = TOUT and return. +! 4 means normal computation of output values of y(t) at +! t = TOUT but without overshooting t = TCRIT. +! TCRIT must be specified in your SET_OPTS call. TCRIT +! may be equal to or beyond TOUT, but not behind it in +! the direction of integration. This option is useful +! if the problem has a singularity at or beyond t = TCRIT. +! 5 means take one step, without passing TCRIT, and return. +! TCRIT must be specified in your SET_OPTS call. +! If ITASK = 4 or 5 and the solver reaches TCRIT (within +! roundoff), it will return T = TCRIT(exactly) to indicate +! this (unless ITASK = 4 and TOUT comes before TCRIT, in +! which case answers at T = TOUT are returned first). +! ISTATE = an index used for input and output to specify the +! the state of the calculation. +! In the input, the values of ISTATE are as follows. +! 1 means this is the first call for the problem +! (initializations will be done). See note below. +! 2 means this is not the first call, and the calculation +! is to continue normally, with no change in any input +! parameters except possibly TOUT and ITASK. +! 3 means this is not the first call, and the +! calculation is to continue normally, but with +! a change in input parameters other than +! TOUT and ITASK. Desired changes require SET_OPTS +! be called prior to calling DVODE_F90 again. +! A preliminary call with TOUT = T is not counted as a +! first call here, as no initialization or checking of +! input is done. (Such a call is sometimes useful to +! include the initial conditions in the output.) +! Thus the first call for which TOUT is unequal to T +! requires ISTATE = 1 in the input. +! In the output, ISTATE has the following values and meanings. +! 1 means nothing was done, as TOUT was equal to T with +! ISTATE = 1 in the input. +! 2 means the integration was performed successfully. +! 3 means a root of one of your root finding functions +! has been located. +! A negative value of ISTATE indicates that DVODE_F90 +! encountered an error as described in the printed error +! message. Since the normal output value of ISTATE is 2, +! it does not need to be reset for normal continuation. +! Also, since a negative input value of ISTATE will be +! regarded as illegal, a negative output value requires +! the user to change it, and possibly other input, before +! calling the solver again. +! OPTIONS = The options structure produced by your call to SET_OPTS. +! JAC = The name of the user-supplied routine (MITER = 1 or 4 or 6) +! If you do not specify that a stiff method is to be used +! in your call to SET_OPTS, you need not include JAC in +! your call to DVODE_F90. If you specify a stiff method and +! that a user supplied Jacobian will be supplied, JAC must +! compute the Jacobian matrix, df/dy, as a function of the +! scalar t and the vector y. It is to have the form: +! SUBROUTINE JAC(NEQ, T, Y, ML, MU, PD, NROWPD) +! DOUBLE PRECISION T, Y(NEQ), PD(NROWPD,NEQ) +! where NEQ, T, Y, ML, MU, and NROWPD are input and the array +! PD is to be loaded with partial derivatives (elements of the +! Jacobian matrix) in the output. PD must be given a first +! dimension of NROWPD. T and Y have the same meaning as in +! Subroutine F. +! In the full matrix case (MITER = 1), ML and MU are +! ignored, and the Jacobian is to be loaded into PD in +! columnwise manner, with df(i)/dy(j) loaded into PD(i,j). +! In the band matrix case (MITER = 4), the elements +! within the band are to be loaded into PD in columnwise +! manner, with diagonal lines of df/dy loaded into the rows +! of PD. Thus df(i)/dy(j) is to be loaded into PD(i-j+MU+1,j). +! ML and MU are the half-bandwidth parameters. (See IUSER). +! The locations in PD in the two triangular areas which +! correspond to nonexistent matrix elements can be ignored +! or loaded arbitrarily, as they are overwritten by DVODE_F90. +! In the sparse matrix case the elements of the matrix +! are determined by the sparsity structure given by the +! IA and JA pointer arrays. Refer to the documentation +! prologue for SET_OPTS for a description of the arguments +! for JAC since they differ from the dense and banded cases. +! JAC need not provide df/dy exactly. A crude +! approximation (possibly with a smaller bandwidth) will do. +! In either case, PD is preset to zero by the solver, +! so that only the nonzero elements need be loaded by JAC. +! In the sparse matrix case, JAC has a different form: +! SUBROUTINE JAC (N, T, Y, IA, JA, NZ, PD) +! Given the number of odes N, the current time T, and the +! current solution vector Y, JAC must do the following: +! If NZ = 0 on input: +! Replace NZ by the number of nonzero elements in the +! Jacobian. The diagonal of the Jacobian must be included. +! Do NOT define the arrays IA, JA, PD at this time. +! Once JAC has been called with NZ = 0 and you have +! defined the value of NZ, future calls to JAC will use +! this value of NZ. +! When a call is made with NZ unequal to 0, you must +! define the sparsity structure arrays IA and JA, and +! the sparse Jacobian PD. +! IA defines the number of nonzeros including the +! diagonal in each column of the Jacobian. Define +! IA(1) = 1 and for J = 1,..., N, +! IA(J+1) = IA(J) + number of nonzeros in column J. +! Diagonal elements must be included even if they are +! zero. You should check to ensure that IA(N+1)-1 = NZ. +! JA defines the rows in which the nonzeros occur. +! For I = 1,...,NZ, JA(I) is the row in which the Ith +! element of the Jacobian occurs. JA must also include +! the diagonal elements of the Jacobian. +! PD defines the numerical value of the Jacobian +! elements. For I = 1,...,NZ, PD(I) is the numerical +! value of the Ith element in the Jacobian. PD must +! also include the diagonal elements of the Jacobian. +! GFUN = the name of the subroutine to evaluate the residuals for +! event functions. If you do not specify that events are +! present (by specifying NEVENTS > 0 in SET_OPTS), you +! need not include GFUN in your call list for DVODE_F90. +! If GFUN is not contained in a module available to your +! calling program, you must declare GFUN to be EXTERNAL +! in your calling program. +! To continue the integration after a successful return, simply +! reset TOUT and call DVODE_F90 again. No other parameters need +! be reset unless ISTATE=3 in which case, reset it to 2 before +! calling DVODE_F90 again. +! ______________________________________________________________________ +! Section 3. Choosing Error Tolerances +! +! This is the most important aspect of solving odes numerically. +! You may supply any of four keywords and values. If you wish to +! use scalar error tolerances, you should supply ABSERR and RELERR. +! For a good many problems, it is advisable to supply a vector of +! absolute error tolerances ABSERR_VECTOR = desired vector. This +! allows you to use different tolerances for each component of +! the solution. If ABSERR_VECTOR is supplied, it must be a vector +! of length NEQ where NEQ is the number of odes in your system. +! Similarly, you may supply a vector of relative error tolerances, +! RELERR_VECTOR. If no tolerances are specified, DVODE_F90 will use +! default error tolerances ABSERR=1D-6 and RELERR=1D-4; but it is +! strongly recommended that you supply values that are appropriate +! for your problem. In the event you do not supply error tolerances, +! DVODE_F90 will print a reminder that the default error tolerances +! are not appropriate for all problems. +! +! RELERR can be set to a scalar value as follows. +! Determine the number of significant digits of accuracy desired, +! which will be a positive integer, say, N. +! Then set RELERR = 10**-(N+1). +! The authors recommend that RELERR be no larger than 10**-4. +! The authors recommend a vector valued absolute error tolerance, +! which can be set as follows. +! For the I-th component of the solution vector, Y(I), determine +! the positive number FLOOR(I) at which ABS(Y(I)) becomes +! negligible for the problem at hand. FLOOR(I) is sometimes called +! the problem zero or the floor value for the I-th component and is +! problem dependent. For a given problem that is not scaled, these +! floor values may well vary by up to 9 orders of magnitude. +! Set ABSERR(I) = FLOOR(I) or to be conservative +! ABSERR_VECTOR(I) = 0.1*FLOOR(I). There is no variable FLOOR in +! DVODE_F90. If it is difficult to divine the components of ABSERR, +! (or FLOOR) make a reasonable guess, run the problem, then set that +! ABSERR_VECTOR so for I = 1, 2,...NEQ, +! ABSERR_VECTOR(I) = 1D-6*RELERR*MAX{ABS(Y(I,TOUT): for all TOUT}. +! The correct choices for RELERR and ABSERR can and do have +! significant impact on both the quality of the solution and run +! time. Counter intuitively, error tolerances that are too loose +! can and do increase run time significantly and the quality of +! the solution may be seriously compromised. +! Examples: +! 1. OPTIONS = SET_OPTS(DENSE_J=.TRUE., ABSERR=1D-8,RELERR=1D-8) +! This will yield MF = 22. Both the relative error tolerance +! and the absolute error tolerance will equal 1D-8. +! 2. OPTIONS = SET_OPTS(DENSE_J=.TRUE.,RELERR=1D-5, & +! ABSERR_VECTOR=(/1D-6,1D-8/)) +! For a system with NEQ=2 odes, this will yield MF = 22. A scalar +! relative error tolerance equal to 1D-5 will be used. Component 1 +! of the solution will use an absolute error tolerance of 1D-6 +! while component 2 will use an absolute error tolerance of 1D-8. +! ______________________________________________________________________ +! Section 4. Choosing the Method of Integration +! +! If you wish to specify a numerical value for METHOD_FLAG, it can equal +! any of the legal values of MF for DVODE or LSODES. (See below.) If +! you do not wish to specify a numerical value for METHOD_FLAG, you +! may specify any combination of the five logical keywords DENSE_J, +! BANDED_J, SPARSE_J, USER_SUPPLIED_JACOBIAN, SAVE_JACOBIAN that you +! wish. Appropriate values will be used in DVODE_F90 for any variables +! that are not present. The first three flags indicate the type of +! Jacobian, dense, banded, or sparse. If USER_SUPPLIED_JACOBIAN=.TRUE., +! the Jacobian will be determined using the subroutine JAC you supply +! in your call to DVODE_F90. Otherwise, an internal Jacobian will be +! generated using finite differences. +! Examples: +! 1. OPTIONS = SET_OPTS(METHOD_FLAG=22,...) +! DVODE will use the value MF=22 as described in the documentation +! prologue. In this case, the stiff BDF methods will be used with +! a dense, internally generated Jacobian. +! 2. OPTIONS = SET_OPTS(METHOD_FLAG=21,...) +! This is the same an Example 1 except that a user supplied dense +! Jacobian will be used and DVODE will use MF=21. +! 3. OPTIONS = SET_OPTS(DENSE_J=.TRUE.,...) +! This will yield MF = 22 as in Example 1, provided +! USER_SUPPLIED_JACOBIAN and SAVE_JACOBIAN are not present, or if +! present are set to their default +! values of .FALSE. and .TRUE., respectively. +! 4. OPTIONS = SET_OPTS(DENSE_J=.TRUE.,& +! USER_SUPPLIED_JACOBIAN=.TRUE.,...) +! This will yield MF = 21 as in Example 2, provided SAVE_JACOBIAN +! is not present, or if present is set to its default value .FALSE. +! Notes: +! 1. If you specify more than one of DENSE_J, BANDED_J, and SPARSE_J, +! DENSE_J takes precedence over BANDED_J which in turn takes +! precedence over SPARSE_J. +! 2. By default, DVODE_F90 saves a copy of the Jacobian and reuses the +! copy when necessary. For problems in which storage requirements +! are acute, you may wish to override this default and have +! DVODE_F90 recalculate the Jacobian rather than use a saved copy. +! You can do this by specifying SAVE_JACOBIAN=.FALSE. It is +! recommended that you not do this unless necessary since it can +! have a significant impact on the efficiency of DVODE_F90. (For +! example, when solving a linear problem only one evaluation of +! the Jacobian is required with the default option.) +! 3. If you choose BANDED_J = .TRUE. or if you supply a value of MF +! that corresponds to a banded Jacobian, you must also supply the +! lower bandwidth ML and the upper bandwidth of the Jacobian MU +! by including the keywords +! LOWER_BANDWIDTH = value of ML and UPPER_BANDWIDTH = value of M +! More on Method Selection +! The keyword options available in SET_OPTS are intended to replace +! the original method indicator flag MF. However, if you wish to +! retain the flexibility of the original solver, you may specify MF +! directly in your call to SET_OPTS. This is done by using the +! keyword METHOD_FLAG=MF in your SET_OPTS where MF has any of the +! values in the following description. Refer to the demonstration +! program demosp.f90 for an example in which this is done. +! MF = The method flag. Used only for input. The legal values of +! MF are: +! 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, +! 27, -11, -12, -14, -15, -21, -22, -24, -25, -26, -27. +! MF is a signed two-digit integer, MF = JSV*(10*METH+MITER). +! JSV = SIGN(MF) indicates the Jacobian-saving strategy: +! JSV = 1 means a copy of the Jacobian is saved for reuse +! in the corrector iteration algorithm. +! JSV = -1 means a copy of the Jacobian is not saved +! (valid only for MITER = 1, 2, 4, or 5). +! METH indicates the basic linear multistep method: +! METH = 1 means the implicit Adams method. +! METH = 2 means the method based on backward +! differentiation formulas (BDF-s). +! MITER indicates the corrector iteration method: +! MITER = 0 means functional iteration (no Jacobian matrix +! is involved). +! MITER = 1 means chord iteration with a user-supplied +! full (NEQ by NEQ) Jacobian. +! MITER = 2 means chord iteration with an internally +! generated (difference quotient) full Jacobian +! (using NEQ extra calls to F per df/dy value). +! MITER = 4 means chord iteration with a user-supplied +! banded Jacobian. +! MITER = 5 means chord iteration with an internally +! generated banded Jacobian (using ML+MU+1 extra +! calls to F per df/dy evaluation). +! MITER = 6 means chord iteration with a user-supplied +! sparse Jacobian. +! MITER = 7 means chord iteration with an internally +! generated sparse Jacobian +! If MITER = 1, 4, or 6 the user must supply a subroutine +! JAC(the name is arbitrary) as described above under JAC. +! For other values of MITER, JAC need not be provided. +! ______________________________________________________________________ +! Section 5. Interpolation of the Solution and Derivative +! +! Following a successful return from DVODE_F90, you may call +! subroutine DVINDY to interpolate the solution or derivative. +! SUBROUTINE DVINDY(T, K, DKY, IFLAG) +! DVINDY computes interpolated values of the K-th derivative of the +! dependent variable vector y, and stores it in DKY. This routine +! is called with K = 0 or K = 1 and T = TOUT. In either case, the +! results are returned in the array DKY of length at least NEQ which +! must be declared and dimensioned in your calling program. The +! computed values in DKY are obtained by interpolation using the +! Nordsieck history array. +! ______________________________________________________________________ +! Section 6. Handling Events (Root Finding) +! +! DVODE_F90 contains root finding provisions. It attempts to +! locates the roots of a set of functions +! g(i) = g(i,t,y(1),...,y(NEQ)) (i = 1,...,ng). +! To use root finding include NEVENTS=NG in your call to SET_OPTS +! where NG is the number of root finding functions. You must then +! supply subroutine GFUN in your call to DVODE_F90 using +! G_FCN=GFUN as the last argument. GFUN must has the form +! SUBROUTINE GFUN(NEQ, T, Y, NG, GOUT) +! where NEQ, T, Y, and NG are input, and the array GOUT is output. +! NEQ, T, and Y have the same meaning as in the F routine, and +! GOUT is an array of length NG. For i = 1,...,NG, this routine is +! to load into GOUT(i) the value at (T,Y) of the i-th constraint +! function g(i). DVODE_F90 will find roots of the g(i) of odd +! multiplicity (i.e. sign changes) as they occur during the +! integration. GFUN must be declared EXTERNAL in the calling +! program. Note that because of numerical errors in the functions +! g(i) due to roundoff and integration error, DVODE_F90 may return +! false roots, or return the same root at two or more nearly equal +! values of t. This is particularly true for problems in which the +! integration is restarted (ISTATE = 1) at a root. If such false +! roots are suspected, you should consider smaller error tolerances +! and/or higher precision in the evaluation of the g(i). Note +! further that if a root of some g(i) defines the end of the +! problem, the input to DVODE_F90 should nevertheless allow +! integration to a point slightly past that root, so that DVODE_F90 +! can locate the root by interpolation. Each time DVODE_F90 locates +! a root of one of your event functions it makes a return to the +! calling program with ISTATE = 3. When such a return is made and +! you have processed the results, simply change ISTATE = 2 and call +! DVODE_F90 again without making other changes. +! ______________________________________________________________________ +! Section 7. Gathering Integration Statistics +! +! SUBROUTINE GET_STATS(RSTATS, ISTATS, NUMEVENTS, JROOTS) +! Caution: +! RSTATS and ISTATS must be declared and dimensioned in your +! main program. The minimum dimensions are: +! DIMENSION RSTATS(22), ISTATS(31) +! This subroutine returns the user portions of the original DVODE +! RUSER and IUSER arrays, and if root finding is being done, it +! returns the original LSODAR JROOT vector. NUMEVENTS and JROOTS +! are optional parameters. NUMEVENTS is the number of root functions +! and JROOTS is an integer array of length NUMEVENTS. +! Available Integration Statistics: +! HU RUSER(11) The step size in t last used (successfully). +! HCUR RUSER(12) The step size to be attempted on the next step. +! TCUR RUSER(13) The current value of the independent variable +! which the solver has actually reached, i.e. the +! current internal mesh point in t. In the output, +! TCUR will always be at least as far from the +! initial value of t as the current argument T, +! but may be farther (if interpolation was done). +! TOLSF RUSER(14) A tolerance scale factor, greater than 1.0, +! computed when a request for too much accuracy was +! detected (ISTATE = -3 if detected at the start of +! the problem, ISTATE = -2 otherwise). If ITOL is +! left unaltered but RTOL and ATOL are uniformly +! scaled up by a factor of TOLSF for the next call, +! then the solver is deemed likely to succeed. +! (The user may also ignore TOLSF and alter the +! tolerance parameters in any other way appropriate.) +! NST IUSER(11) The number of steps taken for the problem so far. +! NFE IUSER(12) The number of f evaluations for the problem so far. +! NJE IUSER(13) The number of Jacobian evaluations so far. +! NQU IUSER(14) The method order last used (successfully). +! NQCUR IUSER(15) The order to be attempted on the next step. +! IMXER IUSER(16) The index of the component of largest magnitude in +! the weighted local error vector (E(i)/EWT(i)), +! on an error return with ISTATE = -4 or -5. +! LENRW IUSER(17) The length of RUSER actually required. +! This is defined on normal returns and on an illegal +! input return for insufficient storage. +! LENIW IUSER(18) The length of IUSER actually required. +! This is defined on normal returns and on an illegal +! input return for insufficient storage. +! NLU IUSER(19) The number of matrix LU decompositions so far. +! NNI IUSER(20) The number of nonlinear (Newton) iterations so far. +! NCFN IUSER(21) The number of convergence failures of the nonlinear +! solver so far. +! NETF IUSER(22) The number of error test failures of the integrator +! so far. +! MA28AD_CALLS IUSER(23) The number of calls made to MA28AD +! MA28BD_CALLS IUSER(24) The number of calls made to MA28BD +! MA28CD_CALLS IUSER(25) The number of calls made to MA28CD +! MC19AD_CALLS IUSER(26) The number of calls made to MC19AD +! IRNCP IUSER(27) The number of compressions done on array JAN +! ICNCP IUSER(28) The number of compressions done on array ICN +! MINIRN IUSER(29) Minimum size for JAN array +! MINICN IUSER(30) Minimum size for ICN array +! MINNZ IUSER(31) Number of nonzeros in sparse Jacobian +! JROOTS JROOTS Optional array of component indices for components +! having a zero at the current time +! ______________________________________________________________________ +! Section 8. Determining Jacobian Sparsity Structure Arrays +! +! If you are solving a problem with a sparse Jacobian, the arrays +! that define the sparsity structure are needed. The arrays may +! be determined in any of several ways. +! 1. If you choose the default mode by indicating SPARSE=.TRUE., +! the sparsity arrays will be determined internally by DVODE_F90 +! by making calls to your derivative subroutine. This mode is +! equivalent to using the integration method flag MF = 227. +! 2. The DVODE_F90 method flag MF is defined to be +! MF = 100*MOSS + 10*METH + MITER. If you supply MF = 227 (or 217), +! the sparse Jacobian will be determined using finite differences; +! and the sparsity arrays will be determined by calling your +! derivative subroutine. +! 3. If you supply MF = 126 (or 116), you must supply the Jacobian +! subroutine JAC to define the exact Jacobian. JAC must have the +! following form: +! SUBROUTINE JAC (N, T, Y, IA, JA, NZ, PD) +! Given the number of odes N, the current time T, and the current +! solution vector Y, JAC must do the following: +! - If NZ = 0 on input: +! Replace NZ by the number of nonzero elements in the Jacobian. +! The diagonal of the Jacobian must be included. +! Do NOT define the arrays IA, JA, PD at this time. +! Once JAC has been called with NZ = 0 and you have defined the +! value of NZ, future calls to JAC will use this value of NZ. +! - When a call is made with NZ unequal to 0, you must define the +! sparsity structure arrays IA and JA, and the sparse Jacobian +! PD. +! - IA defines the number of nonzeros including the diagonal +! in each column of the Jacobian. Define IA(1) = 1 and for +! J = 1,..., N, +! IA(J+1) = IA(J) + number of nonzeros in column J. +! Diagonal elements must be include even if they are zero. +! You should check to ensure that IA(N+1)-1 = NZ. +! - JA defines the rows in which the nonzeros occur. For +! I = 1,...,NZ, JA(I) is the row in which the Ith element +! of the Jacobian occurs. JA must also include the diagonal +! elements of the Jacobian. +! - PD defines the numerical value of the Jacobian elements. +! For I = 1,...,NZ, PD(I) is the numerical value of the +! Ith element in the Jacobian. PD must also include the +! diagonal elements of the Jacobian. +! 4. If you wish to supply the IA and JA arrays directly, use +! MF = 27. In this case, after calling SET_OPTS, you must call +! SET_IAJA supplying the arrays IAUSER and JAUSER described in +! the documentation prologue for SET_IAJA. These arrays will be +! used when approximate Jacobians are determined using finite +! differences. +! There are two user callable sparsity structure subroutines: +! USERSETS_IAJA may be used if you wish to supply the sparsity +! structure directly. +! SUBROUTINE USERSETS_IAJA(IAUSER,NIAUSER,JAUSER,NJAUSER) +! Caution: +! If it is called, USERSETS_IAJA must be called after the +! call to SET_OPTS. +! Usage: +! CALL SET_IAJA(IAUSER,NIAUSER,JAUSER,NJAUSER) +! In this case, IAUSER of length NIAUSER will be used for +! IA; and JAUSER of length NJAUSER will be used for JA. +! Arguments: +! IAUSER = user supplied IA array +! NIAUSER = length of IAUSER array +! JAUSER = user supplied JA vector +! NJAUSER = length of JAUSER array +! The second subroutine allows you to approximate the sparsity +! structure using derivative differences. It allows more flexibility +! in the determination of perturbation increments used. +! SUBROUTINE SET_IAJA(DFN,NEQ,T,Y,FMIN,NTURB,DTURB,IAUSER, & +! NIAUSER, JAUSER, NJAUSER) +! Caution: +! If it is called, SET_IAJA must be called after the call to +! SET_OPTS. +! Usage: +! SET_IAJA may be called in one of two ways: + +! CALL SET_IAJA(DFN,NEQ,T,Y,FMIN,NTURB,DTURB) +! In this case IA and JA will be determined using calls +! to your derivative routine DFN. +! CALL SET_IAJA(DFN,NEQ,T,Y,FMIN,NTURB,DTURB,IAUSER,NIAUSER, & +! JAUSER, NJAUSER) +! In this case, IAUSER of length NIAUSER will be used for +! IA; and JAUSER of length NJAUSER will be used for JA. +! T, Y, FMIN, NTURB, and DTURB will be ignored (though +! they must be present in the argument list). +! Arguments: +! DFN = DVODE derivative subroutine +! NEQ = Number of odes +! T = independent variable t +! Y = solution y(t) +! FMIN = Jacobian threshold value. Elements of the Jacobian +! with magnitude smaller than FMIN will be ignored. +! FMIN will be ignored if it is less than or equal +! to ZERO. +! NTURB = Perturbation flag. If NTURB=1, component I of Y +! will be perturbed by 1.01D0. +! If NTURB=NEQ, component I of Y will be perturbed +! by ONE + DTURB(I). +! DTURB = perturbation vector of length 1 or NEQ. +! If these four optional parameters are present, IAUSER and JAUSER +! will be copied to IA and JA rather than making derivative calls +! to approximate IA and JA: +! IAUSER = user supplied IA array +! NIAUSER = length of IAUSER array +! JAUSER = user supplied JA vector +! NJAUSER = length of JAUSER array +! ______________________________________________________________________ +! Section 9. Original DVODE.F Documentation Prologue +! +! SUBROUTINE DVODE(F, NEQ, Y, T, TOUT, ITASK, ISTATE, OPTS, JAC, GFUN) +! DVODE: Variable-coefficient Ordinary Differential Equation solver, +! with fixed-leading-coefficient implementation. +! Note: +! Numerous changes have been made in the documentation and the code +! from the original Fortran 77 DVODE solver. With regard to the new +! F90 version, if you choose options that correspond to options +! available in the original f77 solver, you should obtain the same +! results. In all testing, identical results have been obtained +! between this version and a simple F90 translation of the original +! solver. +! DVODE solves the initial value problem for stiff or nonstiff +! systems of first order ODEs, +! dy/dt = f(t,y), or, in component form, +! dy(i)/dt = f(i) = f(i,t,y(1),y(2),...,y(NEQ)) (i = 1,...,NEQ). +! DVODE is a package based on the EPISODE and EPISODEB packages, and +! on the ODEPACK user interface standard, with minor modifications. +! This version is based also on elements of LSODES and LSODAR. +! Authors: +! Peter N. Brown and Alan C. Hindmarsh +! Center for Applied Scientific Computing, L-561 +! Lawrence Livermore National Laboratory +! Livermore, CA 94551 +! George D. Byrne +! Illinois Institute of Technology +! Chicago, IL 60616 +! References: +! 1. P. N. Brown, G. D. Byrne, and A. C. Hindmarsh, "VODE: A Variable +! Coefficient ODE Solver," SIAM J. Sci. Stat. Comput., 10 (1989), +! pp. 1038-1051. Also, LLNL Report UCRL-98412, June 1988. +! 2. G. D. Byrne and A. C. Hindmarsh, "A Polyalgorithm for the +! Numerical Solution of Ordinary Differential Equations," +! ACM Trans. Math. Software, 1 (1975), pp. 71-96. +! 3. A. C. Hindmarsh and G. D. Byrne, "EPISODE: An Effective Package +! for the Integration of Systems of Ordinary Differential +! Equations," LLNL Report UCID/30112, Rev. 1, April 1977. +! 4. G. D. Byrne and A. C. Hindmarsh, "EPISODEB: An Experimental +! Package for the Integration of Systems of Ordinary Differential +! Equations with Banded Jacobians," LLNL Report UCID/30132, April +! 1976. +! 5. A. C. Hindmarsh, "ODEPACK, a Systematized Collection of ODE +! Solvers," in Scientific Computing, R. S. Stepleman et al., eds., +! North-Holland, Amsterdam, 1983, pp. 55-64. +! 6. K. R. Jackson and R. Sacks-Davis, "An Alternative Implementation +! of Variable Step-Size Multistep Formulas for Stiff ODEs," ACM +! Trans. Math. Software, 6 (1980), pp. 295-318. +! Summary of Usage +! Communication between the user and the DVODE package, for normal +! situations, is summarized here. This summary describes only a subset +! of the full set of options available. See the full description for +! details, including optional communication, nonstandard options, +! and instructions for special situations. See also the example +! problem (with program and output) following this summary. +! A. First provide a subroutine of the form: +! SUBROUTINE F(NEQ, T, Y, YDOT) +! REAL(KIND=WP) T, Y(NEQ), YDOT(NEQ) +! which supplies the vector function f by loading YDOT(i) with f(i). +! B. Next determine (or guess) whether or not the problem is stiff. +! Stiffness occurs when the Jacobian matrix df/dy has an eigenvalue +! whose real part is negative and large in magnitude, compared to the +! reciprocal of the t span of interest. If the problem is nonstiff, +! use a method flag MF = 10. If it is stiff, there are four standard +! choices for MF(21, 22, 24, 25), and DVODE requires the Jacobian +! matrix in some form. In these cases (MF > 0), DVODE will use a +! saved copy of the Jacobian matrix. If this is undesirable because of +! storage limitations, set MF to the corresponding negative value +! (-21, -22, -24, -25). (See full description of MF below.) +! The Jacobian matrix is regarded either as full (MF = 21 or 22), +! or banded (MF = 24 or 25). In the banded case, DVODE requires two +! half-bandwidth parameters ML and MU. These are, respectively, the +! widths of the lower and upper parts of the band, excluding the main +! diagonal. Thus the band consists of the locations (i,j) with +! i-ML <= j <= i+MU, and the full bandwidth is ML+MU+1. +! C. If the problem is stiff, you are encouraged to supply the Jacobian +! directly (MF = 21 or 24), but if this is not feasible, DVODE will +! compute it internally by difference quotients (MF = 22 or 25). +! If you are supplying the Jacobian, provide a subroutine of the form: +! SUBROUTINE JAC(NEQ, T, Y, ML, MU, PD, NROWPD) +! REAL(KIND=WP) T, Y(NEQ), PD(NROWPD,NEQ) +! which supplies df/dy by loading PD as follows: +! For a full Jacobian (MF = 21), load PD(i,j) with df(i)/dy(j), +! the partial derivative of f(i) with respect to y(j). (Ignore the +! ML and MU arguments in this case.) +! For a banded Jacobian (MF = 24), load PD(i-j+MU+1,j) with +! df(i)/dy(j), i.e. load the diagonal lines of df/dy into the rows of +! PD from the top down. +! In either case, only nonzero elements need be loaded. +! D. Write a main program which calls subroutine DVODE once for +! each point at which answers are desired. This should also provide +! for possible use of logical unit 6 for output of error messages +! by DVODE. On the first call to DVODE, supply arguments as follows: +! F = Name of subroutine for right-hand side vector f. +! This name must be declared external in calling program. +! NEQ = Number of first order ODEs. +! Y = Array of initial values, of length NEQ. +! T = The initial value of the independent variable. +! TOUT = First point where output is desired (/= T). +! ITOL = 1 or 2 according as ATOL(below) is a scalar or array. +! RTOL = Relative tolerance parameter (scalar). +! ATOL = Absolute tolerance parameter (scalar or array). +! The estimated local error in Y(i) will be controlled so as +! to be roughly less (in magnitude) than +! EWT(i) = RTOL*ABS(Y(i)) + ATOL if ITOL = 1, or +! EWT(i) = RTOL*ABS(Y(i)) + ATOL(i) if ITOL = 2. +! Thus the local error test passes if, in each component, +! either the absolute error is less than ATOL(or ATOL(i)), +! or the relative error is less than RTOL. +! Use RTOL = 0.0 for pure absolute error control, and +! use ATOL = 0.0 (or ATOL(i) = 0.0) for pure relative error +! control. Caution: Actual (global) errors may exceed these +! local tolerances, so choose them conservatively. +! ITASK = 1 for normal computation of output values of Y at t = TOUT. +! ISTATE = Integer flag (input and output). Set ISTATE = 1. +! IOPT = 0 to indicate no optional input used. +! JAC = Name of subroutine for Jacobian matrix (MF = 21 or 24). +! If used, this name must be declared external in calling +! program. If not used, pass a dummy name. +! MF = Method flag. Standard values are: +! 10 for nonstiff (Adams) method, no Jacobian used. +! 21 for stiff (BDF) method, user-supplied full Jacobian. +! 22 for stiff method, internally generated full Jacobian. +! 24 for stiff method, user-supplied banded Jacobian. +! 25 for stiff method, internally generated banded Jacobian. +! E. The output from the first call (or any call) is: +! Y = Array of computed values of y(t) vector. +! T = Corresponding value of independent variable (normally TOUT). +! ISTATE = 2 if DVODE was successful, negative otherwise. +! -1 means excess work done on this call. (Perhaps wrong MF.) +! -2 means excess accuracy requested. (Tolerances too small.) +! -3 means illegal input detected. (See printed message.) +! -4 means repeated error test failures. (Check all input.) +! -5 means repeated convergence failures. (Perhaps bad +! Jacobian supplied or wrong choice of MF or tolerances.) +! -6 means error weight became zero during problem. (Solution +! component I vanished, and ATOL or ATOL(I) = 0.) +! F. To continue the integration after a successful return, simply +! reset TOUT and call DVODE again. No other parameters need be reset. +! Full Description of User Interface to DVODE +! The user interface to DVODE consists of the following parts. +! i. The call sequence to subroutine DVODE, which is a driver +! routine for the solver. This includes descriptions of both +! the call sequence arguments and of user-supplied routines. +! Following these descriptions is +! * a description of optional input available through the +! call sequence, +! * a description of optional output (in the work arrays), and +! * instructions for interrupting and restarting a solution. +! ii. Descriptions of other routines in the DVODE package that may be +! (optionally) called by the user. These provide the ability to +! alter error message handling, save and restore the internal +! PRIVATE variables, and obtain specified derivatives of the +! solution y(t). +! iii. Descriptions of PRIVATE variables to be declared in overlay +! or similar environments. +! iv. Description of two routines in the DVODE package, either of +! which the user may replace with his own version, if desired. +! these relate to the measurement of errors. +! Part i. Call Sequence. +! The call sequence parameters used for input only are +! F, NEQ, TOUT, ITOL, RTOL, ATOL, ITASK, IOPT, JAC, MF, +! and those used for both input and output are +! Y, T, ISTATE. +! The work arrays RUSER and IUSER are used for conditional and +! optional input and optional output. (The term output here refers +! to the return from subroutine DVODE to the user's calling program.) +! The legality of input parameters will be thoroughly checked on the +! initial call for the problem, but not checked thereafter unless a +! change in input parameters is flagged by ISTATE = 3 in the input. +! The descriptions of the call arguments are as follows. +! F = The name of the user-supplied subroutine defining the +! ODE system. The system must be put in the first-order +! form dy/dt = f(t,y), where f is a vector-valued function +! of the scalar t and the vector y. Subroutine F is to +! compute the function f. It is to have the form +! SUBROUTINE F(NEQ, T, Y, YDOT) +! REAL(KIND=WP) T, Y(NEQ), YDOT(NEQ) +! where NEQ, T, and Y are input, and the array YDOT = f(t,y) +! is output. Y and YDOT are arrays of length NEQ. +! Subroutine F should not alter Y(1),...,Y(NEQ). +! F must be declared EXTERNAL in the calling program. + +! If quantities computed in the F routine are needed +! externally to DVODE, an extra call to F should be made +! for this purpose, for consistent and accurate results. +! If only the derivative dy/dt is needed, use DVINDY instead. +! NEQ = The size of the ODE system (number of first order +! ordinary differential equations). Used only for input. +! NEQ may not be increased during the problem, but +! can be decreased (with ISTATE = 3 in the input). +! Y = A real array for the vector of dependent variables, of +! length NEQ or more. Used for both input and output on the +! first call (ISTATE = 1), and only for output on other calls. +! On the first call, Y must contain the vector of initial +! values. In the output, Y contains the computed solution +! evaluated at T. If desired, the Y array may be used +! for other purposes between calls to the solver. +! This array is passed as the Y argument in all calls to +! F and JAC. +! T = The independent variable. In the input, T is used only on +! the first call, as the initial point of the integration. +! In the output, after each call, T is the value at which a +! computed solution Y is evaluated (usually the same as TOUT). +! On an error return, T is the farthest point reached. +! TOUT = The next value of t at which a computed solution is desired. +! Used only for input. +! When starting the problem (ISTATE = 1), TOUT may be equal +! to T for one call, then should /= T for the next call. +! For the initial T, an input value of TOUT /= T is used +! in order to determine the direction of the integration +! (i.e. the algebraic sign of the step sizes) and the rough +! scale of the problem. Integration in either direction +! (forward or backward in t) is permitted. +! If ITASK = 2 or 5 (one-step modes), TOUT is ignored after +! the first call (i.e. the first call with TOUT /= T). +! Otherwise, TOUT is required on every call. +! If ITASK = 1, 3, or 4, the values of TOUT need not be +! monotone, but a value of TOUT which backs up is limited +! to the current internal t interval, whose endpoints are +! TCUR - HU and TCUR. (See optional output, below, for +! TCUR and HU.) +! ITOL = An indicator for the type of error control. See +! description below under ATOL. Used only for input. +! RTOL = A relative error tolerance parameter, either a scalar or +! an array of length NEQ. See description below under ATOL. +! Input only. +! ATOL = An absolute error tolerance parameter, either a scalar or +! an array of length NEQ. Input only. +! The input parameters ITOL, RTOL, and ATOL determine +! the error control performed by the solver. The solver will +! control the vector e = (e(i)) of estimated local errors +! in Y, according to an inequality of the form +! rms-norm of (E(i)/EWT(i)) <= 1, +! where EWT(i) = RTOL(i)*ABS(Y(i)) + ATOL(i), +! and the rms-norm (root-mean-square norm) here is +! rms-norm(v) = sqrt(sum v(i)**2 / NEQ). Here EWT +! is a vector of weights which must always be positive, and +! the values of RTOL and ATOL should all be nonnegative. +! The following table gives the types (scalar/array) of +! RTOL and ATOL, and the corresponding form of EWT(i). +! ITOL RTOL ATOL EWT(i) +! 1 scalar scalar RTOL*ABS(Y(i)) + ATOL +! 2 scalar array RTOL*ABS(Y(i)) + ATOL(i) +! 3 array scalar RTOL(i)*ABS(Y(i)) + ATOL +! 4 array array RTOL(i)*ABS(Y(i)) + ATOL(i) +! When either of these parameters is a scalar, it need not +! be dimensioned in the user's calling program. +! If none of the above choices (with ITOL, RTOL, and ATOL +! fixed throughout the problem) is suitable, more general +! error controls can be obtained by substituting +! user-supplied routines for the setting of EWT and/or for +! the norm calculation. See Part iv below. +! If global errors are to be estimated by making a repeated +! run on the same problem with smaller tolerances, then all +! components of RTOL and ATOL(i.e. of EWT) should be scaled +! down uniformly. +! ITASK = An index specifying the task to be performed. +! Input only. ITASK has the following values and meanings. +! 1 means normal computation of output values of y(t) at +! t = TOUT(by overshooting and interpolating). +! 2 means take one step only and return. +! 3 means stop at the first internal mesh point at or +! beyond t = TOUT and return. +! 4 means normal computation of output values of y(t) at +! t = TOUT but without overshooting t = TCRIT. +! TCRIT must be input as RUSER(1). TCRIT may be equal to +! or beyond TOUT, but not behind it in the direction of +! integration. This option is useful if the problem +! has a singularity at or beyond t = TCRIT. +! 5 means take one step, without passing TCRIT, and return. +! TCRIT must be input as RUSER(1). +! Note: If ITASK = 4 or 5 and the solver reaches TCRIT +! (within roundoff), it will return T = TCRIT(exactly) to +! indicate this (unless ITASK = 4 and TOUT comes before TCRIT, +! in which case answers at T = TOUT are returned first). +! ISTATE = an index used for input and output to specify the +! the state of the calculation. +! In the input, the values of ISTATE are as follows. +! 1 means this is the first call for the problem +! (initializations will be done). See note below. +! 2 means this is not the first call, and the calculation +! is to continue normally, with no change in any input +! parameters except possibly TOUT and ITASK. +! (If ITOL, RTOL, and/or ATOL are changed between calls +! with ISTATE = 2, the new values will be used but not +! tested for legality.) +! 3 means this is not the first call, and the +! calculation is to continue normally, but with +! a change in input parameters other than +! TOUT and ITASK. Changes are allowed in +! NEQ, ITOL, RTOL, ATOL, IOPT, MF, ML, MU, +! and any of the optional input except H0. +! (See IUSER description for ML and MU.) +! Caution: +! If you make a call to DVODE_F90 with ISTATE=3, you will +! first need to call SET_OPTS again, supplying the new +! necessary option values. +! Note: A preliminary call with TOUT = T is not counted +! as a first call here, as no initialization or checking of +! input is done. (Such a call is sometimes useful to include +! the initial conditions in the output.) +! Thus the first call for which TOUT /= T requires +! ISTATE = 1 in the input. +! In the output, ISTATE has the following values and meanings. +! 1 means nothing was done, as TOUT was equal to T with +! ISTATE = 1 in the input. +! 2 means the integration was performed successfully. +! -1 means an excessive amount of work (more than MXSTEP +! steps) was done on this call, before completing the +! requested task, but the integration was otherwise +! successful as far as T. (MXSTEP is an optional input +! and is normally 5000.) To continue, the user may +! simply reset ISTATE to a value > 1 and call again. +! (The excess work step counter will be reset to 0.) +! In addition, the user may increase MXSTEP to avoid +! this error return. (See optional input below.) +! -2 means too much accuracy was requested for the precision +! of the machine being used. This was detected before +! completing the requested task, but the integration +! was successful as far as T. To continue, the tolerance +! parameters must be reset, and ISTATE must be set +! to 3. The optional output TOLSF may be used for this +! purpose. (Note: If this condition is detected before +! taking any steps, then an illegal input return +! (ISTATE = -3) occurs instead.) +! -3 means illegal input was detected, before taking any +! integration steps. See written message for details. +! Note: If the solver detects an infinite loop of calls +! to the solver with illegal input, it will cause +! the run to stop. +! -4 means there were repeated error test failures on +! one attempted step, before completing the requested +! task, but the integration was successful as far as T. +! The problem may have a singularity, or the input +! may be inappropriate. +! -5 means there were repeated convergence test failures on +! one attempted step, before completing the requested +! task, but the integration was successful as far as T. +! This may be caused by an inaccurate Jacobian matrix, +! if one is being used. +! -6 means EWT(i) became zero for some i during the +! integration. Pure relative error control (ATOL(i)=0.0) +! was requested on a variable which has now vanished. +! The integration was successful as far as T. +! Note: Since the normal output value of ISTATE is 2, +! it does not need to be reset for normal continuation. +! Also, since a negative input value of ISTATE will be +! regarded as illegal, a negative output value requires the +! user to change it, and possibly other input, before +! calling the solver again. +! IOPT = An integer flag to specify whether or not any optional +! input is being used on this call. Input only. +! The optional input is listed separately below. +! IOPT = 0 means no optional input is being used. +! Default values will be used in all cases. +! IOPT = 1 means optional input is being used. +! RUSER = A real working array (real(wp)). +! The length of RUSER must be at least 22 +! 20 + NYH * (MAXORD + 1) where +! NYH = the initial value of NEQ, +! MAXORD = 12 (if METH = 1) or 5 (if METH = 2) (unless a +! smaller value is given as an optional input), +! The first 22 words of RUSER are reserved for conditional +! and optional input and optional output. + +! The following word in RUSER is a conditional input: +! RUSER(1) = TCRIT = critical value of t which the solver +! is not to overshoot. Required if ITASK is +! 4 or 5, and ignored otherwise. (See ITASK.) +! IUSER = An integer work array. The length of IUSER must be at least 30. +! 30 if MITER = 0 or 3 (MF = 10, 13, 20, 23), or +! 30 + NEQ otherwise (ABS(MF) = 11,12,14,15,16,17,21,22, +! 24,25,26,27). +! The first 30 words of IUSER are reserved for conditional and +! optional input and optional output. + +! The following 2 words in IUSER are conditional input: +! IUSER(1) = ML These are the lower and upper +! IUSER(2) = MU half-bandwidths, respectively, of the +! banded Jacobian, excluding the main diagonal. +! The band is defined by the matrix locations +! (i,j) with i-ML <= j <= i+MU. ML and MU +! must satisfy 0 <= ML,MU <= NEQ-1. +! These are required if MITER is 4 or 5, and +! ignored otherwise. ML and MU may in fact be +! the band parameters for a matrix to which +! df/dy is only approximately equal. +! Note: The work arrays must not be altered between calls to DVODE +! for the same problem, except possibly for the conditional and +! optional input, and except for the last 3*NEQ words of RUSER. +! The latter space is used for internal scratch space, and so is +! available for use by the user outside DVODE between calls, if +! desired (but not for use by F or JAC). +! JAC = The name of the user-supplied routine (MITER = 1 or 4 or 6) +! to compute the Jacobian matrix, df/dy, as a function of +! the scalar t and the vector y. It is to have the form +! SUBROUTINE JAC(NEQ, T, Y, ML, MU, PD, NROWPD) +! REAL(KIND=WP) T, Y(NEQ), PD(NROWPD,NEQ) +! where NEQ, T, Y, ML, MU, and NROWPD are input and the array +! PD is to be loaded with partial derivatives (elements of the +! Jacobian matrix) in the output. PD must be given a first +! dimension of NROWPD. T and Y have the same meaning as in +! Subroutine F. +! In the full matrix case (MITER = 1), ML and MU are +! ignored, and the Jacobian is to be loaded into PD in +! columnwise manner, with df(i)/dy(j) loaded into PD(i,j). +! In the band matrix case (MITER = 4), the elements +! within the band are to be loaded into PD in columnwise +! manner, with diagonal lines of df/dy loaded into the rows +! of PD. Thus df(i)/dy(j) is to be loaded into PD(i-j+MU+1,j). +! ML and MU are the half-bandwidth parameters. (See IUSER). +! The locations in PD in the two triangular areas which +! correspond to nonexistent matrix elements can be ignored +! or loaded arbitrarily, as they are overwritten by DVODE. +! In the sparse matrix case the elements of the matrix +! are determined by the sparsity structure given by the +! IA and JA pointer arrays. Refer to the documentation +! prologue for SET_OPTS for a description of the arguments +! for JAC since they differ from the dense and banded cases. +! JAC need not provide df/dy exactly. A crude +! approximation (possibly with a smaller bandwidth) will do. +! In either case, PD is preset to zero by the solver, +! so that only the nonzero elements need be loaded by JAC. +! Each call to JAC is preceded by a call to F with the same +! arguments NEQ, T, and Y. Thus to gain some efficiency, +! intermediate quantities shared by both calculations may be +! saved in a user common block by F and not recomputed by JAC, +! if desired. Also, JAC may alter the Y array, if desired. +! JAC must be declared external in the calling program. +! MF = The method flag. Used only for input. The legal values of +! MF are 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, +! 25, 26, 27, -11, -12, -14, -15, -21, -22, -24, -25, -26, +! -27. +! MF is a signed two-digit integer, MF = JSV*(10*METH+MITER). +! JSV = SIGN(MF) indicates the Jacobian-saving strategy: +! JSV = 1 means a copy of the Jacobian is saved for reuse +! in the corrector iteration algorithm. +! JSV = -1 means a copy of the Jacobian is not saved +! (valid only for MITER = 1, 2, 4, or 5). +! METH indicates the basic linear multistep method: +! METH = 1 means the implicit Adams method. +! METH = 2 means the method based on backward +! differentiation formulas (BDF-s). +! MITER indicates the corrector iteration method: +! MITER = 0 means functional iteration (no Jacobian matrix +! is involved). +! MITER = 1 means chord iteration with a user-supplied +! full (NEQ by NEQ) Jacobian. +! MITER = 2 means chord iteration with an internally +! generated (difference quotient) full Jacobian +! (using NEQ extra calls to F per df/dy value). +! MITER = 3 means chord iteration with an internally +! generated diagonal Jacobian approximation +! (using 1 extra call to F per df/dy evaluation). +! MITER = 4 means chord iteration with a user-supplied +! banded Jacobian. +! MITER = 5 means chord iteration with an internally +! generated banded Jacobian (using ML+MU+1 extra +! calls to F per df/dy evaluation). +! MITER = 6 means chord iteration with a user-supplied +! sparse Jacobian. +! MITER = 7 means chord iteration with an internally +! generated sparse Jacobian +! If MITER = 1, 4, or 6 the user must supply a subroutine +! JAC(the name is arbitrary) as described above under JAC. +! For other values of MITER, a dummy argument can be used. +! Optional Input +! The following is a list of the optional input provided for in the +! call sequence. (See also Part ii.) For each such input variable, +! this table lists its name as used in this documentation, its +! location in the call sequence, its meaning, and the default value. +! The use of any of this input requires IOPT = 1, and in that +! case all of this input is examined. A value of zero for any of +! these optional input variables will cause the default value to be +! used. Thus to use a subset of the optional input, simply preload +! locations 5 to 10 in RUSER and IUSER to 0.0 and 0, respectively, +! and then set those of interest to nonzero values. +! NAME LOCATION MEANING AND DEFAULT VALUE +! H0 RUSER(5) The step size to be attempted on the first step. +! The default value is determined by the solver. +! HMAX RUSER(6) The maximum absolute step size allowed. +! The default value is infinite. +! HMIN RUSER(7) The minimum absolute step size allowed. +! The default value is 0. (This lower bound is not +! enforced on the final step before reaching TCRIT +! when ITASK = 4 or 5.) +! MAXORD IUSER(5) The maximum order to be allowed. The default +! value is 12 if METH = 1, and 5 if METH = 2. +! If MAXORD exceeds the default value, it will +! be reduced to the default value. +! If MAXORD is changed during the problem, it may +! cause the current order to be reduced. +! MXSTEP IUSER(6) Maximum number of (internally defined) steps +! allowed during one call to the solver. +! The default value is 5000. +! MXHNIL IUSER(7) Maximum number of messages printed (per problem) +! warning that T + H = T on a step (H = step size). +! This must be positive to result in a non-default +! value. The default value is 10. +! Optional Output +! As optional additional output from DVODE, the variables listed +! below are quantities related to the performance of DVODE +! which are available to the user. These are communicated by way of +! the work arrays, but also have internal mnemonic names as shown. +! Except where stated otherwise, all of this output is defined +! on any successful return from DVODE, and on any return with +! ISTATE = -1, -2, -4, -5, or -6. On an illegal input return +! (ISTATE = -3), they will be unchanged from their existing values +! (if any), except possibly for TOLSF, LENRW, and LENIW. +! On any error return, output relevant to the error will be defined, +! as noted below. +! NAME LOCATION MEANING +! HU RUSER(11) The step size in t last used (successfully). +! HCUR RUSER(12) The step size to be attempted on the next step. +! TCUR RUSER(13) The current value of the independent variable +! which the solver has actually reached, i.e. the +! current internal mesh point in t. In the output, +! TCUR will always be at least as far from the +! initial value of t as the current argument T, +! but may be farther (if interpolation was done). +! TOLSF RUSER(14) A tolerance scale factor, greater than 1.0, +! computed when a request for too much accuracy was +! detected (ISTATE = -3 if detected at the start of +! the problem, ISTATE = -2 otherwise). If ITOL is +! left unaltered but RTOL and ATOL are uniformly +! scaled up by a factor of TOLSF for the next call, +! then the solver is deemed likely to succeed. +! (The user may also ignore TOLSF and alter the +! tolerance parameters in any other way appropriate.) +! NST IUSER(11) The number of steps taken for the problem so far. +! NFE IUSER(12) The number of f evaluations for the problem so far. +! NJE IUSER(13) The number of Jacobian evaluations so far. +! NQU IUSER(14) The method order last used (successfully). +! NQCUR IUSER(15) The order to be attempted on the next step. +! IMXER IUSER(16) The index of the component of largest magnitude in +! the weighted local error vector (e(i)/EWT(i)), +! on an error return with ISTATE = -4 or -5. +! LENRW IUSER(17) The length of RUSER actually required. +! This is defined on normal returns and on an illegal +! input return for insufficient storage. +! LENIW IUSER(18) The length of IUSER actually required. +! This is defined on normal returns and on an illegal +! input return for insufficient storage. +! NLU IUSER(19) The number of matrix LU decompositions so far. +! NNI IUSER(20) The number of nonlinear (Newton) iterations so far. +! NCFN IUSER(21) The number of convergence failures of the nonlinear +! solver so far. +! NETF IUSER(22) The number of error test failures of the integrator +! so far. +! The following two arrays are segments of the RUSER array which +! may also be of interest to the user as optional output. +! For each array, the table below gives its internal name, +! its base address in RUSER, and its description. +! Interrupting and Restarting +! If the integration of a given problem by DVODE is to be interrupted +! and then later continued, such as when restarting an interrupted run +! or alternating between two or more ODE problems, the user should save, +! following the return from the last DVODE call prior to the +! interruption, the contents of the call sequence variables and +! internal PRIVATE variables, and later restore these values before the +! next DVODE call for that problem. To save and restore the PRIVATE +! variables, use subroutine DVSRCO, as described below in part ii. +! In addition, if non-default values for either LUN or MFLAG are +! desired, an extra call to XSETUN and/or XSETF should be made just +! before continuing the integration. See Part ii below for details. +! Part ii. Other Routines Callable. +! The following are optional calls which the user may make to +! gain additional capabilities in conjunction with DVODE. +! (The routines XSETUN and XSETF are designed to conform to the +! SLATEC error handling package.) +! FORM OF CALL FUNCTION +! CALL XSETUN(LUN) Set the logical unit number, LUN, for +! output of messages from DVODE, if +! the default is not desired. +! The default value of LUN is 6. +! CALL XSETF(MFLAG) Set a flag to control the printing of +! messages by DVODE. +! MFLAG = 0 means do not print. (Danger: +! This risks losing valuable information.) +! Either of the above calls may be made at +! any time and will take effect immediately. +! CALL DVINDY(...) Provide derivatives of y, of various +! orders, at a specified point T, if +! desired. It may be called only after +! a successful return from DVODE. +! The detailed instructions for using DVINDY are as follows. +! The form of the call is: +! CALL DVINDY(T,K,DKY,IFLAG) +! The input parameters are: +! T = Value of independent variable where answers are desired +! (normally the same as the T last returned by DVODE). +! For valid results, T must lie between TCUR - HU and TCUR. +! (See optional output for TCUR and HU.) +! K = Integer order of the derivative desired. K must satisfy +! 0 <= K <= NQCUR, where NQCUR is the current order +! (see optional output). The capability corresponding +! to K = 0, i.e. computing y(T), is already provided +! by DVODE directly. Since NQCUR >= 1, the first +! derivative dy/dt is always available with DVINDY. +! The output parameters are: +! DKY = A real array of length NEQ containing the computed value +! of the K-th derivative of y(t). +! IFLAG = Integer flag, returned as 0 if K and T were legal, +! -1 if K was illegal, and -2 if T was illegal. +! On an error return, a message is also written. +! Part iii. Optionally Replaceable Solver Routines. +! Below are descriptions of two routines in the DVODE package which +! relate to the measurement of errors. Either routine can be +! replaced by a user-supplied version, if desired. However, since such +! a replacement may have a major impact on performance, it should be +! done only when absolutely necessary, and only with great caution. +! (Note: The means by which the package version of a routine is +! superseded by the user's version may be system-dependent.) +! (a) DEWSET. +! The following subroutine is called just before each internal +! integration step, and sets the array of error weights, EWT, as +! described under ITOL/RTOL/ATOL above: +! SUBROUTINE DEWSET(NEQ, ITOL, RTOL, ATOL, YCUR, EWT) +! where NEQ, ITOL, RTOL, and ATOL are as in the DVODE call sequence, +! YCUR contains the current dependent variable vector, and +! EWT is the array of weights set by DEWSET. +! If the user supplies this subroutine, it must return in EWT(i) +! (i = 1,...,NEQ) a positive quantity suitable for comparison with +! errors in Y(i). The EWT array returned by DEWSET is passed to the +! DVNORM function (See below.), and also used by DVODE in the +! computation of the optional output IMXER, the diagonal Jacobian +! approximation, and the increments for difference quotient Jacobians. +! In the user-supplied version of DEWSET, it may be desirable to use +! the current values of derivatives of y. Derivatives up to order NQ +! are available from the history array YH, described above under +! Optional Output. In DEWSET, YH is identical to the YCUR array, +! extended to NQ + 1 columns with a column length of NYH and scale +! factors of h**j/factorial(j). On the first call for the problem, +! given by NST = 0, NQ is 1 and H is temporarily set to 1.0. +! NYH is the initial value of NEQ. Thus, for example, the current +! value of dy/dt can be obtained as YCUR(NYH+i)/H (i=1,...,NEQ) +! (and the division by H is unnecessary when NST = 0). +! (b) DVNORM. +! The following is a function which computes the weighted +! root-mean-square norm of a vector v: +! D = DVNORM(N, V, W) +! where: +! N = the length of the vector, +! V = real array of length N containing the vector, +! W = real array of length N containing weights, +! D = sqrt((1/N) * sum(V(i)*W(i))**2). +! DVNORM is called with N = NEQ and with W(i) = 1.0/EWT(i), where +! EWT is as set by subroutine DEWSET. +! If the user supplies this routine, it should return a nonnegative +! value of DVNORM suitable for use in the error control in DVODE. +! None of the arguments should be altered by DVNORM. +! For example, a user-supplied DVNORM function might: +! -substitute a max-norm of (V(i)*W(i)) for the rms-norm, or +! -ignore some components of V in the norm, with the effect of +! suppressing the error control on those components of Y. +!_______________________________________________________________________ +! Other Routines in the DVODE Package + +! In addition to subroutine DVODE, the DVODE package includes the +! following subroutines and function routines (not user callable): +! DVHIN computes an approximate step size for the initial step. +! DVINDY_CORE computes an interpolated value of the y vector at t=TOUT. +! DVINDY computes an interpolated value of the y vector at t=TOUT. +! (user callable) +! DVSTEP is the core integrator, which does one step of the +! integration and the associated error control. +! DVSET sets all method coefficients and test constants. +! DVNLSD, solves the underlying nonlinear system -- the corrector. +! DVNLSS28 +! DVJAC, computes and preprocesses the Jacobian matrix J = df/dy +! DVJACS28 and the Newton iteration matrix P = I - (h/l1)*J. +! DVSOL, manages solution of linear system in chord iteration. +! DVSOLS28 +! DVJUST adjusts the history array on a change of order. +! DEWSET sets the error weight vector EWT before each step. +! DVNORM computes the weighted r.m.s. norm of a vector. +! DACOPY is a routine to copy a two-dimensional array to another. +! DGEFA_F90 and DGESL_F90 are routines from LINPACK for solving full +! systems of linear algebraic equations. +! DGBFA_F90 and DGBSL_F90 are routines from LINPACK for solving banded +! linear systems. +! DAXPY_F90, DSCAL_F90, and DCOPY_F90 are basic linear algebra modules +! (BLAS). +! DVCHECK does preliminary checking for roots, and serves as an +! interface between subroutine DVODE_F90 and subroutine +! DVROOTS. +! DVROOTS finds the leftmost root of a set of functions. +! ______________________________________________________________________ +! Section 10. Example Usage +! +! MODULE example1 +! The following is a simple example problem, with the coding +! needed for its solution by DVODE_F90. The problem is from +! chemical kinetics, and consists of the following three rate +! equations: +! dy1/dt = -.04d0*y1 + 1.d4*y2*y3 +! dy2/dt = .04d0*y1 - 1.d4*y2*y3 - 3.d7*y2**2 +! dy3/dt = 3.d7*y2**2 +! on the interval from t = 0.0d0 to t = 4.d10, with initial +! conditions y1 = 1.0d0, y2 = y3 = 0.0d0. The problem is stiff. +! The following coding solves this problem with DVODE_F90, +! using a user supplied Jacobian and printing results at +! t = .4, 4.,...,4.d10. It uses ITOL = 2 and ATOL much smaller +! for y2 than y1 or y3 because y2 has much smaller values. At +! the end of the run, statistical quantities of interest are +! printed. (See optional output in the full DVODE description +! below.) Output is written to the file example1.dat. +! CONTAINS +! SUBROUTINE FEX(NEQ, T, Y, YDOT) +! IMPLICIT NONE +! INTEGER NEQ +! DOUBLE PRECISION T, Y, YDOT +! DIMENSION Y(NEQ), YDOT(NEQ) +! YDOT(1) = -.04D0*Y(1) + 1.D4*Y(2)*Y(3) +! YDOT(3) = 3.E7*Y(2)*Y(2) +! YDOT(2) = -YDOT(1) - YDOT(3) +! RETURN +! END SUBROUTINE FEX +! SUBROUTINE JEX(NEQ, T, Y, ML, MU, PD, NRPD) +! IMPLICIT NONE +! INTEGER NEQ,ML,MU,NRPD +! DOUBLE PRECISION PD, T, Y +! DIMENSION Y(NEQ), PD(NRPD,NEQ) +! PD(1,1) = -.04D0 +! PD(1,2) = 1.D4*Y(3) +! PD(1,3) = 1.D4*Y(2) +! PD(2,1) = .04D0 +! PD(2,3) = -PD(1,3) +! PD(3,2) = 6.E7*Y(2) +! PD(2,2) = -PD(1,2) - PD(3,2) +! RETURN +! END SUBROUTINE JEX +! END MODULE example1 +!****************************************************************** + +! PROGRAM runexample1 +! USE DVODE_F90_M +! USE example1 +! IMPLICIT NONE +! DOUBLE PRECISION ATOL, RTOL, T, TOUT, Y, RSTATS +! INTEGER NEQ, ITASK, ISTATE, ISTATS, IOUT, IERROR, I +! DIMENSION Y(3), ATOL(3), RSTATS(22), ISTATS(31) +! TYPE(VODE_OPTS) :: OPTIONS +! OPEN(UNIT=6, FILE = 'example1.dat') +! IERROR = 0 +! NEQ = 3 +! Y(1) = 1.0D0 +! Y(2) = 0.0D0 +! Y(3) = 0.0D0 +! T = 0.0D0 +! TOUT = 0.4D0 +! RTOL = 1.D-4 +! ATOL(1) = 1.D-8 +! ATOL(2) = 1.D-14 +! ATOL(3) = 1.D-6 +! ITASK = 1 +! ISTATE = 1 +! OPTIONS = SET_OPTS(DENSE_J=.TRUE.,ABSERR_VECTOR=ATOL, & +! RELERR=RTOL, USER_SUPPLIED_JACOBIAN=.TRUE.) +! DO IOUT = 1,12 +! CALL DVODE_F90(FEX,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,J_FCN=JEX) +! CALL GET_STATS(RSTATS,ISTATS) +! WRITE(6,63)T,Y(1),Y(2),Y(3) +! DO I = 1, NEQ +! IF (Y(I) < 0.0D0) IERROR = 1 +! END DO +! IF (ISTATE < 0) THEN +! WRITE(6,64)ISTATE +! STOP +! END IF +! TOUT = TOUT*10.0D0 +! END DO +! WRITE(6,60) ISTATS(11),ISTATS(12),ISTATS(13),ISTATS(19), & +! ISTATS(20),ISTATS(21),ISTATS(22) +! IF (IERROR == 1) THEN +! WRITE(6,61) +! ELSE +! WRITE(6,62) +! END IF +! 60 FORMAT(/' No. steps =',I4,' No. f-s =',I4, & +! ' No. J-s =',I4,' No. LU-s =',I4/ & +! ' No. nonlinear iterations =',I4/ & +! ' No. nonlinear convergence failures =',I4/ & +! ' No. error test failures =',I4/) +! 61 FORMAT(/' An error occurred.') +! 62 FORMAT(/' No errors occurred.') +! 63 FORMAT(' At t =',D12.4,' y =',3D14.6) +! 64 FORMAT(///' Error halt: ISTATE =',I3) +! STOP +! END PROGRAM runexample1 +! +! MODULE example2 +! The following is a modification of the previous example +! program to illustrate root finding. The problem is from +! chemical kinetics, and consists of the following three +! rate equations: +! dy1/dt = -.04d0*y1 + 1.d4*y2*y3 +! dy2/dt = .04d0*y1 - 1.d4*y2*y3 - 3.d7*y2**2 +! dy3/dt = 3.d7*y2**2 +! on the interval from t = 0.0d0 to t = 4.d10, with initial +! conditions y1 = 1.0d0, y2 = y3 = 0.0d0. The problem is stiff. +! In addition, we want to find the values of t, y1, y2, +! and y3 at which: +! (1) y1 reaches the value 1.d-4, and +! (2) y3 reaches the value 1.d-2. +! The following coding solves this problem with DVODE_F90 +! using an internally generated dense Jacobian and +! printing results at t = .4, 4., ..., 4.d10, and at the +! computed roots. It uses ITOL = 2 and ATOL much smaller +! for y2 than y1 or y3 because y2 has much smaller values. +! At the end of the run, statistical quantities of interest +! are printed (see optional outputs in the full description +! below). Output is written to the file example2.dat. +! CONTAINS +! SUBROUTINE FEX (NEQ, T, Y, YDOT) +! IMPLICIT NONE +! INTEGER NEQ +! DOUBLE PRECISION T, Y, YDOT +! DIMENSION Y(3), YDOT(3) +! YDOT(1) = -0.04D0*Y(1) + 1.0D4*Y(2)*Y(3) +! YDOT(3) = 3.0D7*Y(2)*Y(2) +! YDOT(2) = -YDOT(1) - YDOT(3) +! RETURN +! END SUBROUTINE FEX +! SUBROUTINE GEX (NEQ, T, Y, NG, GOUT) +! IMPLICIT NONE +! INTEGER NEQ, NG +! DOUBLE PRECISION T, Y, GOUT +! DIMENSION Y(3), GOUT(2) +! GOUT(1) = Y(1) - 1.0D-4 +! GOUT(2) = Y(3) - 1.0D-2 +! RETURN +! END SUBROUTINE GEX +! END MODULE example2 +!****************************************************************** +! PROGRAM runexample2 +! USE DVODE_F90_M +! USE example2 +! IMPLICIT NONE +! INTEGER ITASK, ISTATE, NG, NEQ, IOUT, JROOT, ISTATS, & +! IERROR, I +! DOUBLE PRECISION ATOL, RTOL, RSTATS, T, TOUT, Y +! DIMENSION Y(3), ATOL(3), RSTATS(22), ISTATS(31), JROOT(2) +! TYPE(VODE_OPTS) :: OPTIONS +! OPEN (UNIT=6, FILE='example2.dat') +! IERROR = 0 +! NEQ = 3 +! Y(1) = 1.0D0 +! Y(2) = 0.0D0 +! Y(3) = 0.0D0 +! T = 0.0D0 +! TOUT = 0.4D0 +! RTOL = 1.0D-4 +! ATOL(1) = 1.0D-8 +! ATOL(2) = 1.0D-12 +! ATOL(3) = 1.0D-8 +! ITASK = 1 +! ISTATE = 1 +! NG = 2 +! OPTIONS = SET_OPTS(DENSE_J=.TRUE.,RELERR=RTOL, & +! ABSERR_VECTOR=ATOL,NEVENTS=NG) +! DO 40 IOUT = 1,12 +! 10 CONTINUE +! CALL DVODE_F90(FEX,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,G_FCN=GEX) +! CALL GET_STATS(RSTATS, ISTATS, NG, JROOT) +! WRITE(6,20) T, Y(1), Y(2), Y(3) +! DO I = 1, NEQ +! IF (Y(I) < 0.0D0) IERROR = 1 +! END DO +! 20 FORMAT(' At t =',D12.4,' Y =',3D14.6) +! IF (ISTATE < 0) GOTO 60 +! IF (ISTATE == 2) GOTO 40 +! WRITE(6,30) JROOT(1),JROOT(2) +! 30 FORMAT(5X,' The above line is a root, JROOT =',2I5) +! ISTATE = 2 +! GOTO 10 +! 40 TOUT = TOUT*10.0D0 +! WRITE(6,50) ISTATS(11), ISTATS(12), ISTATS(13), ISTATS(10) +! IF (IERROR == 1) THEN +! WRITE(6,61) +! ELSE +! WRITE(6,62) +! END IF +! 50 FORMAT(/' No. steps =',I4,' No. f-s =',I4,' No. J-s =',I4, & +! ' No. g-s =',I4/) +! STOP +! 60 WRITE(6,70) ISTATE +! 61 FORMAT(/' An error occurred.') +! 62 FORMAT(/' No errors occurred.') +! 70 FORMAT(///' Error halt.. ISTATE =',I3) +! STOP +! END PROGRAM runexample2 +!_______________________________________________________________________ +! BEGINNING OF DVODE_F90_M PRIVATE SECTION. +! Note: This global information is used throughout DVODE_F90. +!_______________________________________________________________________ +! JACSPDB arrays and parameters. + LOGICAL, PRIVATE :: USE_JACSP, LIKE_ORIGINAL_VODE + INTEGER, PRIVATE :: INFODS, LIWADS, MAXGRPDS, MINGRPDS, NRFJACDS, & + NCFJACDS, LWKDS, LIWKDS + INTEGER, ALLOCATABLE, PRIVATE :: INDROWDS(:), INDCOLDS(:), & + NGRPDS(:), IPNTRDS(:), JPNTRDS(:), IWADS(:), IWKDS(:), IOPTDS(:) + REAL (WP), ALLOCATABLE, PRIVATE :: YSCALEDS(:), WKDS(:), FACDS(:) + REAL (WP), PRIVATE :: U125, U325 +!_______________________________________________________________________ + LOGICAL, PARAMETER, PRIVATE :: USE_MA48_FOR_SPARSE=.FALSE. +!_______________________________________________________________________ +! *****MA48 build change point. Replace the above statement. +! LOGICAL, PARAMETER, PRIVATE :: USE_MA48_FOR_SPARSE=.TRUE. +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! MA48 type declarations: +! TYPE(ZD01_TYPE) MATRIX +! TYPE(MA48_CONTROL) CONTROL +! TYPE(MA48_FACTORS) FACTORS +! TYPE(MA48_AINFO) AINFO +! TYPE(MA48_FINFO) FINFO +! TYPE(MA48_SINFO) SINFO +!_______________________________________________________________________ +! .. Parameters .. +! IPCUTH_MAX - maximum number of times the solver will halve the +! stepsize to prevent an infeasible prediction if +! solution bounds are used +! KFC - maximum number of consecutive convergence failures +! before crashing the order +! KFH - maximum number of consecutive error test failures +! before giving up (changed from 7 to 15) +! MAXCOR - maximum number of corrections +! MSBP - maximum number of steps before forming new P matrix +! MXNCF - maximum number of consecutive convergence failures +! before giving up +! MXHNLO - maximum number of T+H=T messages +! MXSTP0 - maximum number of integration steps +! L***** - lengths and pointers for some internal arrays +! INTEGER, PARAMETER, PRIVATE :: KFC = -3, KFH = -7, LENIV1 = 33, & + INTEGER, PARAMETER, PRIVATE :: IPCUTH_MAX = 100, KFC = -3, & + KFH = -15, LENIV1 = 33, & + LENIV2 = 8, LENRV1 = 48, LENRV2 = 1, LIWUSER = 30, LRWUSER = 22, & + MAXCOR = 3, MAX_ARRAY_SIZE = 900000000, MSBP = 20, MXHNL0 = 10, & + MXNCF = 10, MXSTP0 = 5000 +!_______________________________________________________________________ +! *****LAPACK build change point. Use .TRUE. for LAPACK. +! LOGICAL, PARAMETER, PRIVATE :: USE_LAPACK = .TRUE. +!_______________________________________________________________________ + REAL (WP), PARAMETER, PRIVATE :: ADDON = 1.0E-6_WP + REAL (WP), PARAMETER, PRIVATE :: BIAS1 = 6.0_WP + REAL (WP), PARAMETER, PRIVATE :: BIAS2 = 6.0_WP + REAL (WP), PARAMETER, PRIVATE :: BIAS3 = 10.0_WP + REAL (WP), PARAMETER, PRIVATE :: CCMAX = 0.3_WP + REAL (WP), PARAMETER, PRIVATE :: CORTES = 0.1_WP + REAL (WP), PARAMETER, PRIVATE :: CRDOWN = 0.3_WP + REAL (WP), PARAMETER, PRIVATE :: ETACF = 0.25_WP + REAL (WP), PARAMETER, PRIVATE :: ETAMIN = 0.1_WP + REAL (WP), PARAMETER, PRIVATE :: ETAMX1 = 1.0E4_WP + REAL (WP), PARAMETER, PRIVATE :: ETAMX2 = 10.0_WP + REAL (WP), PARAMETER, PRIVATE :: ETAMX3 = 10.0_WP + REAL (WP), PARAMETER, PRIVATE :: ETAMXF = 0.2_WP + REAL (WP), PARAMETER, PRIVATE :: FIVE = 5.0_WP + REAL (WP), PARAMETER, PRIVATE :: FOUR = 4.0_WP + REAL (WP), PARAMETER, PRIVATE :: HALF = 0.5_WP + REAL (WP), PARAMETER, PRIVATE :: HUN = 100.0_WP + REAL (WP), PARAMETER, PRIVATE :: HUNDRETH = 0.01_WP + REAL (WP), PARAMETER, PRIVATE :: ONE = 1.0_WP + REAL (WP), PARAMETER, PRIVATE :: ONEPSM = 1.00001_WP + REAL (WP), PARAMETER, PRIVATE :: PT1 = 0.1_WP + REAL (WP), PARAMETER, PRIVATE :: PT2 = 0.2_WP + REAL (WP), PARAMETER, PRIVATE :: RDIV = 2.0_WP + REAL (WP), PARAMETER, PRIVATE :: SIX = 6.0_WP + REAL (WP), PARAMETER, PRIVATE :: TEN = 10.0_WP + REAL (WP), PARAMETER, PRIVATE :: TENTH = 0.1_WP + REAL (WP), PARAMETER, PRIVATE :: THOU = 1000.0_WP + REAL (WP), PARAMETER, PRIVATE :: THRESH = 1.5_WP + REAL (WP), PARAMETER, PRIVATE :: TWO = 2.0_WP + REAL (WP), PARAMETER, PRIVATE :: ZERO = 0.0_WP + +! Beginning of DVODE_F90 interface. +! .. +! .. Generic Interface Blocks .. + INTERFACE DVODE_F90 + +! VODE_F90 is the interface subroutine that is actually invoked +! when the user calls DVODE_F90. It in turn calls subroutine +! DVODE which is the driver that directs all the work. + MODULE PROCEDURE VODE_F90 + +! GET_STATS can be called to gather integration statistics. + MODULE PROCEDURE GET_STATS + +! DVINDY can be called to interpolate the solution and derivative. + MODULE PROCEDURE DVINDY + +! RELEASE_ARRAYS can be called to release/deallocate the work arrays. + MODULE PROCEDURE RELEASE_ARRAYS + +! SET_IAJA can be called to set sparse matrix descriptor arrays. + MODULE PROCEDURE SET_IAJA + +! USERSETS_IAJA can be called to set sparse matrix descriptor arrays. + MODULE PROCEDURE USERSETS_IAJA + +! CHECK_STAT can be called to stop if a storage allocation or +! deallocation error occurs. + MODULE PROCEDURE CHECK_STAT + +! JACSP can be called to calculate a Jacobian using Doug Salane's +! algoritm + MODULE PROCEDURE JACSP + +! DVDSM can be called to calculate sparse pointer arrays needed +! by JACSP + MODULE PROCEDURE DVDSM + + END INTERFACE +! .. +! .. Derived Type Declarations .. + TYPE, PUBLIC :: VODE_OPTS + REAL (WP), DIMENSION (:), POINTER :: ATOL, RTOL + INTEGER :: MF, METH, MITER, MOSS, ITOL, IOPT, NG + LOGICAL :: DENSE, BANDED, SPARSE + END TYPE VODE_OPTS +! .. +! .. Local Scalars .. +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! For communication with subroutine ma48_control_array: +! REAL (WP), PUBLIC :: COPY_OF_U_PIVOT +!_______________________________________________________________________ + REAL (WP), PRIVATE :: ACNRM, ALPHA, BIG, BIG1, CCMXJ, CGCE, CONP, CRATE, & + DRC, DRES, DXMAX, EPS, ERRMAX, ETA, ETAMAX, FRACINT, FRACSUB, H, HMIN, & + HMXI, HNEW, HSCAL, HU, MEPS, MRESID, MRMIN, PRL1, RC, RESID, RL1, & + RMIN, SETH, T0ST, THEMAX, TLAST, TN, TOL, TOL1, TOUTC, UMAX, UROUND, & + U_PIVOT, X2, WM1, WM2 + INTEGER, PRIVATE :: ADDTOJA, ADDTONNZ, CONSECUTIVE_CFAILS, & + CONSECUTIVE_EFAILS, ELBOW_ROOM, IADIM, IANPIV, IAVPIV, & + ICF, ICNCP, IFAIL, IMAX, IMIN, INEWJ, INIT, IPUP, IRANK, IRFND, IRNCP, & + ISTART, ISTATC, ITASKC, JADIM, JCUR, JMIN, JSTART, JSV, KFLAG, KOUNTL, & + KUTH, L, LARGE, LAST, LENIGP, LICN_ALL, LIRN_ALL, LIW, LIWM, LMAX, & + LOCJS, LP, LRW, LWM, LWMDIM, LWMTEMP, LYH, LYHTEMP, MANPIV, MAPIV, MAXG,& + MAXIT, MAXORD, MB28, MB48, METH, MICN, MICNCP, MINICN, MINIRN, MIRANK, & + MIRN, MIRNCP, MITER, MLP, MOSS, MP, MSBG, MSBJ, MXHNIL, MXSTEP, N, NZB, & + NCFN, NDROP, NDROP1, NDX, NETF, NEWH, NEWQ, NFE, NGC, NGE, NGP, NHNIL, & + NJE, NLP, NLU, NNI, NNZ, NOITER, NQ, NQNYH, NQU, NQWAIT, NSLG, NSLJ, & + NSLP, NSRCH, NSRCH1, NST, NSUBS, NSUPS, NUM, NUMNZ, NYH, NZ_ALL, & + NZ_SWAG, PREVIOUS_MAXORD, WPD, WPS, MA28AD_CALLS, MA28BD_CALLS, & + MA28CD_CALLS, MC19AD_CALLS, MAX_MINIRN, MAX_MINICN, MAX_NNZ, BNGRP +! MA48AD_CALLS, MA48BD_CALLS, MA48CD_CALLS +! *****MA48 build change point. Insert the above line. + LOGICAL, PRIVATE :: ABORT, ABORT1, ABORT2, ABORT3, ABORTA, ABORTB, & + ALLOW_DEFAULT_TOLS, BUILD_IAJA, BOUNDS, CHANGED_ACOR, GROW, IAJA_CALLED,& + J_HAS_BEEN_COMPUTED, J_IS_CONSTANT, LBIG, LBIG1, LBLOCK, MA48_WAS_USED, & + OK_TO_CALL_MA28, SUBS, SUPS, OPTS_CALLED, REDO_PIVOT_SEQUENCE, & + SCALE_MATRIX, SPARSE, USE_FAST_FACTOR, YMAXWARN +! .. +! .. Local Arrays .. + REAL (WP), ALLOCATABLE, PRIVATE :: ACOR(:), CSCALEX(:), EWT(:), & + FPTEMP(:), FTEMP(:), FTEMP1(:), G0(:), G1(:), GX(:), JMAT(:), & + LB(:), PMAT(:), RSCALEX(:), RWORK(:), SAVF(:), UB(:), WM(:), & + WMTEMP(:), WSCALEX(:,:), YHNQP2(:), YHTEMP(:), YMAX(:), YNNEG(:), & + YTEMP(:), DTEMP(:) + REAL (WP), PRIVATE :: EL(13), RUSER(22), TAU(13), TQ(5) + INTEGER, ALLOCATABLE, PRIVATE :: BIGP(:), BJGP(:), IA(:), IAB(:), IAN(:), & + ICN(:), IDX(:), IGP(:), IKEEP28(:,:), IW28(:,:), IWORK(:), JA(:), & + JAB(:), JAN(:), JATEMP(:), JGP(:), JROOT(:), JVECT(:), SUBDS(:), SUPDS(:) + INTEGER, PRIVATE :: IDISP(2), IUSER(30), LNPIV(10), LPIV(10) + INTEGER, PRIVATE :: MORD(2) = (/ 12, 5 /) +! .. +! .. Public Subroutines and Functions .. + PUBLIC :: & + DAXPY_F90, DCOPY_F90, DDOT_F90, DGBFA_F90, DGBSL_F90, DGEFA_F90, & + DGESL_F90, DSCAL_F90, IDAMAX_F90 +! .. +! .. Private Subroutines and Functions .. + PRIVATE :: & + CHECK_DIAG , DACOPY , DEWSET , DGROUP , & + DGROUPDS , DVCHECK , DVHIN , DVINDY_BNDS , & + DVINDY_CORE , DVJAC , DVJACS28 , DVJUST , & + DVNLSD , DVNLSS28 , DVNORM , DVNRDN , & + DVNRDP , DVNRDS , DVODE , DVPREPS , & + DVROOTS , DVSET , DVSOL , DVSOLS28 , & + DVSRCO , DVSTEP , GDUMMY , IUMACH , & + IXSAV , JACSPDB , JDUMMY , MA28AD , & + MA28BD , MA28CD , MA28DD , MA28ID , & + MA30AD , MA30BD , MA30CD , MA30DD , & + MC13E , MC19AD , MC20AD , MC20BD , & + MC21A , MC21B , MC22AD , MC23AD , & + MC24AD , SET_ICN , XERRDV , XSETF , & + XSETUN , DEGR , IDO , NUMSRT , & + SEQ , SETR , SLO , SRTDAT , & + FDJS +! DVJACS48 , DVNLSS48 , DVPREPS48 , DVSOLS48 +!_______________________________________________________________________ +! *****MA48 build change point. Insert the above line. +!_______________________________________________________________________ +! .. +! .. Intrinsic Functions .. + INTRINSIC KIND +! .. +! .. Data Statements .. + DATA OPTS_CALLED/ .FALSE./ + DATA MP/6/, NLP/6/, MLP/6/, NSRCH/32768/, ISTART/0/, MAXIT/16/, & + LBIG/ .FALSE./, LBLOCK/ .TRUE./, GROW/ .TRUE./, & + TOL/0.0_WP/, CGCE/0.5_WP/, BIG/0.0_WP/, ABORT1/ .TRUE./, & + ABORT2/ .TRUE./, ABORT3/ .FALSE./, ABORT/ .FALSE./, MIRN/0/, & + MICN/0/, MIRNCP/0/, MICNCP/0/, MIRANK/0/, NDROP1/0/, & + MRMIN/0.0D0/, MRESID/0/, OK_TO_CALL_MA28/.FALSE./ +! .. +! END OF DVODE_F90 PRIVATE SECTION. +!_______________________________________________________________________ + + CONTAINS + + SUBROUTINE VODE_F90(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTS,J_FCN,G_FCN) +! .. +! This is an interface for DVODE to allow JAC and GFUN to be +! OPTIONAL arguments. +! .. + IMPLICIT NONE +! .. +! .. Structure Arguments .. + TYPE (VODE_OPTS) :: OPTS +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (INOUT) :: T, TOUT + INTEGER, INTENT (INOUT) :: ISTATE + INTEGER, INTENT (IN) :: ITASK, NEQ +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: Y(*) +! .. +! .. Subroutine Arguments .. + OPTIONAL :: G_FCN, J_FCN + EXTERNAL J_FCN +! .. +! .. Subroutine Interfaces .. + INTERFACE + SUBROUTINE F(NEQ,T,Y,YDOT) + INTEGER, PARAMETER :: WP = KIND(1.0D0) + INTEGER NEQ + REAL(WP) T + REAL(WP), DIMENSION(NEQ) :: Y, YDOT + INTENT(IN) :: NEQ, T, Y + INTENT(OUT) :: YDOT + END SUBROUTINE F + END INTERFACE + + INTERFACE + SUBROUTINE G_FCN(NEQ,T,Y,NG,GROOT) + INTEGER, PARAMETER :: WP = KIND(1.0D0) + INTEGER NEQ, NG + REAL(WP) T + REAL(WP), DIMENSION(NEQ) :: Y + REAL(WP), DIMENSION(NG) :: GROOT(NG) + INTENT(IN) :: NEQ, T, Y, NG + INTENT(OUT) :: GROOT + END SUBROUTINE G_FCN + END INTERFACE + +! Note: +! The best we can do here is to declare J_FCN to be +! external. The interface for a sparse problem differs +! from that for a banded or dense problem. The following +! would suffuce for a banded or dense problem. +! INTERFACE +! SUBROUTINE J_FCN(NEQ,T,Y,ML,MU,PD,NROWPD) +! INTEGER, PARAMETER :: WP = KIND(1.0D0) +! INTEGER NEQ, ML, MU, NROWPD +! REAL(WP) T +! REAL(WP), DIMENSION(NEQ) :: Y +! REAL(WP), DIMENSION(NEQ) :: PD(NROWPD,NEQ) +! INTENT(IN) :: NEQ, T, Y, ML, MU, NROWPD +! INTENT(INOUT) :: PD +! END SUBROUTINE J_FCN +! END INTERFACE +! The following would suffice for a sparse problem. +! INTERFACE +! SUBROUTINE J_FCN(NEQ,T,Y,IA,JA,NZ,P) +! INTEGER, PARAMETER :: WP = KIND(1.0D0) +! INTEGER NEQ, NZ +! REAL(WP) T +! REAL(WP), DIMENSION Y(*), P(*) +! INTEGER, DIMENSION IA(*), JA(*) +! INTENT(IN) :: NEQ, T, Y +! INTENT(INOUT) IA, JA, NZ, P +! END SUBROUTINE J_FCN +! END INTERFACE +! .. +! .. Local Scalars .. + INTEGER :: HOWCALL, METH, MFA, MITER, MOSS, NG + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, PRESENT +! .. +! .. FIRST EXECUTABLE STATEMENT VODE_F90 +! .. +! Check that SET_OPTS has been called. + IF (.NOT.OPTS_CALLED) THEN + MSG = 'You have not called SET_OPTS before' + CALL XERRDV(MSG,10,1,0,0,0,0,ZERO,ZERO) + MSG = 'calling DVODE_F90 the first time.' + CALL XERRDV(MSG,10,2,0,0,0,0,ZERO,ZERO) + END IF + +! Check that JAC is present if it is needed. + IF (PRESENT(J_FCN)) THEN + ELSE +! Note: +! MOSS is irrelevant. OPTS%MF is two digits after the +! call to SET_OPTS. + MFA = ABS(OPTS%MF) + MOSS = MFA/100 + METH = (MFA-100*MOSS)/10 + MITER = MFA - 100*MOSS - 10*METH + IF (MITER==1 .OR. MITER==4 .OR. MITER==6) THEN + MSG = 'You have specified a value of the integration' + CALL XERRDV(MSG,20,1,0,0,0,0,ZERO,ZERO) + MSG = 'method flag MF which requires that you supply' + CALL XERRDV(MSG,20,1,0,0,0,0,ZERO,ZERO) + MSG = 'a Jacobian subroutine JAC; but FAC is not' + CALL XERRDV(MSG,20,1,0,0,0,0,ZERO,ZERO) + MSG = 'present in the argument list.' + CALL XERRDV(MSG,20,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + +! Check that GFUN is present if it is needed. + + IF (PRESENT(G_FCN)) THEN + ELSE + NG = OPTS%NG + IF (NG>0) THEN + MSG = 'You have indicated that events are present but' + CALL XERRDV(MSG,30,1,0,0,0,0,ZERO,ZERO) + MSG = 'you have not supplied a GFUN subroutine.' + CALL XERRDV(MSG,30,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + +! Determine how DVODE will be called. + +! HOWCALL = 1: JDUMMY, GDUMMY +! 2: JAC, GFUN +! 3: JAC, GDUMMY +! 4: JDUMMY, GFUN + HOWCALL = 1 + IF (PRESENT(J_FCN)) THEN + IF (PRESENT(G_FCN)) THEN + HOWCALL = 2 + ELSE + HOWCALL = 3 + END IF + ELSE + IF (PRESENT(G_FCN)) THEN + HOWCALL = 4 + ELSE + HOWCALL = 1 + END IF + END IF + +! Call DVODE to do the integration. + + IF (HOWCALL==1) THEN + CALL DVODE(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTS,JDUMMY,GDUMMY) + ELSE IF (HOWCALL==2) THEN + CALL DVODE(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTS,J_FCN,G_FCN) + ELSE IF (HOWCALL==3) THEN + CALL DVODE(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTS,J_FCN,GDUMMY) + ELSE IF (HOWCALL==4) THEN + CALL DVODE(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTS,JDUMMY,G_FCN) + END IF + RETURN + + END SUBROUTINE VODE_F90 +!_______________________________________________________________________ + + SUBROUTINE JDUMMY(NEQ,T,Y,ML,MU,PD,NROWPD) +! .. +! This is a dummy Jacobian subroutine for VODE_F90 (never called). +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: T + INTEGER :: ML, MU, NEQ, NROWPD, I + LOGICAL DUMMY +! .. +! .. Array Arguments .. + REAL (WP) :: PD(NROWPD,*), Y(*) +! .. + INTENT(IN) T, Y, ML, MU, NROWPD + INTENT(INOUT) PD +! .. +! .. FIRST EXECUTABLE STATEMENT JDUMMY +! .. +! Get rid of some needless compiler warning messages. + DUMMY = .FALSE. + IF (DUMMY) THEN + I = NEQ + I = ML + I = MU + I = NROWPD + PD(1,1) = T + PD(1,1) = Y(1) + PD(1,1) = DBLE(REAL(I)) + END IF + RETURN + + END SUBROUTINE JDUMMY +!_______________________________________________________________________ + + SUBROUTINE GDUMMY(NEQ,T,Y,NG,GOUT) +! .. +! This is a dummy event subroutine for VODE_F90 (never called). +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: T + INTEGER :: NEQ, NG, I + LOGICAL DUMMY +! .. +! .. Array Arguments .. + REAL (WP) :: GOUT(*), Y(*) +! .. + INTENT(IN) NEQ, T, Y, NG + INTENT(OUT) GOUT +! .. +! .. FIRST EXECUTABLE STATEMENT JDUMMY +! .. +! Get rid of some needless compiler warning messages. + DUMMY = .FALSE. + IF (DUMMY) THEN + I = NEQ + I = NG + GOUT(1) = T + GOUT(1) = Y(1) + GOUT(1) = DBLE(REAL(I)) + END IF + RETURN + + END SUBROUTINE GDUMMY +!_______________________________________________________________________ + + SUBROUTINE SET_OPTS_2(HMAX,HMIN,MXSTEP) +! .. +! Allow the maximum step size, the minimum step size, and the maximum +! number of steps to be changed without restarting the integration. +! .. +! Quick Summary of Options +! HMAX - Maximum step size in DVODE +! HMIN - Minimum step size in DVODE +! MXSTEP - Maximum number of integration steps in DVODE +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), OPTIONAL, INTENT (IN) :: HMAX, HMIN + INTEGER, OPTIONAL, INTENT (IN) :: MXSTEP +! .. +! .. Local Scalars .. + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ALLOCATED, PRESENT +! .. +! .. FIRST EXECUTABLE STATEMENT SET_OPTS_2 +! .. +! Check that SET_OPTS has been called: + IF (.NOT.OPTS_CALLED) THEN + MSG = 'You have not called SET_OPTS before' + CALL XERRDV(MSG,40,1,0,0,0,0,ZERO,ZERO) + MSG = 'calling subroutine SET_OPTS_2.' + CALL XERRDV(MSG,40,2,0,0,0,0,ZERO,ZERO) + END IF + IF (PRESENT(HMAX)) THEN + RUSER(6) = HMAX + MSG = 'HMAX changed in SET_OPTS_2.' + CALL XERRDV(MSG,50,1,0,0,0,1,HMAX,ZERO) + END IF + IF (PRESENT(HMIN)) THEN + RUSER(7) = HMIN + MSG = 'HMIN changed in SET_OPTS_2.' + CALL XERRDV(MSG,60,1,0,0,0,1,HMIN,ZERO) + END IF + IF (PRESENT(MXSTEP)) THEN + IUSER(6) = MXSTEP + MSG = 'MXSTEP changed in SET_OPTS_2.' + CALL XERRDV(MSG,70,1,1,MXSTEP,0,0,ZERO,ZERO) + END IF + + END SUBROUTINE SET_OPTS_2 +!_______________________________________________________________________ + + FUNCTION SET_NORMAL_OPTS(DENSE_J, BANDED_J, SPARSE_J, & + USER_SUPPLIED_JACOBIAN, LOWER_BANDWIDTH, UPPER_BANDWIDTH, & + RELERR, ABSERR, ABSERR_VECTOR, NEVENTS) RESULT(OPTS) + +! FUNCTION SET_NORMAL_OPTS: +! Jacobian type: +! DENSE_J, BANDED_J, SPARSE_J +! Analytic Jacobian: +! USER_SUPPLIED_JACOBIAN +! If banded Jacobian: +! LOWER_BANDWIDTH,UPPER_BANDWIDTH +! Error tolerances: +! RELERR, ABSERR, ABSERR_VECTOR +! Rootfinding: +! NEVENTS +! RESULT(OPTS) + +! Note: +! Invoking SET_NORMAL_OPTS causes an integration restart. A common +! situation is one in which all you wish to change is one of the +! vode.f77 optional parameters HMAX, HMIN, or MXSTEP. Once the +! integration is started and this is all you wish to do, you can +! change any of these parameters without restarting the integration +! simply by calling subroutine SET_OPTS_2: +! CALL SET_OPTS_2(HMAX,HMIN,MXSTEP) +! Each of the three arguments is optional and only the ones actually +! supplied will be used. Changes will take effect in the same manner +! as in the VODE.f77 solver. +! +! NORMAL_OPTIONS sets user parameters for DVODE via keywords. +! Values that are defined herein will be used internally by +! DVODE. All option keywords are OPTIONAL and order is not +! important. These options should be adequate for most problems. +! If you wish to use more specialized options, you must use +! SET_INTERMEDIATE_OPTS or SET_OPTS rather than NORMAL_OPTS. +! If you wish to use SET_INTERMEDIATE_OPTS or SET_OPTS, you +! may use any of the SET_NORMAL_OPTS keywords or any of the +! keywords available for these two functions. Of course, you +! may opt to simply use SET_OPTS for all problems. + +! Note that DVODE_F90 requires that one of SET_NORMAL_OPTS or +! SET_INTERMEDIATE_OPTS or SET_OPTS is called before the first +! time DVODE_F90 is called. +! +! Important Note: +! If feasible, you should use the dense or banded option; but +! SET_NORMAL_OPTS allows you to use a sparse internal Jacobian +! (i.e., one that is determined using finite differences) and +! structure pointere array that are determined internally +! using finite differences. If any of the following are true +! (1) DVODE_F90 doesn't perform satisfactorily for +! your problem, +! (2) you are solving a very large problem, +! (3) you wish to supply the sparse pointer arrays +! directly, +! (4) you wish to supply an analytical sparse Jacobian, +! or +! (5) you wish to use one of the specialized sparse +! Jacobian options, +! you are encouraged to use SET_OPTS which contains several +! provisions for solving sparse problems more efficiently. + +! Option Types: +! DENSE_J - logical +! BANDED_J - logical +! SPARSE_J - logical +! USER_SUPPLIED_JACOBIAN - logical +! LOWER_BANDWIDTH - integer +! UPPER_BANDWIDTH - integer +! RELERR - real(wp) scalar +! ABSERR - real(wp) scalar +! ABSERR_VECTOR - real(wp) vector +! NEVENTS - integer +! Options: +! ABSERR = Absolute error tolerance +! ABSERR_VECTOR = Vector of absolute error tolerances +! RELERR = Scalar relative error tolerance +! NEVENTS = Number of event functions (requires +! user-supplied GFUN) +! DENSE_J = Use dense linear algebra if .TRUE. +! BANDED_J = Use banded linear algebra if .TRUE. +! LOWER_BANDWIDTH = Lower bandwidth of the Jacobian +! (required if BANDED_J = .TRUE.) +! UPPER_BANDWIDTH = Upper bandwidth of the Jacobian +! (required if BANDED_J = .TRUE.) +! SPARSE_J = Use sparse linear algebra if .TRUE. +! USER_SUPPLIED_JACOBIAN = Exact Jacobian option +! (requires user-supplied JAC; +! ignored for SPARSE_J=.TRUE.) +! +! Note: DENSE_J takes precedence over BANDED_J which in turn +! takes precedence over SPARSE_J if more than one is supplied. +! If neither of the three flags is present, the nonstiff Adams +! option will be used. Similiarly, ABSERR_VECTOR takes +! precedence over ABSERR. +! +! Note on Jacobian Storage Formats: +! +! If you supply an analytic Jacobian PD, load the +! Jacobian elements DF(I)/DY(J), the partial +! derivative of F(I) with respect to Y(J), using +! the following formats. Here, Y is the solution, +! F is the derivative, and PD is the Jacobian. +! +! For a full Jacobian, load PD(I,J) with DF(I)/DY(J). +! Your code might look like this: +! DO J = 1, NEQ +! DO I = 1, NEQ +! PD(I,J) = ... DF(I)/DY(J) +! END DO +! END DO +! +! For a banded Jacobian, load PD(I-J+MU+1,J) with +! DF(I)/DY(J) where ML is the lower bandwidth +! and MU is the upper bandwidth of the Jacobian. +! Your code might look like this: +! DO J = 1, NEQ +! I1 = MAX(1,J-ML) +! I2 = MIN(N,J+MU) +! DO I = I1, I2 +! K = I-J+MU+1 +! PD(K,J) = ... DF(I)/DY(J) +! END DO +! END DO +! .. + IMPLICIT NONE +! .. +! .. Function Return Value .. + TYPE (VODE_OPTS) :: OPTS +! .. +! .. Scalar Arguments .. + REAL (WP), OPTIONAL, INTENT (IN) :: ABSERR,RELERR + INTEGER, OPTIONAL, INTENT (IN) :: LOWER_BANDWIDTH, & + NEVENTS,UPPER_BANDWIDTH + LOGICAL, OPTIONAL, INTENT (IN) :: BANDED_J, DENSE_J, & + SPARSE_J, USER_SUPPLIED_JACOBIAN +! .. +! .. Array Arguments .. + REAL (WP), OPTIONAL, INTENT (IN) :: ABSERR_VECTOR(:) +! .. +! .. Local Scalars .. + INTEGER :: IER,IOPT,METH,MF,MFA,MFSIGN,MITER,ML,MOSS, & + MU,NAE,NG,NRE + LOGICAL :: BANDED,DENSE,SPARSE + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ALLOCATED, IABS, MAX, MINVAL, PRESENT, SIGN, SIZE +! .. +! .. FIRST EXECUTABLE STATEMENT SET_NORMAL_OPTS +! .. + RUSER(1:LRWUSER) = ZERO + IUSER(1:LIWUSER) = 0 + +! Allow default error tolerances? + ALLOW_DEFAULT_TOLS = .FALSE. + +! Maximum number of consecutive error test failures? + CONSECUTIVE_EFAILS = KFH + +! Maximum number of consecutive corrector iteration failures? + CONSECUTIVE_CFAILS = MXNCF + +! Use JACSP to approximate Jacobian? + USE_JACSP = .FALSE. + +! Set the flag to indicate that SET_NORMAL_OPTS has been called. + OPTS_CALLED = .TRUE. + +! Set the MA48 storage cleanup flag. + MA48_WAS_USED = .FALSE. + +! Set the fast factor option for MA48, + USE_FAST_FACTOR = .TRUE. + +! Set the constant Jacobian flags. + J_IS_CONSTANT = .FALSE. + J_HAS_BEEN_COMPUTED = .FALSE. + +! Determine the working precision and define the value for UMAX +! expected by MA28. Note that it is different for single and +! double precision. + WPD = KIND(1.0D0) + WPS = KIND(1.0E0) + IF (WPD/=WP .AND. WPS/=WP) THEN + MSG = 'Illegal working precision in SET_NORMAL_OPTS.' + CALL XERRDV(MSG,80,2,0,0,0,0,ZERO,ZERO) + END IF + IF (WPD==WP) THEN +! Working precision is double. + UMAX = 0.999999999_WP + ELSE +! Working precision is single. + UMAX = 0.9999_WP + END IF + + MA28AD_CALLS = 0 + MA28BD_CALLS = 0 + MA28CD_CALLS = 0 + MC19AD_CALLS = 0 +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! MA48AD_CALLS = 0 +! MA48BD_CALLS = 0 +! MA48CD_CALLS = 0 +!_______________________________________________________________________ + IRNCP = 0 + ICNCP = 0 + MINIRN = 0 + MINICN = 0 + MAX_MINIRN = 0 + MAX_MINICN = 0 + MAX_NNZ = 0 + +! Set the flag to warn the user if |(y(t)| < abserr. + YMAXWARN = .FALSE. + +! Load defaults for the optional input arrays for DVODE. + IUSER(1:8) = 0 + RUSER(1:8) = ZERO + +! Set the method flag. + MF = 10 + IF (PRESENT(SPARSE_J)) THEN + IF (SPARSE_J) THEN + MF = 227 + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + MSG = 'You have indicated you wish to supply an' + CALL XERRDV(MSG,90,1,0,0,0,0,ZERO,ZERO) + MSG = 'exact sparse Jacobian in function' + CALL XERRDV(MSG,90,1,0,0,0,0,ZERO,ZERO) + MSG = 'SET_NORMAL_OPTS. In order to do this,' + CALL XERRDV(MSG,90,1,0,0,0,0,ZERO,ZERO) + MSG = 'you must use SET_OPTS. Execution will' + CALL XERRDV(MSG,90,1,0,0,0,0,ZERO,ZERO) + MSG = 'continue.' + CALL XERRDV(MSG,90,1,0,0,0,0,ZERO,ZERO) + END IF + END IF + END IF + + IF (PRESENT(BANDED_J)) THEN + IF (BANDED_J) THEN + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + IF (USER_SUPPLIED_JACOBIAN) THEN + MF = 24 + ELSE + MF = 25 + END IF + ELSE + MF = 25 + END IF + END IF + END IF + + IF (PRESENT(DENSE_J)) THEN + IF (DENSE_J) THEN + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + IF (USER_SUPPLIED_JACOBIAN) THEN + MF = 21 + ELSE + MF = 22 + END IF + ELSE + MF = 22 + END IF + END IF + END IF + +! Check for errors in MF. + MFA = IABS(MF) + MOSS = MFA/100 + METH = (MFA-100*MOSS)/10 + MITER = MFA - 100*MOSS - 10*METH + IF (METH<1 .OR. METH>2) THEN + MSG = 'Illegal value of METH in SET_NORMAL_OPTS.' + CALL XERRDV(MSG,100,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MITER<0 .OR. MITER>7) THEN + MSG = 'Illegal value of MITER in SET_NORMAL_OPTS.' + CALL XERRDV(MSG,110,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MOSS<0 .OR. MOSS>2) THEN + MSG = 'Illegal value of MOSS in SET_NORMAL_OPTS.' + CALL XERRDV(MSG,120,2,0,0,0,0,ZERO,ZERO) + END IF + +! Reset MF, now that MOSS is known. + MFSIGN = SIGN(1,MF) + MF = MF - 100*MOSS*MFSIGN + + IF (MITER==0) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==1 .OR. MITER==2) THEN + DENSE = .TRUE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==3) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==4 .OR. MITER==5) THEN + DENSE = .FALSE. + BANDED = .TRUE. + SPARSE = .FALSE. + ELSE IF (MITER==6 .OR. MITER==7) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .TRUE. + END IF + +! Define the banded Jacobian band widths. + IF (BANDED) THEN + IF (PRESENT(LOWER_BANDWIDTH)) THEN + ML = LOWER_BANDWIDTH + IUSER(1) = ML + ELSE + MSG = 'In SET_NORMAL_OPTS you have indicated a' + CALL XERRDV(MSG,130,1,0,0,0,0,ZERO,ZERO) + MSG = 'banded Jacobian but you have not supplied' + CALL XERRDV(MSG,130,1,0,0,0,0,ZERO,ZERO) + MSG = 'the lower bandwidth.' + CALL XERRDV(MSG,130,2,0,0,0,0,ZERO,ZERO) + END IF + IF (PRESENT(UPPER_BANDWIDTH)) THEN + MU = UPPER_BANDWIDTH + IUSER(2) = MU + ELSE + MSG = 'In SET_NORMAL_OPTS you have indicated a' + CALL XERRDV(MSG,140,1,0,0,0,0,ZERO,ZERO) + MSG = 'banded Jacobian but you have not supplied' + CALL XERRDV(MSG,140,1,0,0,0,0,ZERO,ZERO) + MSG = 'the upper bandwidth.' + CALL XERRDV(MSG,140,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + +! Define the sparse Jacobian options. + IF (SPARSE) THEN +! Set the MA28 message flag. + LP = 0 +! Set the MA28 pivot sequence frequency flag. + REDO_PIVOT_SEQUENCE = .FALSE. +! Set the MA28 singularity threshold. + EPS = 1.0E-4_WP +! Use scaling of the iteration matrix. + SCALE_MATRIX = .TRUE. +! Define the elbow room factor for the MA28 sparse work arrays. + ELBOW_ROOM = 2 +! NZSWAG is a swag for the number of nonzeros in the Jacobian. + NZ_SWAG = 0 +! Use partial pivoting. + U_PIVOT = ONE +! Indicate that SET_IAJA has not yet been called successfully. + IAJA_CALLED = .FALSE. +! Check for illegal method flags. + IF (MOSS==2 .AND. MITER/=7) THEN + MSG = 'In SET_NORMAL_OPTS MOSS=2 but MITER is not 7.' + CALL XERRDV(MSG,150,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MOSS==1 .AND. MITER/=6) THEN + MSG = 'In SET_NORMAL_OPTS MOSS=1 but MITER is not 6.' + CALL XERRDV(MSG,160,2,0,0,0,0,ZERO,ZERO) + END IF +! IF (MOSS==0 .AND. MITER/=7) THEN +! MSG = 'In SET_NORMAL_OPTS MOSS=0 but MITER is not 7.' +! CALL XERRDV(MSG,170,2,0,0,0,0,ZERO,ZERO) +! END IF + END IF + +! Define the number of event functions. + IF (PRESENT(NEVENTS)) THEN + IF (NEVENTS>0) THEN + NG = NEVENTS + ELSE + NG = 0 + END IF + ELSE + NG = 0 + END IF + +! No solution bounds will be imposed. + BOUNDS = .FALSE. + +! Load the user options into the solution structure. + OPTS%MF = MF + OPTS%METH = METH + OPTS%MITER = MITER + OPTS%MOSS = MOSS + OPTS%DENSE = DENSE + OPTS%BANDED = BANDED + OPTS%SPARSE = SPARSE + OPTS%NG = NG + + IOPT = 1 + OPTS%IOPT = IOPT + +! Process the error tolerances. + +! Relative error tolerance. + NRE = 1 + ALLOCATE (OPTS%RTOL(NRE),STAT=IER) + CALL CHECK_STAT(IER,10) + IF (PRESENT(RELERR)) THEN + IF (RELERR2) THEN + MSG = 'Illegal value of METH in SET_INTERMEDIATE_OPTS.' + CALL XERRDV(MSG,330,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MITER<0 .OR. MITER>7) THEN + MSG = 'Illegal value of MITER in SET_INTERMEDIATE_OPTS.' + CALL XERRDV(MSG,340,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MOSS<0 .OR. MOSS>2) THEN + MSG = 'Illegal value of MOSS in SET_INTERMEDIATE_OPTS.' + CALL XERRDV(MSG,350,2,0,0,0,0,ZERO,ZERO) + END IF + +! Reset MF, now that MOSS is known. + MFSIGN = SIGN(1,MF) + MF = MF - 100*MOSS*MFSIGN + + IF (MITER==0) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==1 .OR. MITER==2) THEN + DENSE = .TRUE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==3) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==4 .OR. MITER==5) THEN + DENSE = .FALSE. + BANDED = .TRUE. + SPARSE = .FALSE. + ELSE IF (MITER==6 .OR. MITER==7) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .TRUE. + END IF + +! Define the banded Jacobian band widths. + IF (BANDED) THEN + IF (PRESENT(LOWER_BANDWIDTH)) THEN + ML = LOWER_BANDWIDTH + IUSER(1) = ML + ELSE + MSG = 'In SET_INTERMEDIATE_OPTS you have indicated a' + CALL XERRDV(MSG,360,1,0,0,0,0,ZERO,ZERO) + MSG = 'banded Jacobian but you have not' + CALL XERRDV(MSG,360,1,0,0,0,0,ZERO,ZERO) + MSG = 'supplied the lower bandwidth.' + CALL XERRDV(MSG,360,2,0,0,0,0,ZERO,ZERO) + END IF + IF (PRESENT(UPPER_BANDWIDTH)) THEN + MU = UPPER_BANDWIDTH + IUSER(2) = MU + ELSE + MSG = 'In SET_INTERMEDIATE_OPTS you have indicated a' + CALL XERRDV(MSG,370,1,0,0,0,0,ZERO,ZERO) + MSG = 'banded Jacobian but you have not' + CALL XERRDV(MSG,370,1,0,0,0,0,ZERO,ZERO) + MSG = 'supplied the upper bandwidth.' + CALL XERRDV(MSG,370,2,0,0,0,0,ZERO,ZERO) + END IF +! Define the nonzero diagonals. + BNGRP = 0 + SUBS = .FALSE. + SUPS = .FALSE. + NSUBS = 0 + NSUPS = 0 + END IF + +! Define the sparse Jacobian options. + SCALE_MATRIX = .FALSE. + ELBOW_ROOM = 2 + IF (SPARSE) THEN +! NZSWAG for the number of nonzeros in the Jacobian. + IF (PRESENT(NZSWAG)) THEN + NZ_SWAG = MAX(NZSWAG,0) + ELSE + NZ_SWAG = 0 + END IF +! Indicate that SET_IAJA has not yet been called successfully. + IAJA_CALLED = .FALSE. +! Check for illegal method flags. + IF (MOSS==2 .AND. MITER/=7) THEN + MSG = 'In SET_INTERMEDIATE_OPTS MOSS=2 but MITER is not 7.' + CALL XERRDV(MSG,380,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MOSS==1 .AND. MITER/=6) THEN + MSG = 'In SET_INTERMEDIATE_OPTS MOSS=1 but MITER is not 6.' + CALL XERRDV(MSG,390,2,0,0,0,0,ZERO,ZERO) + END IF +! IF (MOSS==0 .AND. MITER/=7) THEN +! MSG = 'In SET_INTERMEDIATE_OPTS MOSS=0 but MITER is not 7.' +! CALL XERRDV(MSG,400,2,0,0,0,0,ZERO,ZERO) +! END IF +! Allow MC19 scaling for the Jacobian. + SCALE_MATRIX = .FALSE. + END IF + +! Define the number of event functions. + IF (PRESENT(NEVENTS)) THEN + IF (NEVENTS>0) THEN + NG = NEVENTS + ELSE + NG = 0 + END IF + ELSE + NG = 0 + END IF + +! Process the constrained solution components. + IF (PRESENT(CONSTRAINED)) THEN + NDX = SIZE(CONSTRAINED) + IF (NDX<1) THEN + MSG = 'In SET_INTERMEDIATE_OPTS the size of CONSTRAINED < 1.' + CALL XERRDV(MSG,410,2,0,0,0,0,ZERO,ZERO) + END IF + IF (.NOT.(PRESENT(CLOWER)) .OR. .NOT.(PRESENT(CUPPER))) THEN + MSG = 'In SET_INTERMEDIATE_OPTS the arrays CLOWER and CUPPER' + CALL XERRDV(MSG,420,1,0,0,0,0,ZERO,ZERO) + MSG = 'are not present.' + CALL XERRDV(MSG,420,2,0,0,0,0,ZERO,ZERO) + END IF + IF (SIZE(CLOWER)/=NDX .OR. SIZE(CUPPER)/=NDX) THEN + MSG = 'In SET_INTERMEDIATE_OPTS the size of the solution bound' + CALL XERRDV(MSG,430,1,0,0,0,0,ZERO,ZERO) + MSG = 'arrays must be the same as the CONSTRAINED array.' + CALL XERRDV(MSG,430,2,0,0,0,0,ZERO,ZERO) + END IF +! Note: The contents of CONSTRAINED will be checked +! in subroutine DVODE after NEQ is known. + IF (ALLOCATED(IDX)) THEN + DEALLOCATE (IDX,LB,UB,STAT=IER) + CALL CHECK_STAT(IER,30) + END IF + ALLOCATE (IDX(NDX),LB(NDX),UB(NDX),STAT=IER) + CALL CHECK_STAT(IER,40) + IDX(1:NDX) = CONSTRAINED(1:NDX) + LB(1:NDX) = CLOWER(1:NDX) + UB(1:NDX) = CUPPER(1:NDX) + BOUNDS = .TRUE. + ELSE + IF (ALLOCATED(IDX)) THEN + DEALLOCATE (IDX,LB,UB,STAT=IER) + CALL CHECK_STAT(IER,50) + END IF + NDX = 0 + BOUNDS = .FALSE. + END IF + +! Is the Jacobian constant? + J_IS_CONSTANT = .FALSE. + J_HAS_BEEN_COMPUTED = .FALSE. + +! Load the user options into the solution structure. + OPTS%MF = MF + OPTS%METH = METH + OPTS%MITER = MITER + OPTS%MOSS = MOSS + OPTS%DENSE = DENSE + OPTS%BANDED = BANDED + OPTS%SPARSE = SPARSE + OPTS%NG = NG + +! Process the miscellaneous options. + +! Don't step past TCRIT variable. + IF (PRESENT(TCRIT)) THEN + RUSER(1) = TCRIT + ELSE + RUSER(1) = ZERO + END IF + +! DVODE optional parameters. + IOPT = 1 + IF (PRESENT(MAXORD)) THEN + IUSER(5) = MAXORD + IOPT = 1 + END IF + IF (PRESENT(MXSTEP)) THEN + IUSER(6) = MXSTEP + IOPT = 1 + END IF + IF (PRESENT(MXHNIL)) THEN + IUSER(7) = MXHNIL + IOPT = 1 + END IF + IF (PRESENT(H0)) THEN + RUSER(5) = H0 + IOPT = 1 + END IF + IF (PRESENT(HMAX)) THEN + RUSER(6) = HMAX + IOPT = 1 + END IF + IF (PRESENT(HMIN)) THEN + RUSER(7) = HMIN + IOPT = 1 + END IF + U_PIVOT = ONE + OPTS%IOPT = IOPT + +! Define the error tolerances. + +! Relative error tolerances. + NRE = 1 + ALLOCATE (OPTS%RTOL(NRE),STAT=IER) + CALL CHECK_STAT(IER,60) + IF (PRESENT(RELERR)) THEN + IF (RELERR ZERO) THEN + EPS = MA28_EPS + ELSE + EPS = 1.0E-4_WP + END IF + ELSE + EPS = 1.0E-4_WP + END IF + +! Set the MA28 pivot sequence frequency flag. + IF (PRESENT(MA28_RPS)) THEN + IF (MA28_RPS) THEN + REDO_PIVOT_SEQUENCE = MA28_RPS + ELSE + REDO_PIVOT_SEQUENCE = .FALSE. + END IF + ELSE + REDO_PIVOT_SEQUENCE = .FALSE. + END IF + + MA28AD_CALLS = 0 + MA28BD_CALLS = 0 + MA28CD_CALLS = 0 + MC19AD_CALLS = 0 +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! MA48AD_CALLS = 0 +! MA48BD_CALLS = 0 +! MA48CD_CALLS = 0 +!_______________________________________________________________________ + IRNCP = 0 + ICNCP = 0 + MINIRN = 0 + MINICN = 0 + MAX_MINIRN = 0 + MAX_MINICN = 0 + MAX_NNZ = 0 + +! Set the flag to warn the user if |(y(t)| < abserr. + IF (PRESENT(YMAGWARN)) THEN + IF (YMAGWARN) THEN + YMAXWARN = .TRUE. + ELSE + YMAXWARN = .FALSE. + END IF + ELSE + YMAXWARN = .FALSE. + END IF + +! Load defaults for the optional input arrays for DVODE. + IUSER(1:8) = 0 + RUSER(1:8) = ZERO + +! Set the method flag. + IF (.NOT.(PRESENT(METHOD_FLAG))) THEN + MF = 10 + IF (PRESENT(SPARSE_J)) THEN + IF (SPARSE_J) THEN + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + IF (USER_SUPPLIED_JACOBIAN) THEN + MF = 126 + ELSE + MF = 227 + END IF + ELSE + MF = 227 + END IF + IF (PRESENT(USER_SUPPLIED_SPARSITY)) THEN + IF (USER_SUPPLIED_SPARSITY) THEN + MF = MF - 100*(MF/100) + END IF + END IF + IF (PRESENT(SAVE_JACOBIAN)) THEN + IF (.NOT.SAVE_JACOBIAN) THEN + MF = -MF + END IF + END IF + END IF + END IF + + IF (PRESENT(BANDED_J)) THEN + IF (BANDED_J) THEN + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + IF (USER_SUPPLIED_JACOBIAN) THEN + MF = 24 + ELSE + MF = 25 + END IF + ELSE + MF = 25 + END IF + IF (PRESENT(SAVE_JACOBIAN)) THEN + IF (.NOT.SAVE_JACOBIAN) THEN + MF = -MF + END IF + END IF + END IF + END IF + + IF (PRESENT(DENSE_J)) THEN + IF (DENSE_J) THEN + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + IF (USER_SUPPLIED_JACOBIAN) THEN + MF = 21 + ELSE + MF = 22 + END IF + ELSE + MF = 22 + END IF + IF (PRESENT(SAVE_JACOBIAN)) THEN + IF (.NOT.SAVE_JACOBIAN) THEN + MF = -MF + END IF + END IF + END IF + END IF + ELSE + MF = METHOD_FLAG + END IF + +! Check for errors in MF. + MFA = IABS(MF) + MOSS = MFA/100 + METH = (MFA-100*MOSS)/10 + MITER = MFA - 100*MOSS - 10*METH + IF (METH<1 .OR. METH>2) THEN + MSG = 'Illegal value of METH in SET_OPTS.' + CALL XERRDV(MSG,570,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MITER<0 .OR. MITER>7) THEN + MSG = 'Illegal value of MITER in SET_OPTS.' + CALL XERRDV(MSG,580,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MOSS<0 .OR. MOSS>2) THEN + MSG = 'Illegal value of MOSS in SET_OPTS.' + CALL XERRDV(MSG,580,2,0,0,0,0,ZERO,ZERO) + END IF + +! Reset MF, now that MOSS is known. + MFSIGN = SIGN(1,MF) + MF = MF - 100*MOSS*MFSIGN + + IF (MITER==0) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==1 .OR. MITER==2) THEN + DENSE = .TRUE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==3) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .FALSE. + ELSE IF (MITER==4 .OR. MITER==5) THEN + DENSE = .FALSE. + BANDED = .TRUE. + SPARSE = .FALSE. + ELSE IF (MITER==6 .OR. MITER==7) THEN + DENSE = .FALSE. + BANDED = .FALSE. + SPARSE = .TRUE. + END IF + +! Define the banded Jacobian band widths. + IF (BANDED) THEN + IF (PRESENT(LOWER_BANDWIDTH)) THEN + ML = LOWER_BANDWIDTH + IUSER(1) = ML + ELSE + MSG = 'In SET_OPTS you have indicated a' + CALL XERRDV(MSG,590,1,0,0,0,0,ZERO,ZERO) + MSG = 'banded Jacobian but you have not' + CALL XERRDV(MSG,590,1,0,0,0,0,ZERO,ZERO) + MSG = 'supplied the lower bandwidth.' + CALL XERRDV(MSG,590,2,0,0,0,0,ZERO,ZERO) + END IF + IF (PRESENT(UPPER_BANDWIDTH)) THEN + MU = UPPER_BANDWIDTH + IUSER(2) = MU + ELSE + MSG = 'In SET_OPTS you have indicated a' + CALL XERRDV(MSG,600,1,0,0,0,0,ZERO,ZERO) + MSG = 'banded Jacobian but you have not' + CALL XERRDV(MSG,600,1,0,0,0,0,ZERO,ZERO) + MSG = 'supplied the upper bandwidth.' + CALL XERRDV(MSG,600,2,0,0,0,0,ZERO,ZERO) + END IF +! Define the nonzero diagonals. + BNGRP = 0 + SUBS = .FALSE. + SUPS = .FALSE. + NSUBS = 0 + NSUPS = 0 + IF (PRESENT(SUB_DIAGONALS)) THEN + SUBS = .TRUE. + NSUBS = SIZE(SUB_DIAGONALS) + IF (NSUBS > 0) THEN + IF (ALLOCATED(SUBDS)) THEN + DEALLOCATE(SUBDS,STAT=IER) + CALL CHECK_STAT(IER,80) + END IF + ALLOCATE(SUBDS(NSUBS),STAT=IER) + CALL CHECK_STAT(IER,90) + SUBDS(1:NSUBS) = SUB_DIAGONALS(1:NSUBS) + ELSE + IF (ML > 0) THEN + MSG = 'You must indicated that the lower bandwidth' + CALL XERRDV(MSG,610,1,0,0,0,0,ZERO,ZERO) + MSG = 'is positive but you have not specified the' + CALL XERRDV(MSG,610,1,0,0,0,0,ZERO,ZERO) + MSG = 'indices for the lower sub diagonals.' + CALL XERRDV(MSG,610,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + END IF + IF (PRESENT(SUP_DIAGONALS)) THEN + SUPS = .TRUE. + NSUPS = SIZE(SUP_DIAGONALS) + IF (NSUPS > 0) THEN + IF (ALLOCATED(SUPDS)) THEN + DEALLOCATE(SUPDS,STAT=IER) + CALL CHECK_STAT(IER,100) + END IF + ALLOCATE(SUPDS(NSUPS),STAT=IER) + CALL CHECK_STAT(IER,110) + SUPDS(1:NSUPS) = SUP_DIAGONALS(1:NSUPS) + ELSE + IF (ML > 0) THEN + MSG = 'You must indicated that the upper bandwidth' + CALL XERRDV(MSG,620,1,0,0,0,0,ZERO,ZERO) + MSG = 'is positive but you have not specified the' + CALL XERRDV(MSG,620,1,0,0,0,0,ZERO,ZERO) + MSG = 'indices for the upper sub diagonals.' + CALL XERRDV(MSG,620,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + END IF + END IF + +! Define the sparse Jacobian options. + SCALE_MATRIX = .FALSE. + ELBOW_ROOM = 2 + IF (SPARSE) THEN +! NZSWAG for the number of nonzeros in the Jacobian. + IF (PRESENT(NZSWAG)) THEN + NZ_SWAG = MAX(NZSWAG,0) + ELSE + NZ_SWAG = 0 + END IF +! Indicate that SET_IAJA has not yet been called successfully. + IAJA_CALLED = .FALSE. +! Check for illegal method flags. + IF (MOSS==2 .AND. MITER/=7) THEN + MSG = 'In SET_OPTS MOSS=2 but MITER is not 7.' + CALL XERRDV(MSG,630,2,0,0,0,0,ZERO,ZERO) + END IF + IF (MOSS==1 .AND. MITER/=6) THEN + MSG = 'In SET_OPTS MOSS=1 but MITER is not 6.' + CALL XERRDV(MSG,640,2,0,0,0,0,ZERO,ZERO) + END IF +! IF (MOSS==0 .AND. MITER/=7) THEN +! MSG = 'In SET_OPTS MOSS=0 but MITER is not 7.' +! CALL XERRDV(MSG,650,2,0,0,0,0,ZERO,ZERO) +! END IF +! Allow the work array elbow room to be increased. + IF (PRESENT(MA28_ELBOW_ROOM)) THEN + ELBOW_ROOM = MAX(MA28_ELBOW_ROOM, ELBOW_ROOM) + END IF +! Allow MC19 scaling for the Jacobian. + SCALE_MATRIX = .FALSE. + IF (PRESENT(MC19_SCALING)) THEN + IF (MC19_SCALING) THEN +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! IF (USE_MA48_FOR_SPARSE) THEN +! MSG = 'MC29AD scaling is not available at this time.' +! CALL XERRDV(MSG,660,1,0,0,0,0,ZERO,ZERO) +! MSG = 'Execution will continue.' +! CALL XERRDV(MSG,660,1,0,0,0,0,ZERO,ZERO) +! END IF +!_______________________________________________________________________ + SCALE_MATRIX = MC19_SCALING +! SCALE_MATRIX = .FALSE. + END IF + END IF + +!_______________________________________________________________________ +! *****MA48 build change point. Replace above with these statements. +! IF (PRESENT(MC19_SCALING)) THEN +! IF (MC19_SCALING) THEN +! IF (USE_MA48_FOR_SPARSE) THEN +! MSG = 'Please note that this version uses MC19AD rather' +! CALL XERRDV(MSG,670,1,0,0,0,0,ZERO,ZERO) +! MSG = 'than MC29AD to sacle the iteration matrix.' +! CALL XERRDV(MSG,670,1,0,0,0,0,ZERO,ZERO) +! MSG = 'Execution will continue.' +! CALL XERRDV(MSG,670,1,0,0,0,0,ZERO,ZERO) +! END IF +! SCALE_MATRIX = MC19_SCALING +! END IF +! END IF +!_______________________________________________________________________ + + END IF + +! Define the number of event functions. + IF (PRESENT(NEVENTS)) THEN + IF (NEVENTS>0) THEN + NG = NEVENTS + ELSE + NG = 0 + END IF + ELSE + NG = 0 + END IF + +! Process the constrained solution components. + IF (PRESENT(CONSTRAINED)) THEN + NDX = SIZE(CONSTRAINED) + IF (NDX<1) THEN + MSG = 'In SET_OPTS the size of CONSTRAINED < 1.' + CALL XERRDV(MSG,680,2,0,0,0,0,ZERO,ZERO) + END IF + IF (.NOT.(PRESENT(CLOWER)) .OR. .NOT.(PRESENT(CUPPER))) THEN + MSG = 'In SET_OPTS the arrays CLOWER and CUPPER are' + CALL XERRDV(MSG,690,1,0,0,0,0,ZERO,ZERO) + MSG = 'not present.' + CALL XERRDV(MSG,690,2,0,0,0,0,ZERO,ZERO) + END IF + IF (SIZE(CLOWER)/=NDX .OR. SIZE(CUPPER)/=NDX) THEN + MSG = 'In SET_OPTS the size of the solution bound arrays' + CALL XERRDV(MSG,700,1,0,0,0,0,ZERO,ZERO) + MSG = 'must be the same as the CONSTRAINED array.' + CALL XERRDV(MSG,700,2,0,0,0,0,ZERO,ZERO) + END IF +! Note: The contents of CONSTRAINED will be checked +! in subroutine DVODE after NEQ is known. + IF (ALLOCATED(IDX)) THEN + DEALLOCATE (IDX,LB,UB,STAT=IER) + CALL CHECK_STAT(IER,120) + END IF + ALLOCATE (IDX(NDX),LB(NDX),UB(NDX),STAT=IER) + CALL CHECK_STAT(IER,130) + IDX(1:NDX) = CONSTRAINED(1:NDX) + LB(1:NDX) = CLOWER(1:NDX) + UB(1:NDX) = CUPPER(1:NDX) + BOUNDS = .TRUE. + ELSE + IF (ALLOCATED(IDX)) THEN + DEALLOCATE (IDX,LB,UB,STAT=IER) + CALL CHECK_STAT(IER,140) + END IF + NDX = 0 + BOUNDS = .FALSE. + END IF + +! Is the Jacobian constant? + J_IS_CONSTANT = .FALSE. + J_HAS_BEEN_COMPUTED = .FALSE. + +!_______________________________________________________________________ + IF (PRESENT(CONSTANT_JACOBIAN)) THEN + IF (CONSTANT_JACOBIAN) THEN + J_IS_CONSTANT = .TRUE. +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. + IF (USE_MA48_FOR_SPARSE .AND. SPARSE) THEN + MSG = 'The constant Jacobian option is not yet available' + CALL XERRDV(MSG,710,1,0,0,0,0,ZERO,ZERO) + MSG = 'with the sparse MA48 solution option. Execution' + CALL XERRDV(MSG,710,1,0,0,0,0,ZERO,ZERO) + MSG = 'will continue.' + CALL XERRDV(MSG,710,1,0,0,0,0,ZERO,ZERO) + J_IS_CONSTANT = .FALSE. + END IF +!_______________________________________________________________________ + END IF + END IF +! *****MA48 build change point. Replace above with these statements. +! IF (PRESENT(CONSTANT_JACOBIAN)) THEN +! IF (CONSTANT_JACOBIAN) THEN +! IF (USE_MA48_FOR_SPARSE) THEN +! MSG = 'The constant Jacobian option is not yet available' +! CALL XERRDV(MSG,720,1,0,0,0,0,ZERO,ZERO) +! MSG = 'with the sparse MA48 solution option. Execution' +! CALL XERRDV(MSG,720,1,0,0,0,0,ZERO,ZERO) +! MSG = 'will continue.' +! CALL XERRDV(MSG,720,1,0,0,0,0,ZERO,ZERO) +! END IF +! ELSE +! J_IS_CONSTANT = .TRUE. +! END IF +! END IF +!_______________________________________________________________________ + IF (J_IS_CONSTANT) THEN + IF (PRESENT(SAVE_JACOBIAN)) THEN + IF (.NOT.SAVE_JACOBIAN) THEN + MSG = 'You have specified that the Jacobian is constant.' + CALL XERRDV(MSG,730,1,0,0,0,0,ZERO,ZERO) + MSG = 'In this case you cannot also specify that' + CALL XERRDV(MSG,730,1,0,0,0,0,ZERO,ZERO) + MSG = 'SAVE_JACOBIAN=.FALSE.' + CALL XERRDV(MSG,730,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + IF (PRESENT(USER_SUPPLIED_JACOBIAN)) THEN + IF (USER_SUPPLIED_JACOBIAN .AND. SPARSE) THEN + MSG = 'You have specified that the Jacobian is constant' + CALL XERRDV(MSG,740,1,0,0,0,0,ZERO,ZERO) + MSG = 'and that you wish to supply an analytic Jacobian' + CALL XERRDV(MSG,740,1,0,0,0,0,ZERO,ZERO) + MSG = 'for a sparse problem. In this case your request' + CALL XERRDV(MSG,740,1,0,0,0,0,ZERO,ZERO) + MSG = 'to use a constant Jacobian will be ignored.' + CALL XERRDV(MSG,740,1,0,0,0,0,ZERO,ZERO) + MSG = 'Execution will continue.' + CALL XERRDV(MSG,740,1,0,0,0,0,ZERO,ZERO) + END IF + END IF + END IF + +! Load the user options into the solution structure. + OPTS%MF = MF + OPTS%METH = METH + OPTS%MITER = MITER + OPTS%MOSS = MOSS + OPTS%DENSE = DENSE + OPTS%BANDED = BANDED + OPTS%SPARSE = SPARSE + OPTS%NG = NG + +! Process the miscellaneous options. + +! Don't step past TCRIT variable. + IF (PRESENT(TCRIT)) THEN + RUSER(1) = TCRIT + ELSE + RUSER(1) = ZERO + END IF + +! DVODE optional parameters. + IOPT = 1 + IF (PRESENT(MAXORD)) THEN + IUSER(5) = MAXORD + IOPT = 1 + END IF + IF (PRESENT(MXSTEP)) THEN + IUSER(6) = MXSTEP + IOPT = 1 + END IF + IF (PRESENT(MXHNIL)) THEN + IUSER(7) = MXHNIL + IOPT = 1 + END IF + IF (PRESENT(H0)) THEN + RUSER(5) = H0 + IOPT = 1 + END IF + IF (PRESENT(HMAX)) THEN + RUSER(6) = HMAX + IOPT = 1 + END IF + IF (PRESENT(HMIN)) THEN + RUSER(7) = HMIN + IOPT = 1 + END IF + IF (PRESENT(SETH)) THEN + RUSER(8) = SETH + IOPT = 1 + END IF + IF (PRESENT(UPIVOT)) THEN + U_PIVOT = UPIVOT + IF (U_PIVOTONE) U_PIVOT = ONE + ELSE + U_PIVOT = ONE + END IF +!_______________________________________________________________________ +! *****MA48 build change point. Insert this statement. +! COPY_OF_U_PIVOT = U_PIVOT +!_______________________________________________________________________ + OPTS%IOPT = IOPT + +! Define the error tolerances. + +! Relative error tolerances. + IF (PRESENT(RELERR_VECTOR)) THEN + IF (MINVAL(RELERR_VECTOR)ZERO) THEN + EMIN = MAX(FMIN,ZERO) + ELSE + EMIN = ZERO + END IF + +! Solution perturbation factors. + IF (NTURB>0) THEN + IF (NTURB<1 .OR. (NTURB>1 .AND. NTURB/=NEQ)) GOTO 30 + IF (NTURB==NEQ) THEN + DO I = 1, NEQ + DTURB(I) = MAX(DTURB(I),HUNDRETH) + END DO + ELSE + MTURB = 1 + DTRB = MAX(DTURB(1),HUNDRETH) + END IF + ELSE + MTURB = 1 + DTRB = HUNDRETH + END IF + + JADIM = MIN(NEQ*NEQ,MAX(1000,NZ_SWAG)) + ADDTOJA = MAX(1000,NZ_SWAG) +! Loop point for array allocation. +10 CONTINUE + IF (ALLOCATED(IA)) THEN + DEALLOCATE (IA,JA,FTEMP,FPTEMP,STAT=JER) + CALL CHECK_STAT(JER,210) + END IF + IMIN = NEQ + 1 + IADIM = NEQ + 1 + JMIN = 0 + JADIM = JADIM + ADDTOJA + IF (JADIM>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping.' + CALL XERRDV(MSG,880,2,0,0,0,0,ZERO,ZERO) + END IF + ALLOCATE (IA(IADIM),JA(JADIM),FTEMP(NEQ),FPTEMP(NEQ),STAT=JER) + CALL CHECK_STAT(JER,220) + +! f = y'(t,y). + CALL DFN(NEQ,T,Y,FTEMP) + IA(1) = 1 + K = 1 + +! Calculate unit roundoff and powers of it used if JACSP +! is used. + UROUND = EPSILON(ONE) + +! Successively perturb each of the solution components and +! calculate the corresponding derivatives. + DO J = 1, NEQ + IF (MTURB==NEQ) DTRB = DTURB(J) + YJ = Y(J) + YJSAVE = YJ + IF (ABS(YJ)<=ZERO) YJ = HUN*UROUND + YPJ = YJ*(ONE+DTRB) + RJ = ABS(YPJ-YJ) + IF (ABS(RJ)<=ZERO) RJ = HUN*UROUND + IF (ABS(RJ)<=ZERO) GOTO 30 + Y(J) = YPJ + CALL DFN(NEQ,T,Y,FPTEMP) + DO 20 I = 1, NEQ +! Estimate the Jacobian element. + AIJ = ABS(FPTEMP(I)-FTEMP(I))/RJ + IF ((AIJ<=EMIN) .AND. (I/=J)) GOTO 20 +! Need more storage for JA. + IF (K>JADIM) GOTO 10 + JMIN = K + JA(K) = I + K = K + 1 +20 CONTINUE + JP1 = J + 1 + IA(JP1) = K + Y(J) = YJSAVE + END DO + GOTO 40 +30 CONTINUE + MSG = 'An error occurred in subroutine SET_IAJA.' + CALL XERRDV(MSG,890,2,0,0,0,0,ZERO,ZERO) + +40 CONTINUE +! Set the flags to indicate to DVODE_F90 that IA and JA have +! been calculated successfully. + SPARSE = .TRUE. + IAJA_CALLED = .TRUE. + +! Trim JA. + ALLOCATE (JATEMP(JMIN),STAT=JER) + CALL CHECK_STAT(JER,230) + JATEMP(1:JMIN) = JA(1:JMIN) + DEALLOCATE (JA,STAT=JER) + CALL CHECK_STAT(JER,240) + ALLOCATE (JA(JMIN),STAT=JER) + CALL CHECK_STAT(JER,250) + JA(1:JMIN) = JATEMP(1:JMIN) + DEALLOCATE (JATEMP,STAT=JER) + CALL CHECK_STAT(JER,260) + JADIM = JMIN + RETURN + + END SUBROUTINE SET_IAJA +!_______________________________________________________________________ + + SUBROUTINE DVODE(F,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTS,JAC,GFUN) +! .. +! This is the core driver (modified original DVODE.F driver). +! .. +! The documentation prologue was moved nearer the top of the file. +! .. + IMPLICIT NONE +! .. +! .. Structure Arguments .. + TYPE (VODE_OPTS) :: OPTS +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (INOUT) :: T, TOUT + INTEGER, INTENT (INOUT) :: ISTATE + INTEGER, INTENT (IN) :: ITASK, NEQ +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: Y(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F, GFUN, JAC +! .. +! .. Local Scalars .. + REAL (WP) :: ATOLI, BIG, EWTI, H0, HMAX, HMX, RH, RTOLI, SIZEST, & + TCRIT, TNEXT, TOLSF, TP + INTEGER :: I, IER, IFLAG, IMXER, IOPT, IPCUTH, IRFP, IRT, ITOL, JCO, & + JER, KGO, LENIW, LENJ, LENP, LENRW, LENWM, LF0, MBAND, MF, MFA, ML, & + MU, NG, NITER, NSLAST + LOGICAL :: IHIT + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, ALLOCATED, EPSILON, MAX, MIN, SIGN, SQRT +! .. +! The following internal PRIVATE variable blocks contain variables which +! are communicated between subroutines in the DVODE package, or which +! are to be saved between calls to DVODE. +! In each block, real variables precede integers. +! The variables stored in the internal PRIVATE variable blocks are as +! follows: +! ACNRM = Weighted r.m.s. norm of accumulated correction vectors. +! CCMXJ = Threshhold on DRC for updating the Jacobian. (See DRC.) +! CONP = The saved value of TQ(5). +! CRATE = Estimated corrector convergence rate constant. +! DRC = Relative change in H*RL1 since last DVJAC call. +! EL = Real array of integration coefficients. See DVSET. +! ETA = Saved tentative ratio of new to old H. +! ETAMAX = Saved maximum value of ETA to be allowed. +! H = The step size. +! HMIN = The minimum absolute value of the step size H to be used. +! HMXI = Inverse of the maximum absolute value of H to be used. +! HMXI = 0.0 is allowed and corresponds to an infinite HMAX. +! HNEW = The step size to be attempted on the next step. +! HSCAL = Stepsize in scaling of YH array. +! PRL1 = The saved value of RL1. +! RC = Ratio of current H*RL1 to value on last DVJAC call. +! RL1 = The reciprocal of the coefficient EL(2). +! TAU = Real vector of past NQ step sizes, length 13. +! TQ = A real vector of length 5 in which DVSET stores constants +! used for the convergence test, the error test, and the +! selection of H at a new order. +! TN = The independent variable, updated on each step taken. +! UROUND = The machine unit roundoff. The smallest positive real number +! such that 1.0 + UROUND /= 1.0 +! ICF = Integer flag for convergence failure in DVNLSD: +! 0 means no failures. +! 1 means convergence failure with out of date Jacobian +! (recoverable error). +! 2 means convergence failure with current Jacobian or +! singular matrix (unrecoverable error). +! INIT = Saved integer flag indicating whether initialization of the +! problem has been done (INIT = 1) or not. +! IPUP = Saved flag to signal updating of Newton matrix. +! JCUR = Output flag from DVJAC showing Jacobian status: +! JCUR = 0 means J is not current. +! JCUR = 1 means J is current. +! JSTART = Integer flag used as input to DVSTEP: +! 0 means perform the first step. +! 1 means take a new step continuing from the last. +! -1 means take the next step with a new value of MAXORD, +! HMIN, HMXI, N, METH, MITER, and/or matrix parameters. +! On return, DVSTEP sets JSTART = 1. +! JSV = Integer flag for Jacobian saving, = sign(MF). +! KFLAG = A completion code from DVSTEP with the following meanings: +! 0 the step was successful. +! -1 the requested error could not be achieved. +! -2 corrector convergence could not be achieved. +! -3, -4 fatal error in DVNLSD(can not occur here). +! KUTH = Input flag to DVSTEP showing whether H was reduced by the +! driver. KUTH = 1 if H was reduced, = 0 otherwise. +! L = Integer variable, NQ + 1, current order plus one. +! LMAX = MAXORD + 1 (used for dimensioning). +! LOCJS = A pointer to the saved Jacobian, whose storage starts at +! WM(LOCJS), if JSV = 1. +! LYH = Saved integer pointer to segments of RWORK and IWORK. +! MAXORD = The maximum order of integration method to be allowed. +! METH/MITER = The method flags. See MF. +! MSBJ = The maximum number of steps between J evaluations, = 50. +! MXHNIL = Saved value of optional input MXHNIL. +! MXSTEP = Saved value of optional input MXSTEP. +! N = The number of first-order ODEs, = NEQ. +! NEWH = Saved integer to flag change of H. +! NEWQ = The method order to be used on the next step. +! NHNIL = Saved counter for occurrences of T + H = T. +! NQ = Integer variable, the current integration method order. +! NQNYH = Saved value of NQ*NYH. +! NQWAIT = A counter controlling the frequency of order changes. +! An order change is about to be considered if NQWAIT = 1. +! NSLJ = The number of steps taken as of the last Jacobian update. +! NSLP = Saved value of NST as of last Newton matrix update. +! NYH = Saved value of the initial value of NEQ. +! HU = The step size in t last used. +! NCFN = Number of nonlinear convergence failures so far. +! NETF = The number of error test failures of the integrator so far. +! NFE = The number of f evaluations for the problem so far. +! NJE = The number of Jacobian evaluations so far. +! NLU = The number of matrix LU decompositions so far. +! NNI = Number of nonlinear iterations so far. +! NQU = The method order last used. +! NST = The number of steps taken for the problem so far. +! Block 0. +! Retrieve the necessary flags from the OPTIONS structure and manage +! the storage allocation. +! .. +! .. FIRST EXECUTABLE STATEMENT DVODE +! .. +! Retrieve the local flags from the options structure. + IOPT = OPTS%IOPT + ITOL = OPTS%ITOL + MF = OPTS%MF + METH = OPTS%METH + MITER = OPTS%MITER + MOSS = OPTS%MOSS + NG = OPTS%NG + +! Allocate the necessary storage for RWORK and IWORK. (Assume that +! both or neither of the arrays are allocated.) + +! If we are starting a new problem, deallocate the old RWORK and +! IWORK arrays if they were allocated in a previous call. Also +! manage the event residual arrays. + + IF (ISTATE==1) THEN + IF (ALLOCATED(DTEMP)) THEN + DEALLOCATE (DTEMP,YTEMP,STAT=JER) + CALL CHECK_STAT(JER,270) + END IF + IF (ALLOCATED(RWORK)) THEN + DEALLOCATE (RWORK,IWORK,ACOR,SAVF,EWT,WM,STAT=JER) + CALL CHECK_STAT(JER,280) + END IF + IF (ALLOCATED(JROOT)) THEN + DEALLOCATE (JROOT,G0,G1,GX,STAT=JER) + CALL CHECK_STAT(JER,290) + END IF + IF (ALLOCATED(YMAX)) THEN + DEALLOCATE (YMAX,STAT=JER) + CALL CHECK_STAT(JER,300) + END IF + END IF + +! If the user has made changes and called DVODE_F90 with ISTATE=3, +! temporarily save the necessary portion of the Nordsieck array +! and then release RWORK and IWORK. Also, save the contents of +! the WM array and release WM. Note: ACOR, SAVF, and EWT do not +! need to be saved. + + IF (ISTATE==3) THEN + IF (IOPT/=1) THEN + MAXORD = MORD(METH) + ELSE + MAXORD = IUSER(5) + IF (MAXORD<0) GOTO 520 + IF (MAXORD==0) MAXORD = 100 + MAXORD = MIN(MAXORD,MORD(METH)) + END IF + IF (MAXORD=NEQ) GOTO 500 + IF (MU<0 .OR. MU>=NEQ) GOTO 510 + LWMDIM = (3*ML+2*MU+2)*NEQ + ELSE IF (MF==-14 .OR. MF==-15) THEN + ML = IUSER(1) + MU = IUSER(2) + IF (ML<0 .OR. ML>=NEQ) GOTO 500 + IF (MU<0 .OR. MU>=NEQ) GOTO 510 + LWMDIM = (2*ML+MU+1)*NEQ + ELSE IF (MF==16 .OR. MF==17) THEN + LWMDIM = 0 + ELSE IF (MF==-16 .OR. MF==-17) THEN + LWMDIM = 0 + ELSE IF (MF==20) THEN + LWMDIM = 0 + ELSE IF (MF==21 .OR. MF==22) THEN + LWMDIM = 2*NEQ**2 + ELSE IF (MF==-21 .OR. MF==-22) THEN + LWMDIM = NEQ**2 + ELSE IF (MF==23) THEN + LWMDIM = NEQ + ELSE IF (MF==24 .OR. MF==25) THEN + ML = IUSER(1) + MU = IUSER(2) + IF (ML<0 .OR. ML>=NEQ) GOTO 500 + IF (MU<0 .OR. MU>=NEQ) GOTO 510 + LWMDIM = (3*ML+2*MU+2)*NEQ + ELSE IF (MF==-24 .OR. MF==-25) THEN + ML = IUSER(1) + MU = IUSER(2) + IF (ML<0 .OR. ML>=NEQ) GOTO 500 + IF (MU<0 .OR. MU>=NEQ) GOTO 510 + LWMDIM = (2*ML+MU+1)*NEQ + ELSE IF (MF==26 .OR. MF==27) THEN + LWMDIM = 0 + ELSE IF (MF==-26 .OR. MF==-27) THEN + LWMDIM = 0 + END IF +! LWMDIM = LWMDIM + 2 + ALLOCATE (RWORK(LRW),WM(LWMDIM),STAT=JER) + CALL CHECK_STAT(JER,370) + RWORK(1:LRW) = ZERO + WM(1:LWMDIM) = ZERO + IF (.NOT.ALLOCATED(DTEMP)) THEN + ALLOCATE (DTEMP(NEQ),YTEMP(NEQ),STAT=JER) + CALL CHECK_STAT(JER,380) + END IF + ALLOCATE (ACOR(NEQ),SAVF(NEQ),EWT(NEQ),STAT=JER) + CALL CHECK_STAT(JER,390) +! If necessary, reload the saved portion of the Nordsieck +! array and the WM array, saved above. + IF (ISTATE==3) THEN + RWORK(1:LYHTEMP) = YHTEMP(1:LYHTEMP) + I = MIN(LWMTEMP,LWMDIM) +! WM(1:LWMTEMP) = WMTEMP(1:LWMTEMP) + WM(1:I) = WMTEMP(1:I) + DEALLOCATE (YHTEMP,WMTEMP,STAT=JER) + CALL CHECK_STAT(JER,400) + END IF + RWORK(1:LRWUSER) = RUSER(1:LRWUSER) +! IUSER: = LIWUSER if MITER = 0 or 3 (MF = 10, 13, 20, 23) +! = LIWUSER + NEQ otherwise +! (ABS(MF) = 11,12,14,15,16,17,21,22,24,25,26,27). + LIW = LIWUSER + NEQ + IF (MITER==0 .OR. MITER==3) LIW = LIWUSER + ALLOCATE (IWORK(LIW),STAT=JER) + CALL CHECK_STAT(JER,410) + IWORK(1:LIWUSER) = IUSER(1:LIWUSER) +! Allocate the YMAX vector. + ALLOCATE (YMAX(NEQ),STAT=JER) + CALL CHECK_STAT(JER,420) + END IF +! Allocate the event arrays if they haven't already been allocated +! in a previous call. + IF (ALLOCATED(JROOT)) THEN + ELSE + IF (NG>0) THEN + ALLOCATE (JROOT(NG),G0(NG),G1(NG),GX(NG),STAT=JER) + CALL CHECK_STAT(JER,430) + END IF + END IF + +! Block A. +! This code block is executed on every call. It tests ISTATE and +! ITASK for legality and branches appropriately. +! If ISTATE > 1 but the flag INIT shows that initialization has +! not yet been done, an error return occurs. +! If ISTATE = 1 and TOUT = T, return immediately. +! The user portion of RWORK and IWORK are reloaded since something +! may have changed since the previous visit. + + RWORK(1:LRWUSER) = RUSER(1:LRWUSER) + IWORK(1:LIWUSER) = IUSER(1:LIWUSER) + IF (ISTATE<1 .OR. ISTATE>3) GOTO 420 + IF (ITASK<1 .OR. ITASK>5) GOTO 430 + ITASKC = ITASK + IF (ISTATE==1) GOTO 10 + IF (INIT/=1) GOTO 440 + IF (ISTATE==2) GOTO 130 + GOTO 20 +10 INIT = 0 + IF (ABS(TOUT-T)<=ZERO) THEN + RUSER(1:LRWUSER) = RWORK(1:LRWUSER) + IUSER(1:LIWUSER) = IWORK(1:LIWUSER) + RETURN + END IF + +! Block B. +! The next code block is executed for the initial call (ISTATE = 1), +! or for a continuation call with parameter changes (ISTATE = 3). +! It contains checking of all input and various initializations. + +! First check legality of the non-optional input NEQ, ITOL, IOPT, +! MF, ML, and MU. + +20 IF (NEQ<=0) GOTO 450 + IF (ISTATE==1) GOTO 30 + IF (NEQ>N) GOTO 460 + IF (NEQ/=N) GOTO 465 +30 N = NEQ + IF (ITOL<1 .OR. ITOL>4) GOTO 470 + IF (IOPT<0 .OR. IOPT>1) GOTO 480 + JSV = SIGN(1,MF) + INEWJ = (1-JSV)/2 + MFA = ABS(MF) + METH = MFA/10 + MITER = MFA - 10*METH + IF (METH<1 .OR. METH>2) GOTO 490 + IF (MITER<0 .OR. MITER>7) GOTO 490 + IF (MITER<=3) GOTO 40 + IF (MITER<=5) THEN + ML = IWORK(1) + MU = IWORK(2) + IF (ML<0 .OR. ML>=N) GOTO 500 + IF (MU<0 .OR. MU>=N) GOTO 510 + END IF +40 CONTINUE + IF (NG<0) GOTO 700 + IF (ISTATE==1) GOTO 50 + IF (IRFND==0 .AND. NG/=NGC) GOTO 710 +50 NGC = NG +! Next process and check the optional input. + IF (IOPT==1) GOTO 60 + MAXORD = MORD(METH) + MXSTEP = MXSTP0 + MXHNIL = MXHNL0 + IF (ISTATE==1) H0 = ZERO + HMXI = ZERO + HMIN = ZERO + GOTO 80 +60 MAXORD = IWORK(5) + IF (MAXORD<0) GOTO 520 + IF (MAXORD==0) MAXORD = 100 + MAXORD = MIN(MAXORD,MORD(METH)) + MXSTEP = IWORK(6) + IF (MXSTEP<0) GOTO 530 + IF (MXSTEP==0) MXSTEP = MXSTP0 + MXHNIL = IWORK(7) + IF (MXHNIL<0) GOTO 540 + IF (MXHNIL==0) MXHNIL = MXHNL0 + IF (ISTATE/=1) GOTO 70 + H0 = RWORK(5) + IF ((TOUT-T)*H0ZERO) HMXI = ONE/HMAX + HMIN = RWORK(7) + IF (HMINNEQ) THEN + MSG = 'The size of the CONSTRAINED vector' + CALL XERRDV(MSG,900,1,0,0,0,0,ZERO,ZERO) + MSG = 'must be between 1 and NEQ.' + CALL XERRDV(MSG,900,2,0,0,0,0,ZERO,ZERO) + END IF + DO I = 1, NDX + IF (IDX(I)<1 .OR. IDX(I)>N) THEN + MSG = 'Each component of THE CONSTRAINED' + CALL XERRDV(MSG,910,1,0,0,0,0,ZERO,ZERO) + MSG = 'vector must be between 1 and N.' + CALL XERRDV(MSG,910,2,0,0,0,0,ZERO,ZERO) + END IF + END DO + END IF +! Check the sub diagonal and super diagonal arrays. + IF (MITER==4 .OR. MITER==5) THEN + IF (SUBS) THEN + DO I = 1, NSUBS + IF (SUBDS(I) < 2 .OR. SUBDS(I) > ML+1) THEN + MSG = 'Each element of SUB_DIAGONALS' + CALL XERRDV(MSG,920,1,0,0,0,0,ZERO,ZERO) + MSG = 'must be between 2 and ML + 1.' + CALL XERRDV(MSG,920,2,0,0,0,0,ZERO,ZERO) + END IF + END DO + END IF + IF (SUPS) THEN + DO I = 1, NSUPS + IF (SUPDS(I) < 2 .OR. SUPDS(I) > MU + 1) THEN + MSG = 'Each element of SUP_DIAGONALS' + CALL XERRDV(MSG,930,1,0,0,0,0,ZERO,ZERO) + MSG = 'must be between 2 and MU + 1.' + CALL XERRDV(MSG,930,2,0,0,0,0,ZERO,ZERO) + END IF + END DO + END IF +! Compute the banded column grouping. + IF (SUBS .OR. SUPS) THEN + CALL BGROUP(N,EWT,ACOR,YMAX,ML,MU) + BUILD_IAJA = .FALSE. + BUILD_IAJA = .TRUE. + IF (BUILD_IAJA) THEN + CALL BANDED_IAJA(N,ML,MU) + NZB = IAB(N+1) - 1 + END IF + END IF + END IF + + IF ((MITER==2 .OR. MITER==5) .AND. USE_JACSP) THEN +! Allocate the arrays needed by DVJAC/JACSPD. + IF (ALLOCATED(INDROWDS)) THEN + DEALLOCATE (INDROWDS, INDCOLDS, NGRPDS, IPNTRDS, JPNTRDS, & + IWADS, IWKDS, IOPTDS, YSCALEDS, WKDS, FACDS) + CALL CHECK_STAT(IER,440) + END IF + ALLOCATE (INDROWDS(1), INDCOLDS(1), NGRPDS(1), IPNTRDS(1), & + JPNTRDS(1), IWADS(1), IWKDS(50+N), IOPTDS(5), YSCALEDS(N), & + WKDS(3*N), FACDS(N), STAT=IER) + CALL CHECK_STAT(IER,450) +! For use in DVJAC: + IOPTDS(4) = 0 + END IF + +! Set work array pointers and check lengths LRW and LIW. Pointers +! to segments of RWORK and IWORK are named by prefixing L to the +! name of the segment. e.g., the segment YH starts at RWORK(LYH). +! Within WM, LOCJS is the location of the saved Jacobian (JSV > 0). + +80 LYH = 21 + IF (ISTATE==1) NYH = N + LENRW = LYH + (MAXORD+1)*NYH - 1 + IWORK(17) = LENRW + IF (LENRW>LRW) GOTO 580 + IF (LENRW/=LRW) GOTO 580 + LWM = 1 +! Save MAXORD in case the calling program calls with ISTATE=3. + PREVIOUS_MAXORD = MAXORD + JCO = MAX(0,JSV) +! IF (MITER==0) LENWM = 2 + IF (MITER==0) LENWM = 0 + IF (MITER==1 .OR. MITER==2) THEN +! LENWM = 2 + (1+JCO)*N*N +! LOCJS = N*N + 3 + LENWM = (1+JCO)*N*N + LOCJS = N*N + 1 + END IF +! IF (MITER==3) LENWM = N + 2 + IF (MITER==3) LENWM = N + IF (MITER==4 .OR. MITER==5) THEN + MBAND = ML + MU + 1 + LENP = (MBAND+ML)*N + LENJ = MBAND*N +! LENWM = 2 + LENP + JCO*LENJ +! LOCJS = LENP + 3 + LENWM = LENP + JCO*LENJ + LOCJS = LENP + 1 + END IF + IF (MITER==6 .OR. MITER==7) THEN +! LENWM = 2 + LENWM = 0 + END IF + IF (LENWM>LWMDIM) GOTO 730 + IF (LENWM/=LWMDIM) GOTO 730 + LIWM = 1 + LENIW = 30 + N + IF (MITER==0 .OR. MITER==3) LENIW = 30 + IWORK(18) = LENIW + IF (LENIW>LIW) GOTO 590 +! Check RTOL and ATOL for legality. + RTOLI = OPTS%RTOL(1) + ATOLI = OPTS%ATOL(1) + DO I = 1, N + IF (ITOL>=3) RTOLI = OPTS%RTOL(I) + IF (ITOL==2 .OR. ITOL==4) ATOLI = OPTS%ATOL(I) + IF (RTOLI0) WM1 = SQRT(UROUND) +! ISTATC controls the determination of the sparsity arrays +! if the sparse solution option is used. + ISTATC = ISTATE + GOTO 130 + +! Block C. +! The next block is for the initial call only (ISTATE = 1). +! It contains all remaining initializations, the initial call to F, +! and the calculation of the initial step size. +! The error weights in EWT are inverted after being loaded. + +100 UROUND = EPSILON(ONE) + U125 = UROUND ** 0.125_WP + U325 = UROUND ** 0.325_WP +! ISTATC controls the determination of the sparsity arrays if +! the sparse solution option is used. + ISTATC = ISTATE + TN = T + IF (ITASK/=4 .AND. ITASK/=5) GOTO 110 + TCRIT = RWORK(1) + IF ((TCRIT-TOUT)*(TOUT-T)ZERO .AND. (T+H0-TCRIT)*H0>ZERO) H0 = TCRIT - T +110 JSTART = 0 + IF (MITER>0) WM1 = SQRT(UROUND) + CCMXJ = PT2 + MSBJ = 50 + MSBG = 75 + NHNIL = 0 + NST = 0 + NJE = 0 + NNI = 0 + NCFN = 0 + NETF = 0 + NLU = 0 + NSLJ = 0 + NSLAST = 0 + HU = ZERO + NQU = 0 + MB28 = 0 +!_______________________________________________________________________ +! *****MA48 build change point. Insert this statement. +! MB48 = 0 +!_______________________________________________________________________ + NSLG = 0 + NGE = 0 +! Initial call to F. (LF0 points to YH(*,2).) + LF0 = LYH + NYH + CALL F(N,T,Y,RWORK(LF0)) + NFE = 1 +! Load the initial value vector in YH. + CALL DCOPY_F90(N,Y,1,RWORK(LYH),1) +! Load and invert the EWT array. (H is temporarily set to 1.0.) + NQ = 1 + H = ONE + CALL DEWSET(N,ITOL,OPTS%RTOL,OPTS%ATOL,RWORK(LYH),EWT) + DO I = 1, N + IF (EWT(I)<=ZERO) GOTO 620 + EWT(I) = ONE/EWT(I) + END DO + NNZ = 0 + NGP = 0 + IF (OPTS%SPARSE) THEN + ISTATC = ISTATE + END IF + IF (ABS(H0)>ZERO) GOTO 120 +! Call DVHIN to set initial step size H0 to be attempted. + CALL DVHIN(N,T,RWORK(LYH),RWORK(LF0),F,TOUT,EWT,ITOL,OPTS%ATOL,Y,ACOR, & + H0,NITER,IER) + NFE = NFE + NITER + IF (IER/=0) GOTO 630 +! Adjust H0 if necessary to meet HMAX bound. +120 RH = ABS(H0)*HMXI + IF (RH>ONE) H0 = H0/RH +! Load H with H0 and scale YH(*,2) by H0. + H = H0 + CALL DSCAL_F90(N,H0,RWORK(LF0),1) +! GOTO 270 +! Check for a zero of g at T. + IRFND = 0 + TOUTC = TOUT + IF (NGC==0) GOTO 210 + CALL DVCHECK(1,GFUN,NEQ,Y,RWORK(LYH),NYH,G0,G1,GX,IRT) + IF (IRT==0) GOTO 210 + GOTO 720 + +! Block D. +! The next code block is for continuation calls only (ISTATE = 2 or 3) +! and is to check stop conditions before taking a step. + +130 NSLAST = NST + IRFP = IRFND + IF (NGC==0) GOTO 140 + IF (ITASK==1 .OR. ITASK==4) TOUTC = TOUT + CALL DVCHECK(2,GFUN,NEQ,Y,RWORK(LYH),NYH,G0,G1,GX,IRT) + IF (IRT/=1) GOTO 140 + IRFND = 1 + ISTATE = 3 + T = T0ST + GOTO 330 +140 CONTINUE + IRFND = 0 + IF (IRFP==1 .AND. (ABS(TLAST-TN)>ZERO) .AND. ITASK==2) GOTO 310 + KUTH = 0 + GOTO (150,200,160,170,180) ITASK +150 IF ((TN-TOUT)*HZERO) GOTO 640 + IF ((TN-TOUT)*HZERO) GOTO 650 + IF ((TCRIT-TOUT)*HZERO) GOTO 650 +190 HMX = ABS(TN) + ABS(H) + IHIT = ABS(TN-TCRIT) <= HUN*UROUND*HMX + IF (IHIT) GOTO 310 + TNEXT = TN + HNEW*(ONE+FOUR*UROUND) + IF ((TNEXT-TCRIT)*H<=ZERO) GOTO 200 + H = (TCRIT-TN)*(ONE-FOUR*UROUND) + KUTH = 1 + +! Block E. +! The next block is normally executed for all calls and contains +! the call to the one-step core integrator DVSTEP. This is a +! looping point for the integration steps. +! First check for too many steps being taken, update EWT(if not +! at start of problem), check for too much accuracy being +! requested, and check for H below the roundoff level in T. + +200 CONTINUE + IF ((NST-NSLAST)>=MXSTEP) GOTO 340 + CALL DEWSET(N,ITOL,OPTS%RTOL,OPTS%ATOL,RWORK(LYH),EWT) + DO I = 1, N + IF (EWT(I)<=ZERO) GOTO 350 + EWT(I) = ONE/EWT(I) + END DO +210 TOLSF = UROUND*DVNORM(N,RWORK(LYH),EWT) + IPCUTH = -1 + IF (TOLSF<=ONE) GOTO 220 + TOLSF = TOLSF*TWO + IF (NST==0) GOTO 670 + GOTO 360 +220 CONTINUE + IPCUTH = IPCUTH + 1 + IF (IPCUTH>=IPCUTH_MAX) THEN + MSG = 'Too many step reductions to prevent' + CALL XERRDV(MSG,940,1,0,0,0,0,ZERO,ZERO) + MSG = 'an infeasible prediction.' + CALL XERRDV(MSG,940,1,0,0,0,0,ZERO,ZERO) +! Retract the solution to TN: + CALL DVNRDN(RWORK(LYH),NYH,N,NQ) + ACOR(1:N) = ZERO + ISTATE = -7 + GOTO 410 + END IF + IF (ABS((TN+H)-TN)>ZERO) GOTO 230 + NHNIL = NHNIL + 1 + IF (NHNIL>MXHNIL) GOTO 230 + MSG = 'Warning: internal T(=R1) and H(=R2) are such that' + CALL XERRDV(MSG,950,1,0,0,0,0,ZERO,ZERO) + MSG = 'in the machine, T + H = T on the next step.' + CALL XERRDV(MSG,950,1,0,0,0,0,ZERO,ZERO) + MSG = '(H = step size). The solver will continue anyway.' + CALL XERRDV(MSG,950,1,0,0,0,2,TN,H) + IF (NHNILLB(I) .AND. RWORK(LYH+ & + IDX(I)-1)UB(I))) THEN +! Retract: + CALL DVNRDN(RWORK(LYH),NYH,N,NQ) + H = HALF*H + ETA = HALF +! Rescale: + CALL DVNRDS(RWORK(LYH),NYH,N,L,ETA) + GOTO 220 + END IF + END DO +! Retract: + CALL DVNRDN(RWORK(LYH),NYH,N,NQ) + ACOR(1:N) = ZERO + END IF + +! CALL DVSTEP(Y,YH,LDYH,YH1,EWT,SAVF,ACOR,WM,IWM,F,JAC, & +! VNLS,OPTS%ATOL,ITOL) +!_______________________________________________________________________ + + IF (MITER/=6 .AND. MITER/=7) THEN + CALL DVSTEP(Y,RWORK(LYH),NYH,RWORK(LYH),EWT,SAVF,ACOR,WM, & + IWORK(LIWM),F,JAC,DVNLSD,OPTS%ATOL,ITOL) + ELSE + CALL DVSTEP(Y,RWORK(LYH),NYH,RWORK(LYH),EWT,SAVF,ACOR,WM, & + IWORK(LIWM),F,JAC,DVNLSS28,OPTS%ATOL,ITOL) + END IF +! *****MA48 build change point. Replace above with these statements. +! IF (MITER /= 6 .AND. MITER /= 7) THEN +! CALL DVSTEP(Y,RWORK(LYH),NYH,RWORK(LYH),EWT,SAVF,ACOR,WM, & +! IWORK(LIWM),F,JAC,DVNLSD,OPTS%ATOL,ITOL) +! ELSE +! IF (USE_MA48_FOR_SPARSE) THEN +! CALL DVSTEP(Y,RWORK(LYH),NYH,RWORK(LYH),EWT,SAVF,ACOR,WM, & +! IWORK(LIWM), F, JAC, DVNLSS48,OPTS%ATOL,ITOL) +! ELSE +! CALL DVSTEP(Y,RWORK(LYH),NYH,RWORK(LYH),EWT,SAVF,ACOR,WM, & +! IWORK(LIWM),F,JAC,DVNLSS28,OPTS%ATOL,ITOL) +! END IF +! END IF +!_______________________________________________________________________ + + KGO = 1 - KFLAG +! Branch on KFLAG. Note: In this version, KFLAG can not be set to +! -3; KFLAG = 0, -1, -2. + GOTO (240,370,380) KGO + +! Block F. +! The following block handles the case of a successful return from the +! core integrator (KFLAG = 0). Test for stop conditions. + +240 INIT = 1 + KUTH = 0 + GOTO (250,310,270,280,300) ITASK +! ITASK = 1. If TOUT has been reached, interpolate. +250 CONTINUE + IF (NGC==0) GOTO 260 + CALL DVCHECK(3,GFUN,NEQ,Y,RWORK(LYH),NYH,G0,G1,GX,IRT) + IF (IRT/=1) GOTO 260 + IRFND = 1 + ISTATE = 3 + T = T0ST + GOTO 330 +260 CONTINUE + IF ((TN-TOUT)*H=ZERO) GOTO 310 + GOTO 200 +! ITASK = 4. See if TOUT or TCRIT was reached. Adjust H if necessary. +280 IF ((TN-TOUT)*H=SIZEST) GOTO 400 + BIG = SIZEST + IMXER = I +400 END DO + IWORK(16) = IMXER +! Set Y vector, T, and optional output. +410 CONTINUE + CALL DCOPY_F90(N,RWORK(LYH),1,Y,1) + T = TN + RWORK(11) = HU + RWORK(12) = H + RWORK(13) = TN + IWORK(10) = NGE + IWORK(11) = NST + IWORK(12) = NFE + IWORK(13) = NJE + IWORK(14) = NQU + IWORK(15) = NQ + IWORK(19) = NLU + IWORK(20) = NNI + IWORK(21) = NCFN + IWORK(22) = NETF + TLAST = T + RUSER(1:LRWUSER) = RWORK(1:LRWUSER) + IUSER(1:LIWUSER) = IWORK(1:LIWUSER) + RETURN + +! Block I. +! The following block handles all error returns due to illegal input +! (ISTATE = -3), as detected before calling the core integrator. +! First the error message routine is called. If the illegal input +! is a negative ISTATE, the run is aborted (apparent infinite loop). + +420 MSG = 'ISTATE(=I1) is illegal.' + CALL XERRDV(MSG,1020,1,1,ISTATE,0,0,ZERO,ZERO) + IF (ISTATE<0) GOTO 750 + GOTO 740 +430 MSG = 'ITASK(=I1) is illegal.' + CALL XERRDV(MSG,1030,1,1,ITASK,0,0,ZERO,ZERO) + GOTO 740 +440 MSG = 'ISTATE(=I1) > 1 but DVODE is not initialized.' + CALL XERRDV(MSG,1040,1,1,ISTATE,0,0,ZERO,ZERO) + GOTO 740 +450 MSG = 'NEQ (=I1) < 1.' + CALL XERRDV(MSG,1050,1,1,NEQ,0,0,ZERO,ZERO) + GOTO 740 +460 MSG = 'ISTATE = 3 and NEQ increased (I1 to I2).' + CALL XERRDV(MSG,1060,1,2,N,NEQ,0,ZERO,ZERO) + GOTO 740 +465 MSG = 'This version of DVODE requires does not allow NEQ to be reduced.' + CALL XERRDV(MSG,1070,2,0,0,0,0,ZERO,ZERO) + GOTO 740 +470 MSG = 'ITOL(=I1) is illegal.' + CALL XERRDV(MSG,1080,1,1,ITOL,0,0,ZERO,ZERO) + GOTO 740 +480 MSG = 'IOPT(=I1) is illegal.' + CALL XERRDV(MSG,1090,1,1,IOPT,0,0,ZERO,ZERO) + GOTO 740 +490 MSG = 'MF(=I1) is illegal.' + CALL XERRDV(MSG,1100,1,1,MF,0,0,ZERO,ZERO) + GOTO 740 +500 MSG = 'ML(=I1) illegal: < 0 or >= NEQ (=I2)' + CALL XERRDV(MSG,1110,1,2,ML,NEQ,0,ZERO,ZERO) + GOTO 740 +510 MSG = 'MU(=I1) illegal: < 0 or >= NEQ (=I2)' + CALL XERRDV(MSG,1120,1,2,MU,NEQ,0,ZERO,ZERO) + GOTO 740 +520 MSG = 'MAXORD(=I1) < 0.' + CALL XERRDV(MSG,1130,1,1,MAXORD,0,0,ZERO,ZERO) + GOTO 740 +530 MSG = 'MXSTEP(=I1) < 0.' + CALL XERRDV(MSG,1140,1,1,MXSTEP,0,0,ZERO,ZERO) + GOTO 740 +540 MSG = 'MXHNIL(=I1) < 0.' + CALL XERRDV(MSG,1150,1,1,MXHNIL,0,0,ZERO,ZERO) + GOTO 740 +550 MSG = 'TOUT(=R1) is behind T(=R2).' + CALL XERRDV(MSG,1160,1,0,0,0,2,TOUT,T) + MSG = 'The integration direction is given by H0 (=R1).' + CALL XERRDV(MSG,1160,1,0,0,0,1,H0,ZERO) + GOTO 740 +560 MSG = 'HMAX(=R1) < 0.' + CALL XERRDV(MSG,1170,1,0,0,0,1,HMAX,ZERO) + GOTO 740 +570 MSG = 'HMIN(=R1) < 0.' + CALL XERRDV(MSG,1180,1,0,0,0,1,HMIN,ZERO) + GOTO 740 +580 CONTINUE + MSG = 'RWORK length needed, LENRW(=I1) > LRW(=I2)' + CALL XERRDV(MSG,1190,1,2,LENRW,LRW,0,ZERO,ZERO) + GOTO 740 +590 CONTINUE + MSG = 'IWORK length needed, LENIW(=I1) > LIW(=I2)' + CALL XERRDV(MSG,1200,1,2,LENIW,LIW,0,ZERO,ZERO) + GOTO 740 +600 MSG = 'RTOL(I1) is R1 < 0.' + CALL XERRDV(MSG,1210,1,1,I,0,1,RTOLI,ZERO) + GOTO 740 +610 MSG = 'ATOL(I1) is R1 < 0.' + CALL XERRDV(MSG,1220,1,1,I,0,1,ATOLI,ZERO) + GOTO 740 +620 EWTI = EWT(I) + MSG = 'EWT(I1) is R1 <= 0.' + CALL XERRDV(MSG,1230,1,1,I,0,1,EWTI,ZERO) + GOTO 740 +630 CONTINUE + MSG = 'TOUT(=R1) too close to T(=R2) to start.' + CALL XERRDV(MSG,1240,1,0,0,0,2,TOUT,T) + GOTO 740 +640 CONTINUE + MSG = 'ITASK = I1 and TOUT(=R1) < TCUR - HU(=R2).' + CALL XERRDV(MSG,1250,1,1,ITASK,0,2,TOUT,TP) + GOTO 740 +650 CONTINUE + MSG = 'ITASK = 4 or 5 and TCRIT(=R1) < TCUR(=R2).' + CALL XERRDV(MSG,1260,1,0,0,0,2,TCRIT,TN) + GOTO 740 +660 CONTINUE + MSG = 'ITASK = 4 or 5 and TCRIT(=R1) < TOUT(=R2).' + CALL XERRDV(MSG,1270,1,0,0,0,2,TCRIT,TOUT) + GOTO 740 +670 MSG = 'At the start of the problem, too much' + CALL XERRDV(MSG,1280,1,0,0,0,0,ZERO,ZERO) + MSG = 'accuracy was requested for precision' + CALL XERRDV(MSG,1280,1,0,0,0,1,TOLSF,ZERO) + MSG = 'of machine: see TOLSF(=R1).' + CALL XERRDV(MSG,1280,1,0,0,0,1,TOLSF,ZERO) + RWORK(14) = TOLSF + GOTO 740 +680 MSG = 'Trouble from DVINDY. ITASK = I1, TOUT = R1.' + CALL XERRDV(MSG,1290,1,1,ITASK,0,1,TOUT,ZERO) + GOTO 740 +690 MSG = 'SETH must be nonnegative.' + CALL XERRDV(MSG,1300,1,0,0,0,0,ZERO,ZERO) + GOTO 740 +700 MSG = 'NG(=I1) < 0.' + CALL XERRDV(MSG,1310,0,1,NG,0,0,ZERO,ZERO) + GOTO 740 +710 MSG = 'NG changed (from I1 to I2) illegally, i.e.,' + CALL XERRDV(MSG,1320,1,0,0,0,0,ZERO,ZERO) + MSG = 'not immediately after a root was found.' + CALL XERRDV(MSG,1320,1,2,NGC,NG,0,ZERO,ZERO) + GOTO 740 +720 MSG = 'One or more components of g has a root' + CALL XERRDV(MSG,1330,1,0,0,0,0,ZERO,ZERO) + MSG = 'too near to the initial point.' + CALL XERRDV(MSG,1330,1,0,0,0,0,ZERO,ZERO) + GOTO 740 +730 CONTINUE + MSG = 'WM length needed, LENWM(=I1) > LWMDIM(=I2)' + CALL XERRDV(MSG,1340,1,2,LENWM,LWMDIM,0,ZERO,ZERO) + +740 CONTINUE + ISTATE = -3 + RUSER(1:LRWUSER) = RWORK(1:LRWUSER) + IUSER(1:LIWUSER) = IWORK(1:LIWUSER) + RETURN + +750 MSG = 'Run aborted: apparent infinite loop.' + CALL XERRDV(MSG,1350,2,0,0,0,0,ZERO,ZERO) + RUSER(1:LRWUSER) = RWORK(1:LRWUSER) + IUSER(1:LIWUSER) = IWORK(1:LIWUSER) + RETURN + + END SUBROUTINE DVODE +!_______________________________________________________________________ + + SUBROUTINE DVHIN(N,T0,Y0,YDOT,F,TOUT,EWT,ITOL,ATOL,Y,TEMP,H0, & + NITER,IER) +! .. +! Calculate the initial step size. +! .. +! This routine computes the step size, H0, to be attempted on the +! first step, when the user has not supplied a value for this. +! First we check that TOUT - T0 differs significantly from zero. Then +! an iteration is done to approximate the initial second derivative +! and this is used to define h from w.r.m.s.norm(h**2 * yddot / 2) = 1. +! A bias factor of 1/2 is applied to the resulting h. +! The sign of H0 is inferred from the initial values of TOUT and T0. +! Communication with DVHIN is done with the following variables: +! N = Size of ODE system, input. +! T0 = Initial value of independent variable, input. +! Y0 = Vector of initial conditions, input. +! YDOT = Vector of initial first derivatives, input. +! F = Name of subroutine for right-hand side f(t,y), input. +! TOUT = First output value of independent variable +! UROUND = Machine unit roundoff +! EWT, ITOL, ATOL = Error weights and tolerance parameters +! as described in the driver routine, input. +! Y, TEMP = Work arrays of length N. +! H0 = Step size to be attempted, output. +! NITER = Number of iterations (and of f evaluations) to compute H0, +! output. +! IER = The error flag, returned with the value +! IER = 0 if no trouble occurred, or +! IER = -1 if TOUT and T0 are considered too close to proceed. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (INOUT) :: H0 + REAL (WP), INTENT (IN) :: T0, TOUT + INTEGER :: IER + INTEGER, INTENT (IN) :: ITOL, N + INTEGER, INTENT (INOUT) :: NITER +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: ATOL(*), EWT(*), Y0(*) + REAL (WP), INTENT (INOUT) :: TEMP(*), Y(*), YDOT(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F +! .. +! .. Local Scalars .. + REAL (WP) :: AFI, ATOLI, DELYI, H, HG, HLB, HNEW, HRAT, HUB, T1, & + TDIST, TROUND, YDDNRM + INTEGER :: I, ITER +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +! .. +! .. FIRST EXECUTABLE STATEMENT DVHIN +! .. + NITER = 0 + TDIST = ABS(TOUT-T0) + TROUND = UROUND*MAX(ABS(T0),ABS(TOUT)) + IF (TDISTDELYI) HUB = DELYI/AFI + END DO +! Set initial guess for H as geometric mean of upper and +! lower bounds. + ITER = 0 + HG = SQRT(HLB*HUB) +! If the bounds have crossed, exit with the mean value. + IF (HUBTWO) THEN + HNEW = SQRT(TWO/YDDNRM) + ELSE + HNEW = SQRT(HG*HUB) + END IF + ITER = ITER + 1 + +! Test the stopping conditions. +! Stop if the new and previous h values differ by a factor of < 2. +! Stop if four iterations have been done. Also, stop with previous h +! if HNEW/HG > 2 after first iteration, as this probably means that +! the second derivative value is bad because of cancellation error. + + IF (ITER>=4) GOTO 20 + HRAT = HNEW/HG + IF ((HRAT>HALF) .AND. (HRAT=2) .AND. (HNEW>TWO*HG)) THEN + HNEW = HG + GOTO 20 + END IF + HG = HNEW + GOTO 10 + +! Iteration done. Apply bounds, bias factor, and sign. Then exit. +20 H0 = HNEW*HALF + IF (H0HUB) H0 = HUB +30 H0 = SIGN(H0,TOUT-T0) + NITER = ITER + IER = 0 + RETURN +! Error return for TOUT - T0 too small. +40 IER = -1 + RETURN + + END SUBROUTINE DVHIN +!_______________________________________________________________________ + + SUBROUTINE DVINDY_CORE(T,K,YH,LDYH,DKY,IFLAG) +! .. +! Interpolate the solution and derivative. +! .. +! DVINDY_CORE computes interpolated values of the K-th derivative +! of the dependent variable vector y, and stores it in DKY. This +! routine is called within the package with K = 0 and T = TOUT, +! but may also be called by the user for any K up to the current +! order. (See detailed instructions in the usage documentation.) +! The computed values in DKY are gotten by interpolation using the +! Nordsieck history array YH. This array corresponds uniquely to a +! vector-valued polynomial of degree NQCUR or less, and DKY is set +! to the K-th derivative of this polynomial at T. +! The formula for DKY is: +! q +! DKY(i) = sum c(j,K) * (T - TN)**(j-K) * H**(-j) * YH(i,j+1) +! j=K +! where c(j,K) = j*(j-1)*...*(j-K+1), q = NQCUR, TN = TCUR, H = HCUR. +! The quantities NQ = NQCUR, L = NQ+1, N, TN, and H are +! communicated by PRIVATE variables. The above sum is done in reverse +! order. +! IFLAG is returned negative if either K or T is out of bounds. +! Discussion above and comments in driver explain all variables. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (IN) :: T + INTEGER, INTENT (INOUT) :: IFLAG + INTEGER, INTENT (IN) :: K, LDYH +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: DKY(*), YH(LDYH,*) +! .. +! .. Local Scalars .. + REAL (WP) :: C, R, S, TFUZZ, TN1, TP + INTEGER :: IC, J, JB, JB2, JJ, JJ1, JP1 + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SIGN +! .. +! .. FIRST EXECUTABLE STATEMENT DVINDY_CORE +! .. + IFLAG = 0 + IF (K<0 .OR. K>NQ) GOTO 40 +! TFUZZ = HUN * UROUND * (TN + HU) + TFUZZ = HUN*UROUND*SIGN(ABS(TN)+ABS(HU),HU) + TP = TN - HU - TFUZZ + TN1 = TN + TFUZZ + IF ((T-TP)*(T-TN1)>ZERO) GOTO 50 + S = (T-TN)/H + IC = 1 + IF (K==0) GOTO 10 + JJ1 = L - K + DO JJ = JJ1, NQ + IC = IC*JJ + END DO +10 C = REAL(IC) + DKY(1:N) = C*YH(1:N,L) + IF (K==NQ) GOTO 30 + JB2 = NQ - K + DO JB = 1, JB2 + J = NQ - JB + JP1 = J + 1 + IC = 1 + IF (K==0) GOTO 20 + JJ1 = JP1 - K + DO JJ = JJ1, J + IC = IC*JJ + END DO +20 C = REAL(IC) + DKY(1:N) = C*YH(1:N,JP1) + S*DKY(1:N) + END DO +30 R = H**(-K) + CALL DSCAL_F90(N,R,DKY,1) + RETURN +40 MSG = 'Error in DVINDY, K(=I1) is illegal.' + CALL XERRDV(MSG,1360,1,1,K,0,0,ZERO,ZERO) + IFLAG = -1 + RETURN +50 MSG = 'Error in DVINDY, T(=R1) is illegal. T is not' + CALL XERRDV(MSG,1370,1,0,0,0,1,T,ZERO) + MSG = 'in interval TCUR - HU(= R1) to TCUR(=R2)' + CALL XERRDV(MSG,1370,1,0,0,0,2,TP,TN) + IFLAG = -2 + RETURN + + END SUBROUTINE DVINDY_CORE +!_______________________________________________________________________ + + SUBROUTINE DVINDY_BNDS(T,K,YH,LDYH,DKY,IFLAG) +! .. +! Interpolate the solution and derivative and enforce nonnegativity +! (used only if user calls DVINDY and the BOUNDS option is in +! use). +! .. +! This version of DVINDY_CORE enforces nonnegativity and is called +! only by DVINDY (which is called only by the user). It uses the +! private YNNEG array produced by a call to DVINDY_CORE from DVINDY +! to enforce nonnegativity. +! DVINDY_BNDS computes interpolated values of the K-th derivative +! of the dependent variable vector y, and stores it in DKY. This +! routine is called within the package with K = 0 and T = TOUT, +! but may also be called by the user for any K up to the current +! order. (See detailed instructions in the usage documentation.) +! The computed values in DKY are gotten by interpolation using the +! Nordsieck history array YH. This array corresponds uniquely to a +! vector-valued polynomial of degree NQCUR or less, and DKY is set +! to the K-th derivative of this polynomial at T. +! The formula for DKY is: +! q +! DKY(i) = sum c(j,K) * (T - TN)**(j-K) * H**(-j) * YH(i,j+1) +! j=K +! where c(j,K) = j*(j-1)*...*(j-K+1), q = NQCUR, TN = TCUR, H = HCUR. +! The quantities NQ = NQCUR, L = NQ+1, N, TN, and H are +! communicated by PRIVATE variables. The above sum is done in reverse +! order. +! IFLAG is returned negative if either K or T is out of bounds. +! Discussion above and comments in driver explain all variables. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (IN) :: T + INTEGER, INTENT (INOUT) :: IFLAG + INTEGER, INTENT (IN) :: K, LDYH +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: DKY(*), YH(LDYH,*) +! .. +! .. Local Scalars .. + REAL (WP) :: C, R, S, TFUZZ, TN1, TP + INTEGER :: I, IC, J, JB, JB2, JJ, JJ1, JP1 + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SIGN +! .. +! .. FIRST EXECUTABLE STATEMENT DVINDY_BNDS +! .. + IFLAG = 0 + IF (K==0) GOTO 50 + IF (K<0 .OR. K>NQ) GOTO 40 +! TFUZZ = HUN * UROUND * (TN + HU) + TFUZZ = HUN*UROUND*SIGN(ABS(TN)+ABS(HU),HU) + TP = TN - HU - TFUZZ + TN1 = TN + TFUZZ + IF ((T-TP)*(T-TN1)>ZERO) GOTO 60 + S = (T-TN)/H + IC = 1 + IF (K==0) GOTO 10 + JJ1 = L - K + DO JJ = JJ1, NQ + IC = IC*JJ + END DO +10 C = REAL(IC) + DKY(1:N) = C*YH(1:N,L) + IF (BOUNDS) THEN + DO I = 1, NDX + IF (YNNEG(IDX(I))UB(I)) DKY(IDX(I)) & + = ZERO + END DO + END IF + IF (K==NQ) GOTO 30 + JB2 = NQ - K + DO JB = 1, JB2 + J = NQ - JB + JP1 = J + 1 + IC = 1 + IF (K==0) GOTO 20 + JJ1 = JP1 - K + DO JJ = JJ1, J + IC = IC*JJ + END DO +20 C = REAL(IC) + DKY(1:N) = C*YH(1:N,JP1) + S*DKY(1:N) + IF (BOUNDS) THEN + DO I = 1, NDX + IF (YNNEG(IDX(I))UB(I)) DKY(IDX(I)) & + = ZERO + END DO + END IF + END DO +30 R = H**(-K) + CALL DSCAL_F90(N,R,DKY,1) + RETURN +40 MSG = 'Error in DVINDY, K(=I1) is illegal.' + CALL XERRDV(MSG,1380,1,1,K,0,0,ZERO,ZERO) + IFLAG = -1 + RETURN +50 MSG = 'DVINDY_BNDS cannot be called with k = 0.' + CALL XERRDV(MSG,1390,1,0,0,0,0,ZERO,ZERO) + IFLAG = -1 + RETURN +60 MSG = 'Error in DVINDY, T(=R1) is illegal. T is not' + CALL XERRDV(MSG,1400,1,0,0,0,1,T,ZERO) + MSG = 'not in interval TCUR - HU(= R1) to TCUR(=R2)' + CALL XERRDV(MSG,1400,1,0,0,0,2,TP,TN) + IFLAG = -2 + RETURN + + END SUBROUTINE DVINDY_BNDS +!_______________________________________________________________________ + + SUBROUTINE DVINDY(T,K,DKY,IFLAG) +! .. +! This is a dummy interface to allow the user to interpolate the +! solution and the derivative (not called by DVODE_F90). +! .. +! May be used if the user wishes to interpolate solution or +! derivative following a successful return from DVODE_F90 +! DVINDY computes interpolated values of the K-th derivative of +! the dependent variable vector y, and stores it in DKY. This +! routine is called within the package with K = 0 and T = TOUT, +! but may also be called by the user for any K up to the current +! order. (See detailed instructions in the usage documentation.) +! The computed values in DKY are gotten by interpolation using the +! Nordsieck history array YH. This array corresponds uniquely to a +! vector-valued polynomial of degree NQCUR or less, and DKY is set +! to the K-th derivative of this polynomial at T. +! IFLAG is returned negative if either K or T is out of bounds. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (IN) :: T + INTEGER, INTENT (INOUT) :: IFLAG + INTEGER, INTENT (IN) :: K +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: DKY(*) +! .. +! .. Local Scalars .. + INTEGER :: I, IER +! .. +! .. Intrinsic Functions .. + INTRINSIC ALLOCATED, SIZE +! .. +! .. FIRST EXECUTABLE STATEMENT DVINDY +! .. + CALL DVINDY_CORE(T,K,RWORK(LYH),N,DKY,IFLAG) + IF (.NOT.BOUNDS) RETURN + + IF (K==0) THEN +! Interpolate only the solution. + CALL DVINDY_CORE(T,K,RWORK(LYH),N,DKY,IFLAG) +! Enforce bounds. + DO I = 1, NDX + IF (DKY(IDX(I))UB(I)) DKY(IDX(I)) = UB(I) + END DO + RETURN + END IF + +! k > 0 - derivatives requested. + +! Make sure space is available for the interpolated solution. + IF (ALLOCATED(YNNEG)) THEN + IF (SIZE(YNNEG)= N, the first dimension of YH. +! N is the number of ODEs in the system. +! YH1 = A one-dimensional array occupying the same space as YH. +! EWT = An array of length N containing multiplicative weights +! for local error measurements. Local errors in y(i) are +! compared to 1.0/EWT(i) in various error tests. +! SAVF = An array of working storage, of length N. +! also used for input of YH(*,MAXORD+2) when JSTART = -1 +! and MAXORD < the current order NQ. +! ACOR = A work array of length N, used for the accumulated +! corrections. On a successful return, ACOR(i) contains +! the estimated one-step local error in y(i). +! WM,IWM = Real and integer work arrays associated with matrix +! operations in DVNLSD. +! F = Dummy name for the user supplied subroutine for f. +! JAC = Dummy name for the user supplied Jacobian subroutine. +! DVNLSD = Dummy name for the nonlinear system solving subroutine, +! whose real name is dependent on the method used. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: LDYH, ITOL +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: ACOR(*), EWT(*), SAVF(*), & + WM(*), Y(*), YH(LDYH,*), YH1(*) + REAL (WP), INTENT (IN) :: ATOL(*) + INTEGER, INTENT (INOUT) :: IWM(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F, JAC, VNLS +! .. +! .. Local Scalars .. + REAL (WP) :: CNQUOT, DDN, DSM, DUP, ETAQ, ETAQM1, ETAQP1, FLOTL, R, & + TOLD + INTEGER :: I, I1, I2, IBACK, J, JB, NCF, NFLAG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL +! .. +! .. FIRST EXECUTABLE STATEMENT DVSTEP +! .. + KFLAG = 0 + TOLD = TN + NCF = 0 + JCUR = 0 + NFLAG = 0 + IF (JSTART>0) GOTO 10 + IF (JSTART==-1) GOTO 30 + +! On the first call, the order is set to 1, and other variables are +! initialized. ETAMAX is the maximum ratio by which H can be increased +! in a single step. It is normally 10, but is larger during the first +! step to compensate for the small initial H. If a failure occurs +! (in corrector convergence or error test), ETAMAX is set to 1 for +! the next increase. + + LMAX = MAXORD + 1 + NQ = 1 + L = 2 + NQNYH = NQ*LDYH + TAU(1) = H + PRL1 = ONE + RC = ZERO + ETAMAX = ETAMX1 + NQWAIT = 2 + HSCAL = H + GOTO 70 + +! Take preliminary actions on a normal continuation step (JSTART > 0). +! If the driver changed H, then ETA must be reset and NEWH set to 1. +! If a change of order was dictated on the previous step, then it is +! done here and appropriate adjustments in the history are made. +! On an order decrease, the history array is adjusted by DVJUST. +! On an order increase, the history array is augmented by a column. +! On a change of step size H, the history array YH is rescaled. + +10 CONTINUE + IF (KUTH==1) THEN + ETA = MIN(ETA,H/HSCAL) + NEWH = 1 + END IF +20 IF (NEWH==0) GOTO 70 + IF (NEWQ==NQ) GOTO 60 + IF (NEWQNQ) THEN + CALL DVJUST(YH,LDYH,1) + NQ = NEWQ + L = NQ + 1 + NQWAIT = L + GOTO 60 + END IF + +! The following block handles preliminaries needed when JSTART = -1. +! If N was reduced, zero out part of YH to avoid undefined references. +! If MAXORD was reduced to a value less than the tentative order NEWQ, +! then NQ is set to MAXORD, and a new H ratio ETA is chosen. +! Otherwise, we take the same preliminary actions as for JSTART > 0. +! In any case, NQWAIT is reset to L = NQ + 1 to prevent further +! changes in order for that many steps. The new H ratio ETA is +! limited by the input H if KUTH = 1, by HMIN if KUTH = 0, and by +! HMXI in any case. Finally, the history array YH is rescaled. + +30 CONTINUE + LMAX = MAXORD + 1 + IF (N==LDYH) GOTO 40 + I1 = 1 + (NEWQ+1)*LDYH + I2 = (MAXORD+1)*LDYH + IF (I1>I2) GOTO 40 + YH1(I1:I2) = ZERO +40 IF (NEWQ<=MAXORD) GOTO 50 + FLOTL = REAL(LMAX) + IF (MAXORDONE) GOTO 100 + +! After a successful step, update the YH and TAU arrays and decrement +! NQWAIT. If NQWAIT is then 1 and NQ < MAXORD, then ACOR is saved +! for use in a possible order increase on the next step. +! If ETAMAX = 1 (a failure occurred this step), keep NQWAIT >= 2. + + KFLAG = 0 + NST = NST + 1 + HU = H + NQU = NQ + DO IBACK = 1, NQ + I = L - IBACK + TAU(I+1) = TAU(I) + END DO + TAU(1) = H + + IF (BOUNDS) THEN +! Original: +! CALL DAXPY_F90(N,EL(1),ACOR,1,YH(1,1),1) +! CALL DAXPY_F90(N,EL(2),ACOR,1,YH(1,2),1) + CALL DAXPY_F90(N,EL(1),ACOR,1,YH(1:N,1),1) + CALL DAXPY_F90(N,EL(2),ACOR,1,YH(1:N,2),1) +! Take care of roundoff causing y(t) to be slightly unequal +! to the constraint bound. + DO J = 1, NDX + YH(IDX(J),1) = MAX(YH(IDX(J),1),LB(J)) + Y(IDX(J)) = MAX(Y(IDX(J)),LB(J)) + YH(IDX(J),1) = MIN(YH(IDX(J),1),UB(J)) + Y(IDX(J)) = MIN(Y(IDX(J)),UB(J)) + END DO +! Update the higher derivatives and project to zero if necessary. + IF (L>2) THEN + DO J = 3, L +! Original: +! CALL DAXPY_F90(N,EL(J),ACOR,1,YH(1,J),1) + CALL DAXPY_F90(N,EL(J),ACOR,1,YH(1:N,J),1) + END DO + END IF + ELSE +! Proceed as usual. + DO J = 1, L +! Original: +! CALL DAXPY_F90(N,EL(J),ACOR,1,YH(1,J),1) + CALL DAXPY_F90(N,EL(J),ACOR,1,YH(1:N,J),1) + END DO + END IF + + NQWAIT = NQWAIT - 1 + IF ((L==LMAX) .OR. (NQWAIT/=1)) GOTO 90 +! Original: +! CALL DCOPY_F90(N,ACOR,1,YH(1,LMAX),1) + CALL DCOPY_F90(N,ACOR,1,YH(1:N,LMAX),1) + CONP = TQ(5) +90 IF (ABS(ETAMAX-ONE)>0) GOTO 130 + IF (NQWAIT<2) NQWAIT = 2 + NEWQ = NQ + NEWH = 0 + ETA = ONE + HNEW = H + GOTO 250 + +! The error test failed. KFLAG keeps track of multiple failures. +! Restore TN and the YH array to their previous values, and prepare +! to try the step again. Compute the optimum step size for the +! same order. After repeated failures, H is forced to decrease +! more rapidly. + +100 KFLAG = KFLAG - 1 + NETF = NETF + 1 + NFLAG = -2 + TN = TOLD + I1 = NQNYH + 1 + DO JB = 1, NQ + I1 = I1 - LDYH + DO I = I1, NQNYH + YH1(I) = YH1(I) - YH1(I+LDYH) + END DO + END DO + IF (ABS(H)<=HMIN*ONEPSM) GOTO 220 + ETAMAX = ONE + IF (KFLAG<=KFC) GOTO 110 +! Compute ratio of new H to current H at the current order. + FLOTL = REAL(L) + ETA = ONE/((BIAS2*DSM)**(ONE/FLOTL)+ADDON) + ETA = MAX(ETA,HMIN/ABS(H),ETAMIN) + IF ((KFLAG<=-2) .AND. (ETA>ETAMXF)) ETA = ETAMXF + GOTO 60 + +! Control reaches this section if 3 or more consecutive failures +! have occurred. It is assumed that the elements of the YH array +! have accumulated errors of the wrong order. The order is reduced +! by one, if possible. Then H is reduced by a factor of 0.1 and +! the step is retried. After a total of 7 consecutive failures, +! an exit is taken with KFLAG = -1. + +!110 IF (KFLAG==KFH) GOTO 220 +110 IF (KFLAG==CONSECUTIVE_EFAILS) GOTO 220 + IF (NQ==1) GOTO 120 + ETA = MAX(ETAMIN,HMIN/ABS(H)) + CALL DVJUST(YH,LDYH,-1) + L = NQ + NQ = NQ - 1 + NQWAIT = L + GOTO 60 +120 ETA = MAX(ETAMIN,HMIN/ABS(H)) + H = H*ETA + HSCAL = H + TAU(1) = H + CALL F(N,TN,Y,SAVF) + NFE = NFE + 1 + IF (BOUNDS) THEN + DO I = 1, NDX + IF (ABS(YH(IDX(I),1)-LB(I))<=ZERO) SAVF(IDX(I)) = & + MAX(SAVF(IDX(I)),ZERO) + IF (ABS(YH(IDX(I),1)-UB(I))<=ZERO) SAVF(IDX(I)) = & + MIN(SAVF(IDX(I)),ZERO) + END DO + END IF + YH(1:N,2) = H*SAVF(1:N) + NQWAIT = 10 + GOTO 70 + +! If NQWAIT = 0, an increase or decrease in order by one is considered. +! Factors ETAQ, ETAQM1, ETAQP1 are computed by which H could be +! multiplied at order q, q-1, or q+1, respectively. The largest of +! these is determined, and the new order and step size set accordingly. +! A change of H or NQ is made only if H increases by at least a factor +! of THRESH. If an order change is considered and rejected, then NQWAIT +! is set to 2 (reconsider it after 2 steps). + +! Compute ratio of new H to current H at the current order. +130 FLOTL = REAL(L) + ETAQ = ONE/((BIAS2*DSM)**(ONE/FLOTL)+ADDON) + IF (NQWAIT/=0) GOTO 170 + NQWAIT = 2 + ETAQM1 = ZERO + IF (NQ==1) GOTO 140 +! Compute ratio of new H to current H at the current order +! less one. +! Original: +! DDN = DVNORM(N,YH(1,L),EWT)/TQ(1) + DDN = DVNORM(N,YH(1:N,L),EWT)/TQ(1) + ETAQM1 = ONE/((BIAS1*DDN)**(ONE/(FLOTL-ONE))+ADDON) +140 ETAQP1 = ZERO + IF (L==LMAX) GOTO 150 +! Compute ratio of new H to current H at current order plus one. + CNQUOT = (TQ(5)/CONP)*(H/TAU(2))**L + SAVF(1:N) = ACOR(1:N) - CNQUOT*YH(1:N,LMAX) + DUP = DVNORM(N,SAVF,EWT)/TQ(3) + ETAQP1 = ONE/((BIAS3*DUP)**(ONE/(FLOTL+ONE))+ADDON) +150 IF (ETAQ>=ETAQP1) GOTO 160 + IF (ETAQP1>ETAQM1) GOTO 190 + GOTO 180 +160 IF (ETAQ= N, the first dimension of YH, input. +! SAVF = A work array of length N. +! EWT = An error weight vector of length N, input. +! ACOR = A work array of length N, used for the accumulated +! corrections to the predicted y vector. +! WM,IWM = Real and integer work arrays associated with matrix +! operations in chord iteration (MITER /= 0). +! F = Dummy name for user supplied routine for f. +! JAC = Dummy name for user supplied Jacobian routine. +! NFLAG = Input/output flag, with values and meanings as follows: +! INPUT +! 0 first call for this time step. +! -1 convergence failure in previous call to DVNLSD. +! -2 error test failure in DVSTEP. +! OUTPUT +! 0 successful completion of nonlinear solver. +! -1 convergence failure or singular matrix. +! -2 unrecoverable error in matrix preprocessing +! (cannot occur here). +! -3 unrecoverable error in solution (cannot occur +! here). +! IPUP = Own variable flag with values and meanings as follows: +! 0, do not update the Newton matrix. +! MITER /= 0 update Newton matrix, because it is the +! initial step, order was changed, the error +! test failed, or an update is indicated by +! the scalar RC or step counter NST. +! For more details, see comments in driver subroutine. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: ITOL, LDYH + INTEGER, INTENT (INOUT) :: NFLAG +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: ACOR(*), EWT(*), SAVF(*), & + WM(*), Y(*), YH(LDYH,*) + REAL (WP), INTENT (IN) :: ATOL(*) + INTEGER, INTENT (INOUT) :: IWM(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F, JAC +! .. +! .. Local Scalars .. + REAL (WP) :: ACNRMNEW, CSCALE, DCON, DEL, DELP + INTEGER :: I, IERPJ, IERSL, M +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT DVNLSD +! .. +! On the first step, on a change of method order, or after a +! nonlinear convergence failure with NFLAG = -2, set IPUP = MITER +! to force a Jacobian update when MITER /= 0. + IF (JSTART==0) NSLP = 0 + IF (NFLAG==0) ICF = 0 + IF (NFLAG==-2) IPUP = MITER + IF ((JSTART==0) .OR. (JSTART==-1)) IPUP = MITER +! If this is functional iteration, set CRATE = 1 and drop to 220 + IF (MITER==0) THEN + CRATE = ONE + GOTO 10 + END IF + +! RC is the ratio of new to old values of the coefficient H/EL(2)=h/l1. +! When RC differs from 1 by more than CCMAX, IPUP is set to MITER to +! force DVJAC to be called, if a Jacobian is involved. In any case, +! DVJAC is called at least every MSBP steps. + + DRC = ABS(RC-ONE) + IF (DRC>CCMAX .OR. NST>=NSLP+MSBP) IPUP = MITER + +! Up to MAXCOR corrector iterations are taken. A convergence test is +! made on the r.m.s. norm of each correction, weighted by the error +! weight vector EWT. The sum of the corrections is accumulated in the +! vector ACOR(i). The YH array is not altered in the corrector loop. + +10 M = 0 + DELP = ZERO +! Original: +! CALL DCOPY_F90(N,YH(1,1),1,Y,1) + CALL DCOPY_F90(N,YH(1:N,1),1,Y(1:N),1) + CALL F(N,TN,Y,SAVF) + NFE = NFE + 1 + IF (BOUNDS) THEN + DO I = 1, NDX + IF (ABS(YH(IDX(I),1)-LB(I))<=ZERO) SAVF(IDX(I)) = & + MAX(SAVF(IDX(I)),ZERO) + IF (ABS(YH(IDX(I),1)-UB(I))<=ZERO) SAVF(IDX(I)) = & + MIN(SAVF(IDX(I)),ZERO) + END DO + END IF + IF (IPUP<=0) GOTO 20 + +! If indicated, the matrix P = I - h*rl1*J is reevaluated and +! preprocessed before starting the corrector iteration. IPUP +! is set to 0 as an indicator that this has been done. + + CALL DVJAC(Y,YH,LDYH,EWT,ACOR,SAVF,WM,IWM,F,JAC,IERPJ, & + ATOL,ITOL) + IPUP = 0 + RC = ONE + DRC = ZERO + CRATE = ONE + NSLP = NST +! If matrix is singular, take error return to force cut in +! step size. + IF (IERPJ/=0) GOTO 70 +20 ACOR(1:N) = ZERO +! This is a looping point for the corrector iteration. +30 IF (MITER/=0) GOTO 40 + +! In the case of functional iteration, update Y directly from +! the result of the last function evaluation. + + SAVF(1:N) = RL1*(H*SAVF(1:N)-YH(1:N,2)) + Y(1:N) = SAVF(1:N) - ACOR(1:N) + DEL = DVNORM(N,Y,EWT) + Y(1:N) = YH(1:N,1) + SAVF(1:N) + CALL DCOPY_F90(N,SAVF,1,ACOR,1) + GOTO 50 + +! In the case of the chord method, compute the corrector error, and +! solve the linear system with that as right-hand side and P as +! coefficient matrix. The correction is scaled by the factor +! 2/(1+RC) to account for changes in h*rl1 since the last DVJAC call. + +40 Y(1:N) = (RL1*H)*SAVF(1:N) - (RL1*YH(1:N,2)+ACOR(1:N)) + CALL DVSOL(WM,IWM,Y,IERSL) + NNI = NNI + 1 + IF (IERSL>0) GOTO 60 + IF (METH==2 .AND. ABS(RC-ONE)>ZERO) THEN + CSCALE = TWO/(ONE+RC) + CALL DSCAL_F90(N,CSCALE,Y,1) + END IF + DEL = DVNORM(N,Y,EWT) + CALL DAXPY_F90(N,ONE,Y,1,ACOR,1) + Y(1:N) = YH(1:N,1) + ACOR(1:N) + +! Test for convergence. If M > 0, an estimate of the convergence +! rate constant is stored in CRATE, and this is used in the test. + +50 IF (M/=0) CRATE = MAX(CRDOWN*CRATE,DEL/DELP) + DCON = DEL*MIN(ONE,CRATE)/TQ(4) + IF (DCON<=ONE) GOTO 80 + M = M + 1 + IF (M==MAXCOR) GOTO 60 + IF (M>=2 .AND. DEL>RDIV*DELP) GOTO 60 + DELP = DEL + CALL F(N,TN,Y,SAVF) + NFE = NFE + 1 + IF (BOUNDS) THEN + DO I = 1, NDX + IF (ABS(YH(IDX(I),1)-LB(I))<=ZERO) SAVF(IDX(I)) = & + MAX(SAVF(IDX(I)),ZERO) + IF (ABS(YH(IDX(I),1)-UB(I))<=ZERO) SAVF(IDX(I)) = & + MIN(SAVF(IDX(I)),ZERO) + END DO + END IF + GOTO 30 + +60 IF (MITER==0 .OR. JCUR==1) GOTO 70 + ICF = 1 + IPUP = MITER + GOTO 10 + +70 CONTINUE + NFLAG = -1 + ICF = 2 + IPUP = MITER + RETURN + +! Return for successful step. +80 CONTINUE + +! Enforce bounds. + IF (BOUNDS) THEN + CHANGED_ACOR = .FALSE. + IF (M==0) THEN + ACNRM = DEL + ELSE + ACNRM = DVNORM(N,ACOR,EWT) + END IF + IF (MITER/=0) THEN +! Since Y(:) = YH(:,1) + ACOR(:) ... + DO I = 1, NDX + IF (Y(IDX(I))UB(I)) THEN + CHANGED_ACOR = .TRUE. + ACOR(IDX(I)) = UB(I) - YH(IDX(I),1) + SAVF(IDX(I)) = ACOR(IDX(I)) + END IF + END DO + ELSE +! Since Y(:) = YH(:,1) + SAVF(:) and +! since CALL DCOPY_F90(N,SAVF,1,ACOR,1) ... + DO I = 1, NDX + IF (Y(IDX(I))UB(IDX(I))) THEN + CHANGED_ACOR = .TRUE. + ACOR(IDX(I)) = UB(I) - YH(IDX(I),1) + END IF + END DO + END IF + IF (CHANGED_ACOR) THEN + IF (M==0) THEN + ACNRMNEW = DEL + ELSE + ACNRMNEW = DVNORM(N,ACOR,EWT) + END IF +! ACNRM = ACNRMNEW + ACNRM = MAX(ACNRM,ACNRMNEW) + ELSE + END IF + NFLAG = 0 + JCUR = 0 + ICF = 0 + ELSE +! No projections are required. + NFLAG = 0 + JCUR = 0 + ICF = 0 + IF (M==0) ACNRM = DEL + IF (M>0) ACNRM = DVNORM(N,ACOR,EWT) + END IF + RETURN + + END SUBROUTINE DVNLSD +!_______________________________________________________________________ + + SUBROUTINE DVJAC(Y,YH,LDYH,EWT,FTEM,SAVF,WM,IWM,F,JAC,IERPJ, & + ATOL,ITOL) +! .. +! Compute and process the matrix P = I - h*rl1*J, where J is an +! approximation to the Jacobian for dense and banded solutions. +! .. +! This is a version of DVJAC that allows use of the known nonzero +! diagonals if it is available. +! DVJAC is called by DVNLSD to compute and process the matrix +! P = I - h*rl1*J, where J is an approximation to the Jacobian. +! Here J is computed by the user-supplied routine JAC if +! MITER = 1 or 4, or by finite differencing if MITER = 2, 3, or 5. +! If MITER = 3, a diagonal approximation to J is used. +! If JSV = -1, J is computed from scratch in all cases. +! If JSV = 1 and MITER = 1, 2, 4, or 5, and if the saved value of J is +! considered acceptable, then P is constructed from the saved J. +! J is stored in wm and replaced by P. If MITER /= 3, P is then +! subjected to LU decomposition in preparation for later solution +! of linear systems with P as coefficient matrix. This is done +! by DGEFA_F90 if MITER = 1 or 2, and by DGBFA_F90 if MITER = 4 or 5. +! Communication with DVJAC is done with the following variables. +! (For more details, please see the comments in the driver subroutine.) +! Y = Vector containing predicted values on entry. +! YH = The Nordsieck array, an LDYH by LMAX array, input. +! LDYH = A constant >= N, the first dimension of YH, input. +! EWT = An error weight vector of length N. +! SAVF = Array containing f evaluated at predicted y, input. +! WM = Real work space for matrices. In the output, it contains +! the inverse diagonal matrix if MITER = 3 and the LU +! decomposition of P if MITER is 1, 2, 4, or 5. +! Storage of matrix elements starts at WM(1). +! Storage of the saved Jacobian starts at WM(LOCJS). +! IWM = Integer work space containing pivot information, +! starting at IWM(31), if MITER is 1, 2, 4, or 5. +! IWM also contains band parameters ML = IWM(1) and +! MU = IWM(2) if MITER is 4 or 5. +! F = Dummy name for the user supplied subroutine for f. +! JAC = Dummy name for the user supplied Jacobian subroutine. +! RL1 = 1/EL(2) (input). +! IERPJ = Output error flag, = 0 if no trouble, 1 if the P +! matrix is found to be singular. +! JCUR = Output flag to indicate whether the Jacobian matrix +! (or approximation) is now current. +! JCUR = 0 means J is not current. +! JCUR = 1 means J is current. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: IERPJ + INTEGER, INTENT (IN) :: LDYH, ITOL +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: EWT(*), FTEM(*), SAVF(*), WM(*), & + Y(*),YH(LDYH,*) + REAL (WP), INTENT (IN) :: ATOL(*) + INTEGER, INTENT (INOUT) :: IWM(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F, JAC +! .. +! .. Local Scalars .. + REAL (WP) :: CON, DI, FAC, HRL1, R, R0, SRUR, YI, YJ, YJJ + INTEGER :: I, I1, I2, IER, II, J, J1, JJ, JJ1, JJ2, JOK, K, & + K1, K2, LENP, MBA, MBAND, MEB1, MEBAND, ML, ML1, MU, NG, NP1 +! K1, K2, LENP, MBA, MBAND, MEB1, MEBAND, ML, ML3, MU, NG, NP1 +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL +! .. +! .. FIRST EXECUTABLE STATEMENT DVJAC +! .. + IERPJ = 0 + HRL1 = H*RL1 +! See whether J should be evaluated (JOK = -1) or not (JOK = 1). + JOK = JSV + IF (JSV==1) THEN + IF (NST==0 .OR. NST>NSLJ+MSBJ) JOK = -1 + IF (ICF==1 .AND. DRC= MBAND) GOTO 60 + +! Otherwise, use the original algorithm. + IF (USE_JACSP) THEN +! Approximate the Jacobian using Doug Salane's JACSP. + NSLJ = NST + JCUR = 1 + WM(1:LENP) = ZERO + IOPTDS(1) = 1 + IOPTDS(2) = MBAND + IOPTDS(3) = 1 + IOPTDS(5) = ML +! INFORDS(4) was initialized in DVODE. + LWKDS = 3 * N + LIWKDS = 50 + N +! NRFJACDS = MEBAND*N +! NCFJACDS = 1 + NRFJACDS = MEBAND + NCFJACDS = N + MBA = MIN(MBAND,N) + MAXGRPDS = MBA +! Calculate the YSCALEDS vector for JACSPDV. + IF (LIKE_ORIGINAL_VODE) THEN + FAC = DVNORM(N,SAVF,EWT) +! JACSPDB multiplies YSCALEDS(*) BY UROUND**0.825: +! R0 = THOU*ABS(H)*UROUND*REAL(N)*FAC + R0 = THOU*ABS(H)*REAL(N)*FAC + IF (ABS(R0)<=ZERO) R0 = ONE + SRUR = WM1 + DO J = 1, N +! JACSPDB multiplies YSCALEDS(*) BY UROUND**0.825: +! R = MAX(ABS(Y(J)),R0/EWT(J)) + R = MAX(ABS(Y(J))/U325,(R0/EWT(J))*U125) + YSCALEDS(J) = R + END DO + ELSE + IF (ITOL == 1 .OR. ITOL == 3) THEN + DO J = 1, N + YSCALEDS(J) = MAX(ABS(Y(J)),ATOL(1),UROUND) + END DO + ELSE + DO J = 1, N + YSCALEDS(J) = MAX(ABS(Y(J)),ATOL(J),UROUND) + END DO + END IF + END IF + CALL JACSPDB(F,N,TN,Y,SAVF,WM(1),NRFJACDS, & + YSCALEDS,FACDS,IOPTDS,WKDS,LWKDS,IWKDS,LIWKDS, & + MAXGRPDS,NGRPDS,JPNTRDS,INDROWDS) + NFE = NFE + IWKDS(7) + NJE = NJE + 1 + ELSE + NSLJ = NST + JCUR = 1 + MBA = MIN(MBAND,N) + MEB1 = MEBAND - 1 + SRUR = WM1 + FAC = DVNORM(N,SAVF,EWT) + R0 = THOU*ABS(H)*UROUND*REAL(N)*FAC + IF (ABS(R0)<=ZERO) R0 = ONE + DO J = 1, MBA + DO I = J, N, MBAND + YI = Y(I) + R = MAX(SRUR*ABS(YI),R0/EWT(I)) + Y(I) = Y(I) + R + END DO + CALL F(N,TN,Y,FTEM) + DO JJ = J, N, MBAND + Y(JJ) = YH(JJ,1) + YJJ = Y(JJ) + R = MAX(SRUR*ABS(YJJ),R0/EWT(JJ)) + FAC = ONE/R + I1 = MAX(JJ-MU,1) + I2 = MIN(JJ+ML,N) +! II = JJ*MEB1 - ML + 2 + II = JJ*MEB1 - ML + DO I = I1, I2 + WM(II+I) = (FTEM(I)-SAVF(I))*FAC + END DO + END DO + END DO + NFE = NFE + MBA + NJE = NJE + 1 + END IF + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .TRUE. + GOTO 90 + 60 CONTINUE + +! User supplied diagonals information is available. +! WM(3:LENP+2) = ZERO + WM(1:LENP) = ZERO + NJE = NJE + 1 + NSLJ = NST + JCUR = 1 + MBA = MIN(MBAND,N) + MEB1 = MEBAND - 1 + SRUR = WM1 + FAC = DVNORM(N,SAVF,EWT) + R0 = THOU*ABS(H)*UROUND*REAL(N)*FAC + IF (ABS(R0)<=ZERO) R0 = ONE +! For each group of columns... + DO NG = 1, BNGRP +! Find the first and last columns in the group. + JJ1 = BIGP(NG) + JJ2 = BIGP(NG+1) - 1 +! For each column in this group... + DO JJ = JJ1, JJ2 + J = BJGP(JJ) + R = MAX(SRUR*ABS(Y(J)),R0/EWT(J)) + Y(J) = Y(J) + R + END DO + CALL F(N,TN,Y,FTEM) +! For each column in this group... + DO JJ = JJ1, JJ2 + J = BJGP(JJ) + Y(J) = YH(J,1) + R = MAX(SRUR*ABS(Y(J)),R0/EWT(J)) + FAC = ONE/R + IF (BUILD_IAJA) THEN +! Use the IAB, JAB sparse structure arrays to +! determine the first and last nonzeros in +! column J. + K1 = IAB(J) + K2 = IAB(J+1) - 1 + ELSE +! Extract the positions of the first and +! last nonzeros in this column directly. + CALL BANDED_GET_BJNZ(N,ML,MU,J,IWM(31),I) + DO K = 1, N + IF (IWM(30+K) /= 0) THEN + K1 = K + GOTO 70 + END IF + END DO + 70 CONTINUE + DO K = N, 1, -1 + IF (IWM(30+K) /= 0) THEN + K2 = K + GOTO 80 + END IF + END DO + 80 CONTINUE + END IF +! Load the nonzeros for column J in the banded matrix. + IF (BUILD_IAJA) THEN + DO K = K1, K2 + I = JAB(K) +! II = J * MEB1 - ML + 2 + II = J * MEB1 - ML + WM(II+I) = (FTEM(I)-SAVF(I))*FAC + END DO + ELSE + DO K = K1, K2 + I = IWM(30+K) + IF (I /= 0) THEN +! II = J * MEB1 - ML + 2 + II = J * MEB1 - ML + WM(II+I) = (FTEM(I)-SAVF(I))*FAC + END IF + END DO + END IF + END DO + END DO + NFE = NFE + BNGRP + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .TRUE. + 90 CONTINUE +! IF (JSV==1) CALL DACOPY(MBAND,N,WM(ML3),MEBAND,WM(LOCJS),MBAND) + IF (JSV==1) CALL DACOPY(MBAND,N,WM(ML1),MEBAND,WM(LOCJS),MBAND) + END IF + + IF (JOK==1) THEN + JCUR = 0 +! CALL DACOPY(MBAND,N,WM(LOCJS),MBAND,WM(ML3),MEBAND) + CALL DACOPY(MBAND,N,WM(LOCJS),MBAND,WM(ML1),MEBAND) + END IF + +! Multiply Jacobian by scalar, add identity, and do LU +! decomposition. + CON = -HRL1 +! CALL DSCAL_F90(LENP,CON,WM(3),1) + CALL DSCAL_F90(LENP,CON,WM(1),1) +! II = MBAND + 2 + II = MBAND + DO I = 1, N + WM(II) = WM(II) + ONE + II = II + MEBAND + END DO + NLU = NLU + 1 +! ______________________________________________________________________ + +! CALL DGBFA_F90(WM(3),MEBAND,N,ML,MU,IWM(31),IER) + CALL DGBFA_F90(WM(1),MEBAND,N,ML,MU,IWM(31),IER) + IF (IER/=0) IERPJ = 1 +! *****LAPACK build change point. Replace above with these statements. +! IF (.NOT.USE_LAPACK) THEN +!! CALL DGBFA_f90(WM(3),MEBAND,N,ML,MU,IWM(31),IER) +! CALL DGBFA_f90(WM(1),MEBAND,N,ML,MU,IWM(31),IER) +! IF (IER /= 0) IERPJ = 1 +! ELSE +!! CALL DGBTRF(N,N,ML,MU,WM(3),MEBAND,IWM(31),IER) +! CALL DGBTRF(N,N,ML,MU,WM(1),MEBAND,IWM(31),IER) +! IF (IER /= 0) IERPJ = 1 +! END IF +! ______________________________________________________________________ + RETURN +! End of code block for MITER = 4 or 5. + + END SUBROUTINE DVJAC +!_______________________________________________________________________ + + SUBROUTINE DACOPY(NROW,NCOL,A,NROWA,B,NROWB) +! .. +! Copy one array to another. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: NCOL, NROW, NROWA, NROWB +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: A(NROWA,NCOL) + REAL (WP), INTENT (INOUT) :: B(NROWB,NCOL) +! .. +! .. Local Scalars .. + INTEGER :: IC +! .. +! .. FIRST EXECUTABLE STATEMENT DACOPY +! .. + DO IC = 1, NCOL + CALL DCOPY_F90(NROW,A(1,IC),1,B(1,IC),1) + END DO + RETURN + + END SUBROUTINE DACOPY +!_______________________________________________________________________ + + SUBROUTINE DVSOL(WM,IWM,X,IERSL) +! .. +! Manage the solution of the linear system arising from a chord +! iteration for dense and banded solutions. +! .. +! This routine manages the solution of the linear system arising from +! a chord iteration. It is called if MITER /= 0. +! If MITER is 1 or 2, it calls DGESL_F90 to accomplish this. +! If MITER = 3 it updates the coefficient H*RL1 in the diagonal +! matrix, and then computes the solution. +! If MITER is 4 or 5, it calls DGBSL_F90. +! Communication with DVSOL uses the following variables: +! WM = Real work space containing the inverse diagonal matrix if +! MITER = 3 and the LU decomposition of the matrix otherwise. +! Storage of matrix elements starts at WM(1). +! WM also contains the following matrix-related data: +! WM1 = SQRT(UROUND) (not used here), +! WM2 = HRL1, the previous value of H*RL1, used if MITER = 3. +! IWM = Integer work space containing pivot information, starting at +! IWM(31), if MITER is 1, 2, 4, or 5. IWM also contains band +! parameters ML = IWM(1) and MU = IWM(2) if MITER is 4 or 5. +! X = The right-hand side vector on input, and the solution vector +! on output, of length N. +! IERSL = Output flag. IERSL = 0 if no trouble occurred. +! IERSL = 1 if a singular matrix arose with MITER = 3. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: IERSL +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: WM(*), X(*) + INTEGER, INTENT (INOUT) :: IWM(*) +! .. +! .. Local Scalars .. + REAL (WP) :: DI, HRL1, PHRL1, R + INTEGER :: I, MEBAND, ML, MU +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS +! .. +! ______________________________________________________________________ + +! *****LAPACK build change point. Insert this statement. +! INTEGER INFO +! CHARACTER*1 TRANS +! ______________________________________________________________________ + +! .. FIRST EXECUTABLE STATEMENT DVSOL +! .. + IERSL = 0 + GOTO (10,10,20,50,50) MITER + +10 CONTINUE +! ______________________________________________________________________ + +! CALL DGESL_F90(WM(3),N,N,IWM(31),X,0) + CALL DGESL_F90(WM(1),N,N,IWM(31),X,0) +! *****LAPACK build change point. Replace above with these statements. +! IF (.NOT.USE_LAPACK) THEN +!! CALL DGESL_f90(WM(3),N,N,IWM(31),X,0) +! CALL DGESL_f90(WM(1),N,N,IWM(31),X,0) +! ELSE +! TRANS = 'N' +!! CALL DGETRS(TRANS,N,1,WM(3),N,IWM(31),X,N,INFO) +! CALL DGETRS(TRANS,N,1,WM(1),N,IWM(31),X,N,INFO) +! IF (INFO /= 0) THEN +! WRITE(6,*) 'Stopping in DVSOL with INFO = ', INFO +! STOP +! END IF +! END IF +! ______________________________________________________________________ + + RETURN + +20 PHRL1 = WM2 + HRL1 = H*RL1 + WM2 = HRL1 + IF (ABS(HRL1-PHRL1)<=ZERO) GOTO 30 + R = HRL1/PHRL1 + DO I = 1, N +! DI = ONE - R*(ONE-ONE/WM(I+2)) + DI = ONE - R*(ONE-ONE/WM(I)) + IF (ABS(DI)<=ZERO) GOTO 40 +! WM(I+2) = ONE/DI + WM(I) = ONE/DI + END DO + +30 DO I = 1, N +! X(I) = WM(I+2)*X(I) + X(I) = WM(I)*X(I) + END DO + RETURN +40 IERSL = 1 + RETURN + +50 ML = IWM(1) + MU = IWM(2) + MEBAND = 2*ML + MU + 1 +! ______________________________________________________________________ + +! CALL DGBSL_F90(WM(3),MEBAND,N,ML,MU,IWM(31),X,0) + CALL DGBSL_F90(WM(1),MEBAND,N,ML,MU,IWM(31),X,0) +! *****LAPACK build change point. Replace above with these statements. +! IF (.NOT.USE_LAPACK) THEN +!! CALL DGBSL_F90(WM(3),MEBAND,N,ML,MU,IWM(31),X,0) +! CALL DGBSL_F90(WM(1),MEBAND,N,ML,MU,IWM(31),X,0) +! ELSE +! TRANS = 'N' +!! CALL DGBTRS(TRANS,N,ML,MU,1,WM(3),MEBAND,IWM(31),X,N,INFO) +! CALL DGBTRS(TRANS,N,ML,MU,1,WM(1),MEBAND,IWM(31),X,N,INFO) +! IF (INFO /= 0) THEN +! WRITE(6,*) 'Stopping in DVSOL with INFO = ', INFO +! STOP +! END IF +! END IF +! ______________________________________________________________________ + + RETURN + + END SUBROUTINE DVSOL +!_______________________________________________________________________ + + SUBROUTINE DVSRCO(RSAV,ISAV,JOB) +! .. +! Save or restore (depending on JOB) the contents of the PRIVATE +! variable blocks, which are used internally by DVODE (not called +! by DVODE_F90). +! .. +! RSAV = real array of length 49 or more. +! ISAV = integer array of length 41 or more. +! JOB = flag indicating to save or restore the PRIVATE variable +! blocks: +! JOB = 1 if PRIVATE variables is to be saved +! (written to RSAV/ISAV). +! JOB = 2 if PRIVATE variables is to be restored +! (read from RSAV/ISAV). +! A call with JOB = 2 presumes a prior call with JOB = 1. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: JOB +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: RSAV(*) + INTEGER, INTENT (INOUT) :: ISAV(*) +! .. +! .. FIRST EXECUTABLE STATEMENT DVSRCO +! .. + IF (JOB/=2) THEN +! Save the contents of the PRIVATE blocks. + RSAV(1) = ACNRM + RSAV(2) = CCMXJ + RSAV(3) = CONP + RSAV(4) = CRATE + RSAV(5) = DRC + RSAV(6:18) = EL(1:13) + RSAV(19) = ETA + RSAV(20) = ETAMAX + RSAV(21) = H + RSAV(22) = HMIN + RSAV(23) = HMXI + RSAV(24) = HNEW + RSAV(25) = HSCAL + RSAV(26) = PRL1 + RSAV(27) = RC + RSAV(28) = RL1 + RSAV(29:41) = TAU(1:13) + RSAV(42:46) = TQ(1:5) + RSAV(47) = TN + RSAV(48) = UROUND + RSAV(LENRV1+1) = HU + ISAV(1) = ICF + ISAV(2) = INIT + ISAV(3) = IPUP + ISAV(4) = JCUR + ISAV(5) = JSTART + ISAV(6) = JSV + ISAV(7) = KFLAG + ISAV(8) = KUTH + ISAV(9) = L + ISAV(10) = LMAX + ISAV(11) = LYH + ISAV(12) = 0 + ISAV(13) = 0 + ISAV(14) = 0 + ISAV(15) = LWM + ISAV(16) = LIWM + ISAV(17) = LOCJS + ISAV(18) = MAXORD + ISAV(19) = METH + ISAV(20) = MITER + ISAV(21) = MSBJ + ISAV(22) = MXHNIL + ISAV(23) = MXSTEP + ISAV(24) = N + ISAV(25) = NEWH + ISAV(26) = NEWQ + ISAV(27) = NHNIL + ISAV(28) = NQ + ISAV(29) = NQNYH + ISAV(30) = NQWAIT + ISAV(31) = NSLJ + ISAV(32) = NSLP + ISAV(33) = NYH + ISAV(LENIV1+1) = NCFN + ISAV(LENIV1+2) = NETF + ISAV(LENIV1+3) = NFE + ISAV(LENIV1+4) = NJE + ISAV(LENIV1+5) = NLU + ISAV(LENIV1+6) = NNI + ISAV(LENIV1+7) = NQU + ISAV(LENIV1+8) = NST + RETURN + ELSE +! Replace the contents of the PRIVATE blocks. + ACNRM = RSAV(1) + CCMXJ = RSAV(2) + CONP = RSAV(3) + CRATE = RSAV(4) + DRC = RSAV(5) + EL(1:13) = RSAV(6:18) + ETA = RSAV(19) + ETAMAX = RSAV(20) + H = RSAV(21) + HMIN = RSAV(22) + HMXI = RSAV(23) + HNEW = RSAV(24) + HSCAL = RSAV(25) + PRL1 = RSAV(26) + RC = RSAV(27) + RL1 = RSAV(28) + TAU(1:13) = RSAV(29:41) + TQ(1:5) = RSAV(42:46) + TN = RSAV(47) + UROUND = RSAV(48) + HU = RSAV(LENRV1+1) + ICF = ISAV(1) + INIT = ISAV(2) + IPUP = ISAV(3) + JCUR = ISAV(4) + JSTART = ISAV(5) + JSV = ISAV(6) + KFLAG = ISAV(7) + KUTH = ISAV(8) + L = ISAV(9) + LMAX = ISAV(10) + LYH = ISAV(11) + LWM = ISAV(15) + LIWM = ISAV(16) + LOCJS = ISAV(17) + MAXORD = ISAV(18) + METH = ISAV(19) + MITER = ISAV(20) + MSBJ = ISAV(21) + MXHNIL = ISAV(22) + MXSTEP = ISAV(23) + N = ISAV(24) + NEWH = ISAV(25) + NEWQ = ISAV(26) + NHNIL = ISAV(27) + NQ = ISAV(28) + NQNYH = ISAV(29) + NQWAIT = ISAV(30) + NSLJ = ISAV(31) + NSLP = ISAV(32) + NYH = ISAV(33) + NCFN = ISAV(LENIV1+1) + NETF = ISAV(LENIV1+2) + NFE = ISAV(LENIV1+3) + NJE = ISAV(LENIV1+4) + NLU = ISAV(LENIV1+5) + NNI = ISAV(LENIV1+6) + NQU = ISAV(LENIV1+7) + NST = ISAV(LENIV1+8) + RETURN + END IF + + END SUBROUTINE DVSRCO +!_______________________________________________________________________ + + SUBROUTINE DEWSET(N,ITOL,RTOL,ATOL,YCUR,EWT) +! .. +! Set the error weight vector. +! .. +! This subroutine sets the error weight vector EWT according to +! EWT(i) = RTOL(i)*ABS(YCUR(i)) + ATOL(i), i = 1,...,N, +! with the subscript on RTOL and/or ATOL possibly replaced by 1 +! above, depending on the value of ITOL. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: ITOL, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: ATOL(*), RTOL(*), YCUR(N) + REAL (WP), INTENT (OUT) :: EWT(N) +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS +! .. +! .. FIRST EXECUTABLE STATEMENT DEWSET +! .. + GOTO (10,20,30,40) ITOL +10 CONTINUE + EWT(1:N) = RTOL(1)*ABS(YCUR(1:N)) + ATOL(1) + RETURN +20 CONTINUE + EWT(1:N) = RTOL(1)*ABS(YCUR(1:N)) + ATOL(1:N) + RETURN +30 CONTINUE + EWT(1:N) = RTOL(1:N)*ABS(YCUR(1:N)) + ATOL(1) + RETURN +40 CONTINUE + EWT(1:N) = RTOL(1:N)*ABS(YCUR(1:N)) + ATOL(1:N) + RETURN + + END SUBROUTINE DEWSET +!_______________________________________________________________________ + + FUNCTION DVNORM(N,V,W) +! .. +! Calculate weighted root-mean-square (rms) vector norm. +! .. +! This routine computes the weighted root-mean-square norm +! of the vector of length N contained in the array V, with +! weights contained in the array W of length N. +! DVNORM = SQRT((1/N) * SUM(V(i)*W(i))**2) +! .. + IMPLICIT NONE +! .. +! .. Function Return Value .. + REAL (WP) :: DVNORM +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: V(N), W(N) +! .. +! .. Local Scalars .. + REAL (WP) :: SUM + INTEGER :: I +! .. +! .. Intrinsic Functions .. + INTRINSIC SQRT +! .. +! .. FIRST EXECUTABLE STATEMENT DVNORM +! .. + SUM = ZERO + DO I = 1, N + SUM = SUM + (V(I)*W(I))**2 + END DO + DVNORM = SQRT(SUM/N) + RETURN + + END FUNCTION DVNORM +!_______________________________________________________________________ + +! The modified SLATEC error handling routines begin here. + SUBROUTINE XERRDV(MSG,NERR,LEVEL,NI,I1,I2,NR,R1,R2) +! .. +! Write error messages with values. +! .. +! This is an adaptation of subroutine XERRWD (NMES eliminated). +! Subroutines XERRDV, XSETF, XSETUN, and Functions IXSAV +! as given here, constitute a simplified version of the SLATEC +! error handling package. +! +! All arguments are input arguments. +! MSG = The message (character array). +! NERR = The error number (not used). +! LEVEL = The error level +! 0 or 1 means recoverable (control returns to caller). +! 2 means fatal (run is aborted--see note below). +! NI = Number of integers (0, 1, or 2) to be printed with message. +! I1,I2 = Integers to be printed, depending on NI. +! NR = Number of reals (0, 1, or 2) to be printed with message. +! R1,R2 = Reals to be printed, depending on NR. +! Note: This routine is machine-dependent and specialized for use +! in limited context, in the following ways: +! 1. The argument MSG is assumed to be of type CHARACTER, and +! the message is printed with a format of (1X,A). +! 2. The message is assumed to take only one line. +! Multi-line messages are generated by repeated calls. +! 3. If LEVEL = 2, control passes to the statement: STOP +! to abort the run. This statement may be machine-dependent. +! 4. R1 and R2 are assumed to be in real(wp) and are printed +! in D21.13 format. +! Internal Notes: +! For a different default logical unit number, IXSAV(or a subsidiary +! function that it calls) will need to be modified. +! For a different run-abort command, change the statement following +! statement 100 at the end. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: R1, R2 + INTEGER :: I1, I2, LEVEL, NERR, NI, NR + CHARACTER (*) :: MSG + LOGICAL PRINT_NERR +! .. +! .. Local Scalars .. + INTEGER :: LUNIT, MESFLG +! .. +! .. FIRST EXECUTABLE STATEMENT XERRDV +! .. +! Get logical unit number and message print flag. + LUNIT = IXSAV(1,0,.FALSE.) + MESFLG = IXSAV(2,0,.FALSE.) + IF (MESFLG==0) GOTO 10 + + PRINT_NERR = .FALSE. + IF (PRINT_NERR) PRINT *, MSG, 'Message number = ', NERR + +! Write the message. + WRITE (LUNIT,90000) MSG +90000 FORMAT (1X,A) + IF (NI==1) WRITE (LUNIT,90001) I1 +90001 FORMAT ('In the above message, I1 = ',I10) + IF (NI==2) WRITE (LUNIT,90002) I1, I2 +90002 FORMAT ('In the above message, I1 = ',I10,3X,'I2 = ',I10) + IF (NR==1) WRITE (LUNIT,90003) R1 +90003 FORMAT ('In the above message, R1 = ',D21.13) + IF (NR==2) WRITE (LUNIT,90004) R1, R2 +90004 FORMAT ('In the above message, R1 = ',D21.13,3X,'R2 = ',D21.13) + +! Abort the run if LEVEL = 2. +10 IF (LEVEL/=2) RETURN + WRITE (LUNIT,90005) +90005 FORMAT ('LEVEL = 2 in XERRDV. Stopping.') + STOP + + END SUBROUTINE XERRDV +!_______________________________________________________________________ + + SUBROUTINE XSETF(MFLAG) +! .. +! Reset the error print control flag. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: MFLAG +! .. +! .. Local Scalars .. + INTEGER :: JUNK +! .. +! .. FIRST EXECUTABLE STATEMENT XSETF +! .. + IF (MFLAG==0 .OR. MFLAG==1) JUNK = IXSAV(2,MFLAG,.TRUE.) +! Get rid of a compiler warning message: + IF (JUNK/=JUNK) STOP + RETURN + + END SUBROUTINE XSETF +!_______________________________________________________________________ + + SUBROUTINE XSETUN(LUN) +! .. +! Reset the logical unit number for error messages. +! .. +! XSETUN sets the logical unit number for error messages to LUN. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: LUN +! .. +! .. Local Scalars .. + INTEGER :: JUNK +! .. +! .. FIRST EXECUTABLE STATEMENT XSETUN +! .. + IF (LUN>0) JUNK = IXSAV(1,LUN,.TRUE.) +! Get rid of a compiler warning message: + IF (JUNK/=JUNK) STOP + RETURN + + END SUBROUTINE XSETUN +!_______________________________________________________________________ + + FUNCTION IXSAV(IPAR,IVALUE,ISET) +! .. +! Save and recall error message control parameters. +! .. +! IXSAV saves and recalls one of two error message parameters: +! LUNIT, the logical unit number to which messages are printed, +! and MESFLG, the message print flag. +! This is a modification of the SLATEC library routine J4SAVE. +! Saved local variables: +! LUNIT = Logical unit number for messages. The default is +! obtained by a call to IUMACH(may be machine-dependent). +! MESFLG = Print control flag: +! 1 means print all messages (the default). +! 0 means no printing. +! On input: +! IPAR = Parameter indicator (1 for LUNIT, 2 for MESFLG). +! IVALUE = The value to be set for the parameter, if ISET = .TRUE. +! ISET = Logical flag to indicate whether to read or write. +! If ISET = .TRUE., the parameter will be given +! the value IVALUE. If ISET = .FALSE., the parameter +! will be unchanged, and IVALUE is a dummy argument. +! On return: +! IXSAV = The (old) value of the parameter. +! .. + IMPLICIT NONE +! .. +! .. Function Return Value .. + INTEGER :: IXSAV +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: IPAR, IVALUE + LOGICAL, INTENT (IN) :: ISET +! .. +! .. Local Scalars .. + INTEGER, SAVE :: LUNIT, MESFLG +! .. +! .. Data Statements .. + DATA LUNIT/ -1/, MESFLG/1/ +! .. +! .. FIRST EXECUTABLE STATEMENT IXSAV +! .. + IF (IPAR==1) THEN + IF (LUNIT==-1) LUNIT = IUMACH() +! i Get rid of a compiler warning message: + IF (LUNIT/=LUNIT) STOP + IXSAV = LUNIT + IF (ISET) LUNIT = IVALUE + END IF + + IF (IPAR==2) THEN + IXSAV = MESFLG + IF (ISET) MESFLG = IVALUE + END IF + RETURN + + END FUNCTION IXSAV +!_______________________________________________________________________ + + FUNCTION IUMACH() +! .. +! Provide the standard output unit number. +! .. +! INTEGER LOUT, IUMACH +! LOUT = IUMACH() +! Function Return Values: +! LOUT: the standard logical unit for Fortran output. +! Internal Notes: +! The built-in value of 6 is standard on a wide range +! of Fortran systems. This may be machine-dependent. +! .. +! .. Function Return Value .. + INTEGER :: IUMACH +! .. +! .. FIRST EXECUTABLE STATEMENT IUMACH +! .. + IUMACH = 6 + RETURN + + END FUNCTION IUMACH +! The modified error handling routines end here. +!_______________________________________________________________________ + + SUBROUTINE CHECK_STAT(IER,CALLED_FROM_WHERE) +! .. +! Print an error message if a storage allocation error +! occurred. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: CALLED_FROM_WHERE, IER +! .. +! .. Local Scalars .. + INTEGER :: I1 + CHARACTER (80) :: MSG +! .. +! .. FIRST EXECUTABLE STATEMENT CHECK_STAT +! .. + IF (IER/=0) THEN + I1 = CALLED_FROM_WHERE + MSG = 'A storage allocation error occurred.' + CALL XERRDV(MSG,1410,1,0,0,0,0,ZERO,ZERO) + MSG = 'The error occurred at location I1.' + CALL XERRDV(MSG,1410,2,1,I1,0,0,ZERO,ZERO) + END IF + RETURN + + END SUBROUTINE CHECK_STAT +!_______________________________________________________________________ + + SUBROUTINE DVPREPS(NEQ,Y,YH,LDYH,SAVF,EWT,F,JAC) +! .. +! Determine the sparsity structure and allocate the necessary arrays +! for MA28 based sparse solutions. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: LDYH, NEQ +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: EWT(*), Y(*) + REAL (WP) :: SAVF(*) + REAL (WP), INTENT (IN) :: YH(LDYH,*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F, JAC +! .. +! .. Local Scalars .. + REAL (WP) :: DQ, DYJ, ERWT, FAC, YJ + INTEGER :: I, IER, J, JFOUND, K, KMAX, KMIN, KNEW, NP1, NZ + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, ALLOCATED, MAX, SIGN +! .. +! .. FIRST EXECUTABLE STATEMENT DVPREPS +! .. + NZ_SWAG = MAX(MAX(1000,NZ_SWAG),10*N) + NP1 = N + 1 + NZ_ALL = NZ_SWAG +! ADDTONNZ = MAX(1000,NZ_SWAG) + ADDTONNZ = NZ_SWAG +10 CONTINUE + IF (ALLOCATED(IAN)) THEN + DEALLOCATE (IAN,JAN,IGP,JGP,FTEMP1,IKEEP28,IW28,ICN,PMAT, & + JVECT,STAT=IER) + CALL CHECK_STAT(IER,490) + IF (ALLOCATED(JMAT)) THEN + DEALLOCATE (JMAT,STAT=IER) + CALL CHECK_STAT(IER,500) + END IF + END IF + NZ_ALL = NZ_ALL + ADDTONNZ + LICN_ALL = ELBOW_ROOM * NZ_ALL + LIRN_ALL = ELBOW_ROOM * NZ_ALL + IF (LICN_ALL>MAX_ARRAY_SIZE .OR. LIRN_ALL>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping.' + CALL XERRDV(MSG,1420,2,0,0,0,0,ZERO,ZERO) + END IF +! Note: ICN may need to be reallocated in DVJACS28. + ALLOCATE (IAN(NP1),JAN(LIRN_ALL),IGP(NP1),JGP(N),FTEMP1(N), & + IKEEP28(N,5),IW28(N,8),ICN(LICN_ALL),PMAT(LICN_ALL), & + JVECT(LIRN_ALL),STAT=IER) + CALL CHECK_STAT(IER,510) + IF (JSV==1) THEN + ALLOCATE (JMAT(NZ_ALL),STAT=IER) + CALL CHECK_STAT(IER,520) + JMAT(1:NZ_ALL) = ZERO + END IF + + IF (MOSS==0) GOTO 30 + IF (ISTATC==3) GOTO 20 + +! ISTATE = 1 and MOSS /= 0. + +! Perturb Y for structure determination: + DO I = 1, N + ERWT = ONE/EWT(I) + FAC = ONE + ONE/(I+ONE) + Y(I) = Y(I) + FAC*SIGN(ERWT,Y(I)) + END DO + GOTO (60,70) MOSS + +20 CONTINUE +! ISTATE = 3 and MOSS /= 0. + +! Load Y from YH(*,1): + Y(1:N) = YH(1:N,1) + GOTO (60,70) MOSS + +! MOSS = 0 + +! Process user's IA,JA. Add diagonal entries if necessary: +30 CONTINUE + IF (IAJA_CALLED) THEN + ELSE + MSG = 'You have indicated that you wish to supply the' + CALL XERRDV(MSG,1430,1,0,0,0,0,ZERO,ZERO) + MSG = 'sparsity arrays IA and JA directly but you did' + CALL XERRDV(MSG,1430,1,0,0,0,0,ZERO,ZERO) + MSG = 'not call SET_IAJA after calling SET_OPTS.' + CALL XERRDV(MSG,1430,2,0,0,0,0,ZERO,ZERO) + END IF + KNEW = 1 + KMIN = IA(1) + IAN(1) = 1 + DO J = 1, N + JFOUND = 0 + KMAX = IA(J+1) - 1 + IF (KMIN>KMAX) GOTO 40 + DO K = KMIN, KMAX + I = JA(K) + IF (I==J) JFOUND = 1 + IF (KNEW>NZ_ALL) THEN + IF (LP /= 0) THEN + MSG = 'NZ_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1440,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1440,1,1,NZ_ALL,0,0,ZERO,ZERO) + END IF + GOTO 10 + END IF + JAN(KNEW) = I + KNEW = KNEW + 1 + END DO + IF (JFOUND==1) GOTO 50 +40 IF (KNEW>NZ_ALL) THEN + IF (LP /= 0) THEN + MSG = 'NZ_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1450,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1450,1,1,NZ_ALL,0,0,ZERO,ZERO) + END IF + GOTO 10 + END IF + JAN(KNEW) = J + KNEW = KNEW + 1 +50 IAN(J+1) = KNEW + KMIN = KMAX + 1 + END DO + GOTO 90 + +60 CONTINUE + +! MOSS = 1. + +! Compute structure from user-supplied Jacobian routine JAC. + NZ = 0 + CALL JAC(NEQ,TN,Y,IAN,JAN,NZ,PMAT) + IF (NZ<=0) THEN + MSG = 'Illegal value of NZ from JAC in DVPREPS.' + CALL XERRDV(MSG,1460,2,0,0,0,0,ZERO,ZERO) + END IF + IF (NZ>NZ_ALL) THEN + IF (LP /= 0) THEN + MSG = 'NZ_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1470,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1470,1,1,NZ_ALL,0,0,ZERO,ZERO) + END IF + GOTO 10 + END IF + CALL JAC(NEQ,TN,Y,IAN,JAN,NZ,PMAT) + CALL SET_ICN(N,IAN,ICN) + CALL CHECK_DIAG(N,IAN,JAN,ICN) + GOTO 90 + +! MOSS = 2. + +! Compute structure from results of N+1 calls to F. +70 K = 1 + IAN(1) = 1 + DO I = 1, N + ERWT = ONE/EWT(I) + FAC = ONE + ONE/(I+ONE) + Y(I) = Y(I) + FAC*SIGN(ERWT,Y(I)) + END DO + CALL F(NEQ,TN,Y,SAVF) + NFE = NFE + 1 + DO J = 1, N + IF (K>NZ_ALL) THEN + IF (LP /= 0) THEN + MSG = 'NZ_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1480,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1480,1,1,NZ_ALL,0,0,ZERO,ZERO) + END IF + GOTO 10 + END IF + YJ = Y(J) + ERWT = ONE/EWT(J) + DYJ = SIGN(ERWT,YJ) + Y(J) = YJ + DYJ + CALL F(NEQ,TN,Y,FTEMP1) + NFE = NFE + 1 + Y(J) = YJ + DO 80 I = 1, N + DQ = (FTEMP1(I)-SAVF(I))/DYJ + IF ((ABS(DQ)<=SETH) .AND. (I/=J)) GOTO 80 + JAN(K) = I + K = K + 1 +80 END DO + IAN(J+1) = K + END DO +90 CONTINUE + IF (MOSS==0 .OR. ISTATC/=1) GOTO 100 +! If ISTATE = 1 and MOSS /= 0, restore Y from YH. + Y(1:N) = YH(1:N,1) +100 NNZ = IAN(NP1) - 1 + LENIGP = 0 + MAXG = 0 + IF (MITER==7) THEN +! Compute grouping of column indices. + MAXG = NP1 + CALL DGROUP(N,IAN,JAN,MAXG,NGP,IGP,JGP,IKEEP28(1,1), & + IKEEP28(1,2),IER) + IF (IER/=0) THEN + MSG = 'An error occurred in DGROUP.' + CALL XERRDV(MSG,1490,2,0,0,0,0,ZERO,ZERO) + END IF + LENIGP = NGP + 1 + END IF + + IF (USE_JACSP .AND. MITER==7) THEN +! Use Doug Salane's Jacobian routines to determine the column +! grouping; and allocate and initialize the necessary arrays +! for use in DVJACS48. + IF (ALLOCATED(INDROWDS)) THEN + DEALLOCATE (INDROWDS, INDCOLDS, NGRPDS, IPNTRDS, JPNTRDS, IWADS, & + IWKDS, IOPTDS, YSCALEDS, WKDS, FACDS) + CALL CHECK_STAT(IER,530) + END IF +! We could delete IWADS and use IW28 array. + ALLOCATE (INDROWDS(NNZ), INDCOLDS(NNZ), NGRPDS(N+1), IPNTRDS(N+1), & + JPNTRDS(N+1), IWADS(6*N), IWKDS(50+N), IOPTDS(5), YSCALEDS(N), & + WKDS(3*N), FACDS(N) ,STAT=IER) + CALL CHECK_STAT(IER,540) + INDROWDS(1:NNZ) = JAN(1:NNZ) + CALL SET_ICN(N,IAN,INDCOLDS) + CALL CHECK_DIAG(N,IAN,INDROWDS,INDCOLDS) + LIWADS = 6 * N + CALL DVDSM(N,N,NNZ,INDROWDS,INDCOLDS,NGRPDS,MAXGRPDS,MINGRPDS, & + INFODS,IPNTRDS,JPNTRDS,IWADS,LIWADS) + IF (INFODS /= 1) THEN + MSG = 'An error occurred in subroutine DSM. INFO = I1.' + CALL XERRDV(MSG,1500,2,1,INFODS,0,0,ZERO,ZERO) + END IF +! For use in DVJACS28: + IOPTDS(4) = 0 +! Define the IGP and JGP arrays needed by DVJACS28. + CALL DGROUPDS(N,MAXGRPDS,NGRPDS,IGP,JGP) + NGP = MAXGRPDS + LENIGP = MAXGRPDS + 1 + END IF + +! Trim the arrays to the final sizes. + + IF (NZ_ALL>NNZ) THEN + NZ_ALL = NNZ + MAX_NNZ = MAX(MAX_NNZ,NNZ) + LIRN_ALL = ELBOW_ROOM * NNZ + ICN(1:NNZ) = JAN(1:NNZ) + DEALLOCATE (JAN,STAT=IER) + CALL CHECK_STAT(IER,550) + ALLOCATE (JAN(LIRN_ALL),STAT=IER) + CALL CHECK_STAT(IER,560) + JAN(1:NNZ) = ICN(1:NNZ) + CALL CHECK_STAT(IER,570) + DEALLOCATE (ICN,PMAT,JVECT,STAT=IER) + CALL CHECK_STAT(IER,580) + IF (ALLOCATED(JMAT)) THEN + DEALLOCATE (JMAT,STAT=IER) + CALL CHECK_STAT(IER,590) + END IF + LICN_ALL = ELBOW_ROOM * NNZ + ALLOCATE (ICN(LICN_ALL),PMAT(LICN_ALL),JVECT(LIRN_ALL),STAT=IER) + CALL CHECK_STAT(IER,600) + IF (JSV==1) THEN + ALLOCATE (JMAT(NNZ),STAT=IER) + CALL CHECK_STAT(IER,610) + JMAT(1:NNZ) = ZERO + END IF + IF (MITER==7) THEN + JVECT(1:LENIGP) = IGP(1:LENIGP) + DEALLOCATE (IGP,STAT=IER) + CALL CHECK_STAT(IER,620) + ALLOCATE (IGP(LENIGP),STAT=IER) + CALL CHECK_STAT(IER,630) + IGP(1:LENIGP) = JVECT(1:LENIGP) + END IF + END IF + IF (SCALE_MATRIX) THEN + IF (ALLOCATED(CSCALEX)) THEN + DEALLOCATE (CSCALEX,RSCALEX,WSCALEX,STAT=IER) + CALL CHECK_STAT(IER,640) + ALLOCATE (CSCALEX(N),RSCALEX(N),WSCALEX(N,5),STAT=IER) + CALL CHECK_STAT(IER,650) + ELSE + ALLOCATE (CSCALEX(N),RSCALEX(N),WSCALEX(N,5),STAT=IER) + CALL CHECK_STAT(IER,660) + END IF + END IF + IF (LP /= 0) THEN + MSG = 'The final DVPRPEPS storage allocations are:' + CALL XERRDV(MSG,1510,1,0,0,0,0,ZERO,ZERO) + MSG = ' NZ_ALL (=I1):' + CALL XERRDV(MSG,1510,1,1,NZ_ALL,0,0,ZERO,ZERO) + MSG = ' LIRN_ALL (=I1) and LICN_ALL (=I2):' + CALL XERRDV(MSG,1510,1,2,LIRN_ALL,LICN_ALL,0,ZERO,ZERO) + END IF + RETURN + + END SUBROUTINE DVPREPS +!_______________________________________________________________________ + + SUBROUTINE DVRENEW(NEQ,Y,SAVF,EWT,F) +! .. +! In the event MA28BD encounters a zero pivot in the LU factorization +! of the iteration matrix due to an out-of-date MA28AD pivot sequence, +! re-calculate the sparsity structure using finite differences. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: NEQ +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: EWT(*), SAVF(*), Y(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F +! .. +! .. Local Scalars .. + REAL (WP) :: DQ, DYJ, ERWT, FAC, YJ + INTEGER :: ADDTONZ, I, IER, J, K, KVAL, NP1 + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, ALLOCATED, MAX, SIGN +! .. +! .. FIRST EXECUTABLE STATEMENT DVRENEW +! .. +! .. Caution: +! This routine must not be called before DVPREPS has been called. +! +! Note: +! On entry to DVRENEW, the allocated array sizes of arrays that +! may change size are: +! ICN, PMAT = length LICN_ALL +! JAN, JVECT = length LIRN_ALL +! JMAT = length NZ_ALL +! IGP = length LENIGP on first entry; N+1 thereafter + +! Check if a numerical Jacobian is being used and stop if +! it is not. + IF (MITER /= 7) THEN + MSG = 'DVRENEW can be used only if MITER = 7.' + CALL XERRDV(MSG,1520,2,0,0,0,0,ZERO,ZERO) + END IF + +! Save Y and SAVF. + IF (.NOT.ALLOCATED(YTEMP)) THEN + ALLOCATE (YTEMP(N),DTEMP(N),STAT=IER) + CALL CHECK_STAT(IER,670) + END IF + YTEMP(1:N) = Y(1:N) + DTEMP(1:N) = SAVF(1:N) + +! Define the amount to be added to the array lengths +! if necessary. + NP1 = N + 1 + NNZ = IAN(NP1) - 1 + ADDTONZ = ELBOW_ROOM * NNZ + +! Just change the size of IGP to N+1 if have not already +! done so. + IF (SIZE(IGP) /= NP1) THEN + DEALLOCATE (IAN,STAT=IER) + CALL CHECK_STAT(IER,680) + ALLOCATE (IAN(NP1),STAT=IER) + CALL CHECK_STAT(IER,690) + END IF + +! Go to the differencing section to determine the new +! sparsity structure. + GOTO 20 + +10 CONTINUE + +! Reallocate the arrays if necessary. + IF (KVAL > LIRN_ALL) THEN +! Note: JAN and JVECT may need to be reallocated in DVJACS28. + LIRN_ALL = LIRN_ALL + ADDTONZ + IF (LIRN_ALL>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping in DVRENEW.' + CALL XERRDV(MSG,1530,2,0,0,0,0,ZERO,ZERO) + END IF + DEALLOCATE (JAN,JVECT,STAT=IER) + CALL CHECK_STAT(IER,700) + ALLOCATE (JAN(LIRN_ALL),JVECT(LIRN_ALL),STAT=IER) + CALL CHECK_STAT(IER,710) + END IF + IF (KVAL > LICN_ALL) THEN +! Note: ICN and PMAT may need to be reallocated in DVJACS28. + LICN_ALL = LICN_ALL + ADDTONZ + IF (LICN_ALL>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping in DVRENEW.' + CALL XERRDV(MSG,1540,2,0,0,0,0,ZERO,ZERO) + END IF + DEALLOCATE (ICN,PMAT,STAT=IER) + CALL CHECK_STAT(IER,720) + ALLOCATE (ICN(LICN_ALL),PMAT(LICN_ALL),STAT=IER) + CALL CHECK_STAT(IER,730) + END IF + + 20 CONTINUE + +! Perturb Y for structure determination: + DO I = 1, N + ERWT = ONE/EWT(I) + FAC = ONE + ONE/(I+ONE) + Y(I) = Y(I) + FAC*SIGN(ERWT,Y(I)) + END DO + +! Compute structure from results of N+1 calls to F. + K = 1 + IAN(1) = 1 + DO I = 1, N + ERWT = ONE/EWT(I) + FAC = ONE + ONE/(I+ONE) + Y(I) = Y(I) + FAC*SIGN(ERWT,Y(I)) + END DO + CALL F(NEQ,TN,Y,SAVF) + NFE = NFE + 1 + DO J = 1, N + KVAL = K + IF (KVAL > LIRN_ALL) THEN + IF (LP /= 0) THEN + MSG = 'LIRN_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1550,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1550,1,1,LIRN_ALL,0,0,ZERO,ZERO) + END IF + GOTO 10 + END IF + IF (KVAL > LICN_ALL) THEN + IF (LP /= 0) THEN + MSG = 'LICN_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1560,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1560,1,1,LICN_ALL,0,0,ZERO,ZERO) + END IF + GOTO 10 + END IF + YJ = Y(J) + ERWT = ONE/EWT(J) + DYJ = SIGN(ERWT,YJ) + Y(J) = YJ + DYJ + CALL F(NEQ,TN,Y,FTEMP1) + NFE = NFE + 1 + Y(J) = YJ + DO 80 I = 1, N + DQ = (FTEMP1(I)-SAVF(I))/DYJ + IF ((ABS(DQ)<=SETH) .AND. (I/=J)) GOTO 80 + JAN(K) = I + K = K + 1 +80 END DO + IAN(J+1) = K + END DO + + NNZ = IAN(NP1) - 1 + IF (NNZ > NZ_ALL .AND. JSV == 1) THEN +! Increase the size of JMAT if necessary. + NZ_ALL = NNZ + IF (LP /= 0) THEN + MSG = 'NZ_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1570,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1570,1,1,NZ_ALL,0,0,ZERO,ZERO) + END IF + IF (NZ_ALL>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping in DVRENEW.' + CALL XERRDV(MSG,1580,2,0,0,0,0,ZERO,ZERO) + END IF + DEALLOCATE (JMAT,STAT=IER) + CALL CHECK_STAT(IER,740) + ALLOCATE (JMAT(NZ_ALL),STAT=IER) + CALL CHECK_STAT(IER,750) + JMAT(1:NZ_ALL) = ZERO + END IF + +! Compute grouping of column indices. + + LENIGP = 0 + MAXG = 0 + MAXG = NP1 + CALL DGROUP(N,IAN,JAN,MAXG,NGP,IGP,JGP,IKEEP28(1,1), & + IKEEP28(1,2),IER) + IF (IER/=0) THEN + MSG = 'An error occurred in DGROUP.' + CALL XERRDV(MSG,1590,2,0,0,0,0,ZERO,ZERO) + END IF + LENIGP = NGP + 1 + +! Restore Y and SAVF. + Y(1:N) = YTEMP(1:N) + SAVF(1:N) = DTEMP(1:N) + RETURN + + END SUBROUTINE DVRENEW +!_______________________________________________________________________ + + SUBROUTINE DGROUP(N,IA,JA,MAXG,NGRP,IGP,JGP,INCL,JDONE,IER) +! .. +! Construct groupings of the column indices of the Jacobian matrix, +! used in the numerical evaluation of the Jacobian by finite +! differences for sparse solutions. +! .. +! Input: +! N = the order of the matrix +! IA,JA = sparse structure descriptors of the matrix by rows +! MAXG = length of available storage in the IGP array +! INCL and JDONE are working arrays of length N. +! Output: +! NGRP = number of groups +! JGP = array of length N containing the column indices by +! groups +! IGP = pointer array of length NGRP + 1 to the locations +! in JGP of the beginning of each group +! IER = error indicator. IER = 0 if no error occurred, or +! 1 if MAXG was insufficient +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: IER, NGRP + INTEGER, INTENT (IN) :: MAXG, N +! .. +! .. Array Arguments .. + INTEGER, INTENT (IN) :: IA(*), JA(*) + INTEGER, INTENT (INOUT) :: IGP(*), INCL(*), JDONE(*), JGP(*) +! .. +! .. Local Scalars .. + INTEGER :: I, J, K, KMAX, KMIN, NCOL, NG +! .. +! .. FIRST EXECUTABLE STATEMENT DGROUP +! .. + IER = 0 + JDONE(1:N) = 0 + NCOL = 1 + DO NG = 1, MAXG + IGP(NG) = NCOL + INCL(1:N) = 0 + DO 20 J = 1, N +! Reject column J if it is already in a group. + IF (JDONE(J)==1) GOTO 20 + KMIN = IA(J) + KMAX = IA(J+1) - 1 + DO 10 K = KMIN, KMAX +! Reject column J if it overlaps any column already +! in this group. + I = JA(K) + IF (INCL(I)==1) GOTO 20 +10 END DO +! Accept column J into group NG. + JGP(NCOL) = J + NCOL = NCOL + 1 + JDONE(J) = 1 + DO K = KMIN, KMAX + I = JA(K) + INCL(I) = 1 + END DO +20 END DO +! Stop if this group is empty (grouping is complete). + IF (NCOL==IGP(NG)) GOTO 30 + END DO +! Error return if not all columns were chosen (MAXG too small). + IF (NCOL<=N) GOTO 40 + NG = MAXG +30 NGRP = NG - 1 + RETURN +40 IER = 1 + RETURN + + END SUBROUTINE DGROUP +!_______________________________________________________________________ + + SUBROUTINE BGROUP(N,BJA,BINCL,BDONE,ML,MU) +! .. +! Construct groupings of the column indices of the Jacobian +! matrix, used in the numerical evaluation of the Jacobian +! by finite differences for banded solutions when the nonzero +! sub and super diagonals are known. BGROUP is similar to +! DGROUP but it does not require the sparse structure arrays +! and it uses real rather than integer work arrays. +! .. +! Input: +! +! N = the order of the matrix (number of odes) +! BINCL = real working array of length N +! BJA = real working array of length N +! BDONE = real working array of length N +! ML = integer lower bandwidth +! MU = integer upper bandwidth +! +! Output: (PRIVATE information used in DVJAC) +! +! BNGRP = integer number of groups +! BJGP = integer array of length N containing the +! column indices by groups +! BIGP = integer pointer array of length BNGRP + 1 +! to the locations in BJGP of the beginning +! of each group +! +! Note: +! On output: +! For I = 1, ..., BNGRP: +! Start of group I: +! J = BIGP(I) +! Number of columns in group I: +! K = BIGP(I+1) - BIGP(I) +! Columns in group I: +! BJGP(J-1+L), L=1, ..., K +! +! Note: +! The three arrays BJA, BINCL, and BDONE are REAL to avoid the +! necessity to allocate three new INTEGER arrays. BGROUP can +! be called only at an integration start or restart because +! DVODE_F90 work arrays are used for these arrays. +! +! Note: +! The PRIVATE banded information SUBDS, NSUBDS, SUPDS, NSUPS, +! ML, and MU must be defined before calling BGROUP. +! ML = lower bandwidth +! MU = upper bandwidth +! NSUBS = number of strict sub diagonals +! SUBDS(I) = row in which the Ith sub diagonal +! begins, I=1, ..., NSUBS +! NSUPS = number of strict super diagonals +! SUPDS(I) = column in which the Ith super diagonal +! begins, I=1, ..., NSUPS +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N, ML, MU +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: BINCL(*), BJA(*), BDONE(*) +! .. +! .. Local Scalars .. + INTEGER :: I, IBDONE, IBINCL, IER, J, K, KBEGIN, KFINI, & + KI, KJ, MAXG, NCOL, NG + REAL (WP) :: FUDGE, ONE_PLUS_FUDGE + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, REAL +! .. +! .. FIRST EXECUTABLE STATEMENT BGROUP +! .. +! Storage for the column grouping information. + IF (ALLOCATED(BIGP)) THEN + DEALLOCATE (BIGP,BJGP,STAT=IER) + CALL CHECK_STAT(IER,760) + END IF + ALLOCATE (BIGP(N+1),BJGP(N),STAT=IER) + CALL CHECK_STAT(IER,770) + + FUDGE = 0.4_WP + ONE_PLUS_FUDGE = 1.0_WP + FUDGE + MAXG = N + 1 +! BDONE(1:N) = 0 ... + BDONE(1:N) = FUDGE + NCOL = 1 + DO NG = 1, MAXG + BIGP(NG) = NCOL +! BINCL(1:N) = 0 ... + BINCL(1:N) = FUDGE + DO 30 J = 1, N +! Reject column J if it is already in a group. +! IF (BDONE(J) == 1) GOTO 30 ... + IBDONE = INT(BDONE(J)) + IF (IBDONE == 1) GOTO 30 +! Vertical extent of band = KBEGIN to KFINI. +! KJ = number of nonzeros in column J. +! BJA(K) = K implies nonzero at (k,j). + KBEGIN = MAX(J-MU,1) + KFINI = MIN(J+ML,N) + KJ = 0 +! BJA(1:N) = 0 ... + BJA(1:N) = FUDGE +! Locate the row positions of the nonzeros in column J. +! Restrict attention to the band: + DO 10 K = KBEGIN, KFINI + IF (K < J) THEN + IF (NSUPS > 0) THEN + DO I = NSUPS, 1, -1 +! KI = SUPDS(I) + J - 1 + KI = J + 1 - SUPDS(I) + IF (K == KI) THEN + KJ = KJ + 1 +! BJA(K) = K ... + BJA(K) = REAL(K) + FUDGE + END IF + END DO + END IF + ELSEIF (K == J) THEN + KJ = KJ + 1 +! BJA(K) = K ... + BJA(K) = REAL(K) + FUDGE + ELSE + IF (NSUBS > 0) THEN + DO I = NSUBS, 1, -1 + KI = SUBDS(I) + J -1 + IF (K == KI) THEN + KJ = KJ + 1 +! BJA(K) = K ... + BJA(K) = REAL(K) + FUDGE + END IF + END DO + END IF + END IF + 10 CONTINUE +! At this point BJA contains the row numbers for +! the nonzeros in column J. + DO 20 K = KBEGIN, KFINI +! Reject column J if it overlaps any column +! already in this group. +! I = BJA(K) + I = INT(BJA(K)) + IBINCL = INT(BINCL(I)) +! IF (BINCL(I) == 1 .AND. I == K) GOTO 30 + IF (IBINCL == 1 .AND. I == K) GOTO 30 + 20 END DO +! Accept column J into group NG. + BJGP(NCOL) = J + NCOL = NCOL + 1 +! BDONE(J) = 1 ... + BDONE(J) = ONE_PLUS_FUDGE + DO K = 1, N +! IF (I == K) BINCL(I) = 1 ... + I = INT(BJA(K)) + IF (I == K) BINCL(I) = ONE_PLUS_FUDGE + END DO + 30 END DO +! Done if this group is empty (grouping is complete). + IF (NCOL == BIGP(NG)) GOTO 40 + END DO + +! Should not get here since MAXG = N + 1. +! Terminal error if not all columns were chosen +! because MAXG too small. + IF (NCOL <= N) THEN + MSG = 'An impossible error occurred in subroutine BGROUP.' + CALL XERRDV(MSG,1600,2,0,0,0,0,ZERO,ZERO) + END IF + + NG = MAXG + 40 BNGRP = NG - 1 + +! Trim BIGP to it's actual size if necessary. + IF (NG < MAXG) THEN +! BJA(1:NG) = BIGP(1:NG) ... + DO I = 1, NG + BJA(I) = REAL(BIGP(I)) + FUDGE + END DO + DEALLOCATE (BIGP,STAT=IER) + CALL CHECK_STAT(IER,780) + ALLOCATE (BIGP(NG),STAT=IER) + CALL CHECK_STAT(IER,790) +! BIGP(1:NG) = BJA(1:NG) ... + DO I = 1, NG + BIGP(I) = INT(BJA(I)) + END DO + END IF + RETURN + + END SUBROUTINE BGROUP +!_______________________________________________________________________ + + SUBROUTINE BANDED_IAJA(N,ML,MU) +! .. +! Build the sparse structure descriptor arrays for a banded +! matrix if the nonzero diagonals are known. +! .. +! Input: +! +! N = the order of the matrix (number of odes) +! ML = integer lower bandwidth +! MU = integer upper bandwidth +! +! Output: (PRIVATE information used in DVJAC) +! +! IAB = IA descriptor array +! JAB = JA descriptor array +! +! Note: +! The PRIVATE banded information SUBDS, NSUBS, SUPDS, NSUPS, +! ML, and MU must be defined before calling BANDED_IAJA. +! ML = lower bandwidth +! MU = upper bandwidth +! NSUBS = number of strict sub diagonals +! SUBDS(I) = row in which the Ith sub diagonal +! begins, I=1, ..., NSUBS +! NSUPS = number of strict super diagonals +! SUPDS(I) = column in which the Ith super diagonal +! begins, I=1, ..., NSUPS +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N, ML, MU +! .. +! .. Local Scalars .. + INTEGER :: I, IER, J, K, KBEGIN, KFINI, KI, KJ, & + NP1, NZBB + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ALLOCATED, MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT BANDED_IAJA +! .. +! Check for errors. + + IF (.NOT.BUILD_IAJA) THEN + MSG = 'BANDED_IAJA cannot be called with BUILD_IAJA = .FALSE.' + CALL XERRDV(MSG,1610,2,0,0,0,0,ZERO,ZERO) + END IF + + IF (NSUBS < 0) THEN + MSG = 'NSUBS < 0 in BANDED_IAJA.' + CALL XERRDV(MSG,1620,2,0,0,0,0,ZERO,ZERO) + ELSE + IF (NSUBS > 0) THEN + IF (NSUBS /= SIZE(SUBDS)) THEN + MSG = 'The size of the SUBDS array must' + CALL XERRDV(MSG,1630,1,0,0,0,0,ZERO,ZERO) + MSG = 'equal NSUBS in BANDED_IAJA.' + CALL XERRDV(MSG,1630,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + END IF + + IF (NSUPS < 0) THEN + MSG = 'NSUPS < 0 in BANDED_IAJA.' + CALL XERRDV(MSG,1640,2,0,0,0,0,ZERO,ZERO) + ELSE + IF (NSUPS > 0) THEN + IF (NSUPS /= SIZE(SUPDS)) THEN + MSG = 'The size of the SUPDS array must' + CALL XERRDV(MSG,1650,1,0,0,0,0,ZERO,ZERO) + MSG = 'equal NSUPS in BANDED_IAJA.' + CALL XERRDV(MSG,1650,2,0,0,0,0,ZERO,ZERO) + END IF + END IF + END IF + +! Allocate the necessary storage for the descriptor arrays. + +! Define the total number of elements in all diagonals. + NP1 = N + 1 + NZB = (NSUBS + NSUPS + 1) * NP1 - 1 + IF (NSUBS /= 0) THEN + DO I = 1, NSUBS + NZB = NZB - SUBDS(I) + END DO + END IF + IF (NSUPS /= 0) THEN + DO I = 1, NSUPS + NZB = NZB - SUPDS(I) + END DO + END IF + IF (ALLOCATED(IAB)) THEN + DEALLOCATE(IAB,JAB,STAT=IER) + CALL CHECK_STAT(IER,800) + END IF + ALLOCATE(IAB(NP1),JAB(NZB),STAT=IER) + CALL CHECK_STAT(IER,810) + IAB(1) = 1 + NZBB = 0 + +! For each column in the matrix... + DO J = 1, N +! Vertical extent of band = KBEGIN to KFINI. +! KJ = number of nonzeros in column J. + KBEGIN = MAX(J-MU,1) + KFINI = MIN(J+ML,N) + KJ = 0 +! Locate the row positions of the nonzeros in column J. +! (Restrict attention to the band.) + IAB(J+1) = IAB(J) +! For each row in the intersection of the band with +! this column ... + DO K = KBEGIN, KFINI +! Does column J intersect a super diagonal at (K,J)? + IF (K < J) THEN + DO I = NSUPS, 1, -1 + KI = J + 1 - SUPDS(I) + IF (K == KI) THEN + KJ = KJ + 1 + IAB(J+1) = IAB(J+1) + 1 + NZBB = NZBB + 1 + JAB(NZBB) = K + GOTO 10 + END IF + END DO + ELSEIF (K == J) THEN +! We are on the main diagonal. + KJ = KJ + 1 + IAB(J+1) = IAB(J+1) + 1 + NZBB = NZBB + 1 + JAB(NZBB) = K + GOTO 10 + ELSE +! Does column J intersect a sub diagonal at (K,J)? + DO I = NSUBS, 1, -1 + KI = SUBDS(I) + J - 1 + IF (K == KI) THEN + KJ = KJ + 1 + IAB(J+1) = IAB(J+1) + 1 + NZBB = NZBB + 1 + JAB(NZBB) = K + GOTO 10 + END IF + END DO + END IF +10 CONTINUE + END DO + END DO + + IF (NZBB /= NZB) THEN + MSG = 'NZBB (I1) is not equal to NZB (I2)' + CALL XERRDV(MSG,1660,1,0,0,0,0,ZERO,ZERO) + MSG = 'in BANDED_IAJA.' + CALL XERRDV(MSG,1660,2,2,NZBB,NZB,0,ZERO,ZERO) + END IF + RETURN + + END SUBROUTINE BANDED_IAJA +!_______________________________________________________________________ + + SUBROUTINE BANDED_GET_BJNZ(N,ML,MU,JCOL,JNZ,NZJ) +! .. +! Locate the nonzeros in a given column of a sparse banded matrix +! with known diagonals. This is a version of BANDED_IAJA modified +! to do only one column. +! .. +! Input: +! +! N = the order of the matrix (number of odes) +! ML = integer lower bandwidth +! MU = integer upper bandwidth +! JCOL = column number between 1 and N +! JZ = integer array of length N +! +! Output: +! +! JNZ = integer array of length N. If +! JNZ(K) is not 0, there is a +! nonzero at position (K,JCOL) +! NZJ = number of nozeros in column JCOL +! +! Caution: +! No parameter checking is done since this subroutine +! will be called many times. Note that a number of +! PRIVATE parameters must be set before calling this +! subroutine. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N, ML, MU, JCOL + INTEGER, INTENT (OUT) :: NZJ +! .. +! .. Array Arguments .. + INTEGER, INTENT (OUT) :: JNZ(*) +! .. +! .. Local Scalars .. + INTEGER :: I, J, K, KBEGIN, KFINI, KI, KJ +! .. +! .. Intrinsic Functions .. + INTRINSIC MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT BANDED_GET_BJNZ +! .. + JNZ(1:N) = 0 + J = JCOL + +! Locate the row positions of the nonzeros in column J. + +! Vertical extent of band = KBEGIN to KFINI. + KBEGIN = MAX(J-MU,1) + KFINI = MIN(J+ML,N) +! KJ = number of nonzeros in column J. + KJ = 0 +! For each row in the intersection of the band with +! this column ... + DO K = KBEGIN, KFINI +! Does column J intersect a super diagonal at (K,J)? + IF (K < J) THEN + DO I = NSUPS, 1, -1 + KI = J + 1 - SUPDS(I) + IF (K == KI) THEN + KJ = KJ + 1 + JNZ(KJ) = K + GOTO 10 + END IF + END DO + ELSEIF (K == J) THEN +! We are on the main diagonal. + KJ = KJ + 1 + JNZ(KJ) = K + GOTO 10 + ELSE +! Does column J intersect a sub diagonal at (K,J)? + DO I = NSUBS, 1, -1 + KI = SUBDS(I) + J - 1 + IF (K == KI) THEN + KJ = KJ + 1 + JNZ(KJ) = K + GOTO 10 + END IF + END DO + END IF +10 CONTINUE + END DO + NZJ = KJ + RETURN + + END SUBROUTINE BANDED_GET_BJNZ +!_______________________________________________________________________ + +! Beginning of Jacobian related routines that use MA28 + + SUBROUTINE DVNLSS28(Y,YH,LDYH,SAVF,EWT,ACOR,IWM,WM,F,JAC, & + NFLAG,ATOL,ITOL) +! .. +! This is the nonlinear system solver for MA28 based sparse solutions. +! .. +! Subroutine DVNLSS28 is a nonlinear system solver, which uses functional +! iteration or a chord (modified Newton) method. For the chord method +! direct linear algebraic system solvers are used. Subroutine DVNLSS28 +! then handles the corrector phase of this integration package. +! Communication with DVNLSS28 is done with the following variables. (For +! more details, please see the comments in the driver subroutine.) +! Y = The dependent variable, a vector of length N, input. +! YH = The Nordsieck (Taylor) array, LDYH by LMAX, input +! and output. On input, it contains predicted values. +! LDYH = A constant >= N, the first dimension of YH, input. +! SAVF = A work array of length N. +! EWT = An error weight vector of length N, input. +! ACOR = A work array of length N, used for the accumulated +! corrections to the predicted y vector. +! WM,IWM = Real and integer work arrays associated with matrix +! operations in chord iteration (MITER /= 0). +! F = Dummy name for user supplied routine for f. +! JAC = Dummy name for user supplied Jacobian routine. +! NFLAG = Input/output flag, with values and meanings as follows: +! INPUT +! 0 first call for this time step. +! -1 convergence failure in previous call to DVNLSS28. +! -2 error test failure in DVSTEP. +! OUTPUT +! 0 successful completion of nonlinear solver. +! -1 convergence failure or singular matrix. +! -2 unrecoverable error in matrix preprocessing +! (cannot occur here). +! -3 unrecoverable error in solution (cannot occur +! here). +! IPUP = Own variable flag with values and meanings as follows: +! 0, do not update the Newton matrix. +! MITER \= 0 update Newton matrix, because it is the +! initial step, order was changed, the error +! test failed, or an update is indicated by +! the scalar RC or step counter NST. +! For more details, see comments in driver subroutine. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: ITOL, LDYH + INTEGER, INTENT (INOUT) :: NFLAG +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: ACOR(*), EWT(*), SAVF(*), & + WM(*), Y(*), YH(LDYH,*) + REAL (WP), INTENT (IN) :: ATOL(*) + INTEGER IWM(*) + LOGICAL DUMMY +! .. +! .. Subroutine Arguments .. + EXTERNAL F, JAC +! .. +! .. Local Scalars .. + REAL (WP) :: ACNRMNEW, CSCALE, DCON, DEL, DELP + INTEGER :: I, IERPJ, IERSL, M +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT DVNLSS28 +! .. +! Get rid of a couple of needless compiler warning messages. + DUMMY = .FALSE. + IF (DUMMY) THEN + WM(1) = ZERO + IWM(1) = 0 + END IF +! On the first step, on a change of method order, or after a +! nonlinear convergence failure with NFLAG = -2, set IPUP = MITER +! to force a Jacobian update when MITER /= 0. + IF (JSTART==0) NSLP = 0 + IF (NFLAG==0) ICF = 0 + IF (NFLAG==-2) IPUP = MITER + IF ((JSTART==0) .OR. (JSTART==-1)) IPUP = MITER +! If this is functional iteration, set CRATE = 1 and drop +! to 220. + IF (MITER==0) THEN + CRATE = ONE + GOTO 10 + END IF + +! RC is the ratio of new to old values of the coefficient H/EL(2)=h/l1. +! When RC differs from 1 by more than CCMAX, IPUP is set to MITER +! to force DVJACS28 to be called, if a Jacobian is involved. In any +! case, DVJACS28 is called at least every MSBP steps. + + DRC = ABS(RC-ONE) + IF (DRC>CCMAX .OR. NST>=NSLP+MSBP) IPUP = MITER + +! Up to MAXCOR corrector iterations are taken. A convergence test is +! made on the r.m.s. norm of each correction, weighted by the error +! weight vector EWT. The sum of the corrections is accumulated in the +! vector ACOR(i). The YH array is not altered in the corrector loop. + +10 M = 0 + DELP = ZERO +! Original: +! CALL DCOPY_F90(N,YH(1,1),1,Y,1) + CALL DCOPY_F90(N,YH(1:N,1),1,Y(1:N),1) + CALL F(N,TN,Y,SAVF) + NFE = NFE + 1 + IF (BOUNDS) THEN + DO I = 1, NDX + IF (ABS(YH(IDX(I),1)-LB(I))<=ZERO) SAVF(IDX(I)) = & + MAX(SAVF(IDX(I)),ZERO) + IF (ABS(YH(IDX(I),1)-UB(I))<=ZERO) SAVF(IDX(I)) = & + MIN(SAVF(IDX(I)),ZERO) + END DO + END IF + IF (IPUP<=0) GOTO 20 + +! If indicated, the matrix P = I - h*rl1*J is reevaluated and +! preprocessed before starting the corrector iteration. IPUP +! is set to 0 as an indicator that this has been done. + + CALL DVJACS28(Y,YH,LDYH,EWT,ACOR,SAVF,F,JAC,IERPJ,N, & + ATOL,ITOL) + IPUP = 0 + RC = ONE + DRC = ZERO + CRATE = ONE + NSLP = NST +! If matrix is singular, take error return to force cut in +! step size. + IF (IERPJ/=0) GOTO 70 +20 ACOR(1:N) = ZERO +! This is a looping point for the corrector iteration. +30 IF (MITER/=0) GOTO 40 + +! In the case of functional iteration, update Y directly from +! the result of the last function evaluation. + + SAVF(1:N) = RL1*(H*SAVF(1:N)-YH(1:N,2)) + Y(1:N) = SAVF(1:N) - ACOR(1:N) + DEL = DVNORM(N,Y,EWT) + Y(1:N) = YH(1:N,1) + SAVF(1:N) + CALL DCOPY_F90(N,SAVF,1,ACOR,1) + GOTO 50 + +! In the case of the chord method, compute the corrector error, +! and solve the linear system with that as right-hand side and +! P as coefficient matrix. The correction is scaled by the factor +! 2/(1+RC) to account for changes in h*rl1 since the last +! DVJACS28 call. + +40 Y(1:N) = (RL1*H)*SAVF(1:N) - (RL1*YH(1:N,2)+ACOR(1:N)) + CALL DVSOLS28(Y,SAVF,IERSL) + NNI = NNI + 1 + IF (IERSL>0) GOTO 60 + IF (METH==2 .AND. ABS(RC-ONE)>ZERO) THEN + CSCALE = TWO/(ONE+RC) + CALL DSCAL_F90(N,CSCALE,Y,1) + END IF + DEL = DVNORM(N,Y,EWT) + CALL DAXPY_F90(N,ONE,Y,1,ACOR,1) + Y(1:N) = YH(1:N,1) + ACOR(1:N) + +! Test for convergence. If M > 0, an estimate of the convergence +! rate constant is stored in CRATE, and this is used in the test. + +50 IF (M/=0) CRATE = MAX(CRDOWN*CRATE,DEL/DELP) + DCON = DEL*MIN(ONE,CRATE)/TQ(4) + IF (DCON<=ONE) GOTO 80 + M = M + 1 + IF (M==MAXCOR) GOTO 60 + IF (M>=2 .AND. DEL>RDIV*DELP) GOTO 60 + DELP = DEL + CALL F(N,TN,Y,SAVF) + NFE = NFE + 1 + IF (BOUNDS) THEN + DO I = 1, NDX + IF (ABS(YH(IDX(I),1)-LB(I))<=ZERO) SAVF(IDX(I)) = & + MAX(SAVF(I),ZERO) + IF (ABS(YH(IDX(I),1)-UB(I))<=ZERO) SAVF(IDX(I)) = & + MIN(SAVF(I),ZERO) + END DO + END IF + GOTO 30 + +60 IF (MITER==0 .OR. JCUR==1) GOTO 70 + ICF = 1 + IPUP = MITER + GOTO 10 + +70 CONTINUE + NFLAG = -1 + ICF = 2 + IPUP = MITER + RETURN + +! Return for successful step. +80 NFLAG = 0 + +! Enforce bounds. + IF (BOUNDS) THEN + CHANGED_ACOR = .FALSE. + IF (M==0) THEN + ACNRM = DEL + ELSE + ACNRM = DVNORM(N,ACOR,EWT) + END IF + IF (MITER/=0) THEN +! Since Y(:) = YH(:,1) + ACOR(:): + DO I = 1, NDX + IF (Y(IDX(I))UB(I)) THEN + CHANGED_ACOR = .TRUE. + ACOR(IDX(I)) = UB(I) - YH(IDX(I),1) + SAVF(IDX(I)) = ACOR(IDX(I)) + END IF + END DO + ELSE +! Since Y(:) = YH(:,1) + SAVF(:) and +! since CALL DCOPY_F90(N, SAVF, 1, ACOR, 1) ... + DO I = 1, NDX + IF (Y(IDX(I))UB(IDX(I))) THEN + CHANGED_ACOR = .TRUE. + ACOR(IDX(I)) = UB(I) - YH(IDX(I),1) + END IF + END DO + END IF + IF (CHANGED_ACOR) THEN + IF (M==0) THEN + ACNRMNEW = DEL + ELSE + ACNRMNEW = DVNORM(N,ACOR,EWT) + END IF + ACNRM = MAX(ACNRM,ACNRMNEW) + ELSE + END IF + NFLAG = 0 + JCUR = 0 + ICF = 0 + ELSE +! No projections are required. + NFLAG = 0 + JCUR = 0 + ICF = 0 + IF (M==0) ACNRM = DEL + IF (M>0) ACNRM = DVNORM(N,ACOR,EWT) + END IF + RETURN + + END SUBROUTINE DVNLSS28 +!_______________________________________________________________________ + + SUBROUTINE DVSOLS28(X,TEM,IERSL) +! .. +! Manage the solution of the MA28 based sparse linear system arising +! from a chord iteration. +! .. +! This routine solves the sparse linear system arising from a chord +! iteration. If MITER is 6 or 7, it calls MA28CD to accomplish this. +! Communication with DVSOLS28 uses the following variables: +! X = The right-hand side vector on input, and the solution vector +! on output, of length N. +! TEM (=SAVF(*)) +! IERSL = Output flag. IERSL = 0 if no trouble occurred. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: IERSL +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: TEM(*), X(*) +! .. +! .. Local Scalars .. + INTEGER :: I +! .. +! .. FIRST EXECUTABLE STATEMENT DVSOLS28 +! .. + IF (SCALE_MATRIX) THEN + DO I = 1, N + X(I) = X(I) * RSCALEX(I) + END DO + END IF + IERSL = 0 + CALL MA28CD(N,PMAT,LICN_ALL,ICN,IKEEP28,X,TEM,1) + MA28CD_CALLS = MA28CD_CALLS + 1 + IF (SCALE_MATRIX) THEN + DO I = 1, N + X(I) = X(I) * CSCALEX(I) + END DO + END IF + RETURN + + END SUBROUTINE DVSOLS28 +!_______________________________________________________________________ + + SUBROUTINE DVJACS28(Y,YH,LDYH,EWT,FTEMP1,SAVF,F,JAC,IERPJ,N, & + ATOL,ITOL) +! .. +! Compute and process P = I - H*RL1*J, where J is an approximation to +! the MA28 based sparse Jacobian. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: IERPJ + INTEGER, INTENT (IN) :: LDYH, N, ITOL +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: EWT(*), FTEMP1(*), SAVF(*), Y(*), YH(LDYH,*) + REAL (WP), INTENT (IN) :: ATOL(*) +! .. +! .. Subroutine Arguments .. + EXTERNAL F, JAC +! .. +! .. Local Scalars .. + REAL (WP) :: CON, FAC, HRL1, R, R0, SRUR + INTEGER :: I, IER, J, JER, JJ, JJ1, JJ2, K, K1, K2, MA28, & + MA28SAVE, MB28SAVE, NG, NZ + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, EXP, MAX, REAL +! .. +! .. FIRST EXECUTABLE STATEMENT DVJACS28 +! .. + IERPJ = 0 + +! Structure determination + +! Calculate the sparsity structure if this is the first call to +! DVJACS28 with ISTATE = 1 or if it is a continuation call with +! ISTATE = 3. + IF (ISTATC==1 .OR. ISTATC==3) THEN + CALL DVPREPS(N,Y,YH,LDYH,DTEMP,EWT,F,JAC) + ISTATC = 0 + END IF + JCUR = 0 + HRL1 = H*RL1 + CON = -HRL1 + +! If MA28 = 4 the saved copy of the Jacobian will be used +! to restore PMAT. If MA28 = 1,2,3 the Jacobian will be +! recomputed. If MA28 = 1,2 the JVECT and ICN pointer arrays +! will be defined and MA28AD will be called to decompose +! PMAT. If MA28 = 3 the JVECT pointer array will be defined +! and MA28BD will be called to decompose PMAT using the ICN +! pointer array returned in the last call to MA28AD. + + MA28 = 4 + IF (INEWJ==1 .OR. MB28==0) MA28 = 3 + IF (NST>=NSLJ+MSBJ) MA28 = 3 +! IF (ICF==1 .OR. ICF==2) MA28 = 3 + IF (ICF==1 .AND. DRC=NSLG+MSBG) MA28 = 2 + IF (JSTART==0 .OR. JSTART==-1) MA28 = 1 + JSTART = 1 +10 IF (MA28<=2) NSLG = NST + IF (MA28<=3) NSLJ = NST + +! Analytical Sparse Jacobian + +! If MITER = 6, call JAC to evaluate J analytically, multiply +! J by CON = -H*EL(1), and add the identity matrix to form P. + IF (MITER==6) THEN + IF (MA28==4) THEN +! Reuse the saved Jacobian. + NZ = IAN(N+1) - 1 + PMAT(1:NZ) = CON*JMAT(1:NZ) + DO K = 1, NZ + IF (JAN(K)==JVECT(K)) PMAT(K) = PMAT(K) + ONE + END DO + GOTO 90 + END IF + JCUR = 1 + NJE = NJE + 1 + IF (MA28==1 .OR. MA28==2) THEN + NZ = IAN(N+1) - 1 + CALL JAC(N,TN,Y,IAN,JAN,NZ,PMAT) + NZ = IAN(N+1) - 1 + IF (NZ>NZ_ALL) THEN + MSG = 'DVODE_F90-- NZ > NZ_ALL in DVJACS28.' + CALL XERRDV(MSG,1670,2,0,0,0,0,ZERO,ZERO) + END IF +! Define column pointers for MA28AD. + CALL SET_ICN(N,IAN,ICN) + CALL CHECK_DIAG(N,IAN,JAN,ICN) + PMAT(1:NZ) = CON*PMAT(1:NZ) + DO K = 1, NZ + IF (JAN(K)==ICN(K)) PMAT(K) = PMAT(K) + ONE + END DO + GOTO 80 + ELSE +! MA28 = 3... + NZ = IAN(N+1) - 1 + CALL JAC(N,TN,Y,IAN,JAN,NZ,PMAT) + NZ = IAN(N+1) - 1 + IF (NZ>NZ_ALL) THEN + MSG = 'DVODE_F90-- NZ > NZ_ALL in DVJACS28.' + CALL XERRDV(MSG,1680,2,0,0,0,0,ZERO,ZERO) + END IF +! Define column pointers for MA28AD. + CALL SET_ICN(N,IAN,JVECT) + CALL CHECK_DIAG(N,IAN,JAN,JVECT) + IF (INEWJ/=1) JMAT(1:NZ) = PMAT(1:NZ) + PMAT(1:NZ) = CON*PMAT(1:NZ) + DO K = 1, NZ + IF (JAN(K)==JVECT(K)) PMAT(K) = PMAT(K) + ONE + END DO + GOTO 90 + END IF + END IF + +! Finite Difference Sparse Jacobian + +! If MITER = 7, evaluate J numerically, multiply J by +! CON, and add the identity matrix to form P. + IF (MITER==7) THEN + IF (MA28==4) THEN +! Reuse the saved constant Jacobian. + NZ = IAN(N+1) - 1 + PMAT(1:NZ) = CON*JMAT(1:NZ) + DO J = 1, N + K1 = IAN(J) + K2 = IAN(J+1) - 1 + DO K = K1, K2 + I = JAN(K) + IF (I==J) PMAT(K) = PMAT(K) + ONE + END DO + END DO + GOTO 90 + ELSE + NZ = IAN(N+1) - 1 + JCUR = 1 + IF (.NOT.(J_IS_CONSTANT.AND.J_HAS_BEEN_COMPUTED)) THEN + IF (USE_JACSP) THEN +! Approximate the Jacobian using Doug Salane's JACSP. +! The JPNTRDS and INDROWDS pointer arrays were defined +! in DVPREPS (and altered in DSM). + IOPTDS(1) = 2 + IOPTDS(2) = 0 + IOPTDS(3) = 1 + IOPTDS(5) = 0 +! INFORDS(4) was initialized in DVPREPS (and altered in +! the first call to JACSP). + LWKDS = 3 * N + LIWKDS = 50 + N + NRFJACDS = NZ + NCFJACDS = 1 + +! Set flag to indicate how the YSCALE vector will be +! set for JACSP. + LIKE_ORIGINAL_VODE = .FALSE. +! Calculate the YSCALEDS vector for JACSPDV. + IF (LIKE_ORIGINAL_VODE) THEN + FAC = DVNORM(N,SAVF,EWT) +! JACSPDB multiplies YSCALEDS(*) BY UROUND**0.825: +! R0 = THOU*ABS(H)*UROUND*REAL(N)*FAC + R0 = THOU*ABS(H)*REAL(N)*FAC + IF (ABS(R0)<=ZERO) R0 = ONE +! SRUR = WM1 + DO J = 1, N +! JACSPDB multiplies YSCALEDS(*) BY UROUND**0.825: +! R = MAX(ABS(Y(J)),R0/EWT(J)) + R = MAX(ABS(Y(J))/U325,(R0/EWT(J))*U125) + YSCALEDS(J) = R + END DO + ELSE + IF (ITOL == 1 .OR. ITOL == 3) THEN + DO J = 1, N + YSCALEDS(J) = MAX(ABS(Y(J)),ATOL(1),UROUND) + END DO + ELSE + DO J = 1, N + YSCALEDS(J) = MAX(ABS(Y(J)),ATOL(J),UROUND) + END DO + END IF + END IF + + CALL JACSPDB(F,N,TN,Y,SAVF,PMAT(1),NRFJACDS, & + YSCALEDS,FACDS,IOPTDS,WKDS,LWKDS,IWKDS,LIWKDS, & + MAXGRPDS,NGRPDS,JPNTRDS,INDROWDS) + NFE = NFE + IWKDS(7) + NJE = NJE + 1 + + DO NG = 1, MAXGRPDS + JJ1 = IGP(NG) + JJ2 = IGP(NG+1) - 1 + DO JJ = JJ1, JJ2 + J = JGP(JJ) + K1 = IAN(J) + K2 = IAN(J+1) - 1 + DO K = K1, K2 + I = JAN(K) + GOTO (17,17,18) MA28 +! Define the row pointers for MA28AD. +17 JVECT(K) = I +! Define the column pointers for MA28AD. + ICN(K) = J + GOTO 19 +! Define the column pointers for MA28AD. +18 JVECT(K) = J +19 CONTINUE + IF (INEWJ==0) JMAT(K) = PMAT(K) + PMAT(K) = CON*PMAT(K) + IF (I==J) PMAT(K) = PMAT(K) + ONE + END DO + END DO + END DO + NFE = NFE + MAXGRPDS + ELSE + FAC = DVNORM(N,SAVF,EWT) + R0 = THOU*ABS(H)*UROUND*REAL(N)*FAC + IF (ABS(R0)<=ZERO) R0 = ONE + SRUR = WM1 + DO NG = 1, NGP + JJ1 = IGP(NG) + JJ2 = IGP(NG+1) - 1 + DO JJ = JJ1, JJ2 + J = JGP(JJ) + R = MAX(SRUR*ABS(Y(J)),R0/EWT(J)) + Y(J) = Y(J) + R + END DO + CALL F(N,TN,Y,FTEMP1) + NFE = NFE + 1 + DO JJ = JJ1, JJ2 + J = JGP(JJ) + Y(J) = YH(J,1) + R = MAX(SRUR*ABS(Y(J)),R0/EWT(J)) + FAC = ONE / R + K1 = IAN(J) + K2 = IAN(J+1) - 1 + DO K = K1, K2 + I = JAN(K) + GOTO (20,20,30) MA28 +! Define row pointers for MA28AD. +20 JVECT(K) = I +! Define column pointers for MA28AD. + ICN(K) = J + GOTO 40 +! Define column pointers for MA28AD. +30 JVECT(K) = J +40 PMAT(K) = (FTEMP1(I)-SAVF(I)) * FAC + IF (INEWJ==0) JMAT(K) = PMAT(K) + PMAT(K) = CON*PMAT(K) + IF (I==J) PMAT(K) = PMAT(K) + ONE + END DO + END DO + END DO + NFE = NFE + NGP + NJE = NJE + 1 + END IF + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .TRUE. + ELSE +! Do not recompute the constant Jacobian. +! Reuse the saved Jacobian. + NZ = IAN(N+1) - 1 + PMAT(1:NZ) = CON*JMAT(1:NZ) + DO J = 1, N + K1 = IAN(J) + K2 = IAN(J+1) - 1 + DO K = K1, K2 + I = JAN(K) + IF (I==J) PMAT(K) = PMAT(K) + ONE + GOTO (50,50,60) MA28 +! Define row pointers for MA28AD. +50 JVECT(K) = I +! Define column pointers for MA28AD. + ICN(K) = J + GOTO 70 +! Define column pointers for MA28AD. +60 JVECT(K) = J +70 CONTINUE + END DO + END DO + END IF + GOTO (80,80,90) MA28 + END IF + END IF + +! MA28AD does an LU factorization based on a pivotal strategy +! designed to compromise between maintaining sparsity and +! controlling loss of accuracy due to roundoff error. Unless +! magnitudes of Jacobian elements change so as to invalidate +! choice of pivots, MA28AD need only be called at beginning +! of the integration. + +80 CONTINUE + IF (SCALE_MATRIX) THEN +! MA19AD computes scaling factors for the iteration matrix. + CALL MC19AD(N,NZ,PMAT,JAN,ICN,RSCALEX,CSCALEX,WSCALEX) + MC19AD_CALLS = MC19AD_CALLS + 1 + DO I =1, N + RSCALEX(I) = EXP(RSCALEX(I)) + CSCALEX(I) = EXP(CSCALEX(I)) + END DO + DO K = 1, NZ + I = JAN(K) + J = ICN(K) + PMAT(K) = PMAT(K) * RSCALEX(I) * CSCALEX(J) + END DO + END IF + OK_TO_CALL_MA28 = .TRUE. + CALL MA28AD(N,NZ,PMAT,LICN_ALL,JAN,LIRN_ALL,ICN,U_PIVOT, & + IKEEP28,IW28,FTEMP1,IER) + OK_TO_CALL_MA28 = .FALSE. + MA28AD_CALLS = MA28AD_CALLS + 1 + MA28SAVE = MA28 + MB28SAVE = MB28 + MB28 = 0 + NLU = NLU + 1 + MAX_MINIRN = MAX(MAX_MINIRN,MINIRN) + MAX_MINICN = MAX(MAX_MINICN,MINICN) +! IER = -1: Numerically singular Jacobian +! IER = -2: Structurally singular Jacobian + IF (IER==-1 .OR. IER==-2) IERPJ = 1 + IF (IER==-3) THEN +! LIRN_ALL is not large enough. + IF (LP /= 0) THEN + MSG = 'LIRN_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1690,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1690,1,1,LIRN_ALL,0,0,ZERO,ZERO) + END IF +! Allocate more space for JAN and JVECT and try again. + LIRN_ALL = LIRN_ALL + MAX(MAX(1000,ELBOW_ROOM*NZ_SWAG),10*N) + LIRN_ALL = MAX(LIRN_ALL,(11*MINIRN)/10) + IF (LIRN_ALL>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping.' + CALL XERRDV(MSG,1700,2,0,0,0,0,ZERO,ZERO) + END IF + DEALLOCATE (JAN,STAT=JER) + CALL CHECK_STAT(JER,820) + ALLOCATE (JAN(LIRN_ALL),STAT=JER) + CALL CHECK_STAT(JER,830) + IF (MITER==7) THEN + JAN(1:NZ) = JVECT(1:NZ) + END IF + DEALLOCATE (JVECT,STAT=JER) + CALL CHECK_STAT(JER,840) + ALLOCATE (JVECT(LIRN_ALL),STAT=JER) + CALL CHECK_STAT(JER,850) + IF (MITER==7) THEN + JVECT(1:NZ) = JAN(1:NZ) + END IF + MA28 = MA28SAVE + MB28 = MB28SAVE + NLU = NLU - 1 +! Since PMAT has changed, it must be restored: + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .FALSE. + GOTO 10 + END IF + IF (IER==-4 .OR. IER==-5 .OR. IER==-6) THEN +! LICN_ALL is not large enough. + IF (LP /= 0) THEN + MSG = 'LICN_ALL (=I1) is not large enough.' + CALL XERRDV(MSG,1710,1,0,0,0,0,ZERO,ZERO) + MSG = 'Allocating more space for another try.' + CALL XERRDV(MSG,1710,1,1,LICN_ALL,0,0,ZERO,ZERO) + END IF +! Allocate more space for JAN and JVECT and try again. + LICN_ALL = LICN_ALL + MAX(MAX(1000,ELBOW_ROOM*NZ_SWAG),10*N) + LICN_ALL = MAX(LICN_ALL,(11*MINICN)/10) + IF (LICN_ALL>MAX_ARRAY_SIZE) THEN + MSG = 'Maximum array size exceeded. Stopping.' + CALL XERRDV(MSG,1720,2,0,0,0,0,ZERO,ZERO) + END IF + DEALLOCATE (PMAT,ICN,STAT=JER) + CALL CHECK_STAT(JER,860) + ALLOCATE (PMAT(LICN_ALL),ICN(LICN_ALL),STAT=JER) + PMAT(1:LICN_ALL) = ZERO + CALL CHECK_STAT(JER,870) + IF (MITER==7) THEN + JAN(1:NZ) = JVECT(1:NZ) + END IF + MA28 = MA28SAVE + MB28 = MB28SAVE + NLU = NLU - 1 + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .FALSE. + GOTO 10 + END IF + IF (MITER/=7) RETURN + JAN(1:NZ) = JVECT(1:NZ) + RETURN + +! MA28BD uses the pivot sequence generated by an earlier call +! to MA28AD to factor a new matrix of the same structure. + +90 CONTINUE + + IF (SCALE_MATRIX) THEN + DO K =1, NZ + I = JAN(K) + J = JVECT(K) + PMAT(K) = PMAT(K) * RSCALEX(I) * CSCALEX(J) + END DO + END IF + CALL MA28BD(N,NZ,PMAT,LICN_ALL,JAN,JVECT,ICN,IKEEP28,IW28, & + FTEMP1,IER) + MA28BD_CALLS = MA28BD_CALLS + 1 + MB28 = 1 + NLU = NLU + 1 +! IER = -2 : The matrix is numerically singular. The MA28AD +! pivot sequence leads to a zero pivot, that is, +! to one for which the ratio of it to the smallest +! element in the row is less than EPS. +! IER = -13: The matrix is structurally singular. + + IF (REDO_PIVOT_SEQUENCE) THEN +! Force MA28AD to calculate a new pivot sequence. + IF (IER/=-13 .AND. IER/=-2 .AND. IER<=0) RETURN + IF (IER==-2) MA28 = 1 + IF (IER==-13) MA28 = 1 + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .FALSE. + ELSE + IF (IER==-2) IERPJ = 1 + IF (IER/=-13 .AND. IER<=0) RETURN + IF (IER==-13) MA28 = 1 + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .FALSE. + END IF + IF (IER>0) MA28 = 2 + IF (IER==-13 .AND. MITER==7 .AND. MOSS==2) THEN +! Recompute the sparsity structure. + CALL DVRENEW(N,Y,SAVF,EWT,F) + IF (J_IS_CONSTANT) J_HAS_BEEN_COMPUTED = .FALSE. + END IF + GOTO 10 + + END SUBROUTINE DVJACS28 +! End of Jacobian related routines that use MA28 +!_______________________________________________________________________ + + SUBROUTINE SET_ICN(N,IA,ICN) +! .. +! Define the column locations of nonzero elements for a sparse +! matrix. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N +! .. +! .. Array Arguments .. + INTEGER, INTENT (IN) :: IA(*) + INTEGER, INTENT (INOUT) :: ICN(*) +! .. +! .. Local Scalars .. + INTEGER :: J, KMAX, KMIN +! .. +! .. FIRST EXECUTABLE STATEMENT SET_ICN +! .. + KMIN = 1 + DO J = 1, N + KMAX = IA(J+1) - 1 + ICN(KMIN:KMAX) = J + KMIN = KMAX + 1 + END DO + + END SUBROUTINE SET_ICN +!_______________________________________________________________________ + + SUBROUTINE CHECK_DIAG(N,IA,JA,ICN) +! .. +! Check that the diagonal is included in the sparse matrix +! description arrays. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N +! .. +! .. Array Arguments .. + INTEGER, INTENT (IN) :: IA(*), ICN(*), JA(*) +! .. +! .. Local Scalars .. + INTEGER :: J, K, KMAX, KMIN + CHARACTER (80) :: MSG +! .. +! .. FIRST EXECUTABLE STATEMENT CHECK_DIAG +! .. + KMIN = 1 + DO J = 1, N + KMAX = IA(J+1) - 1 + DO K = KMIN, KMAX + IF (JA(K)==ICN(K)) GOTO 10 + END DO + MSG = 'In CHECK_DIAG, the diagonal is not present.' + CALL XERRDV(MSG,1730,2,0,0,0,0,ZERO,ZERO) +10 CONTINUE + KMIN = KMAX + 1 + END DO + + END SUBROUTINE CHECK_DIAG +!_______________________________________________________________________ + + SUBROUTINE DVCHECK(JOB,G,NEQ,Y,YH,NYH,G0,G1,GX,IRT) +! .. +! Check for the presence of a root in the vicinity of the current T, +! in a manner depending on the input flag JOB, and call DVROOTS to +! locate the root as precisely as possible. +! .. +! This subroutine is essentially DRCHEK from LSODAR. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: IRT + INTEGER, INTENT (IN) :: JOB, NEQ, NYH +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: G0(*), G1(*), GX(*), Y(*), YH(NYH,*) +! .. +! .. Subroutine Arguments .. + EXTERNAL G +! .. +! .. Local Scalars .. + REAL (WP) :: HMING, T1, TEMP1, TEMP2, X + INTEGER :: I, IFLAG, JFLAG + LOGICAL :: ZROOT +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN +! .. +! In addition to variables described previously, DVCHECK +! uses the following for communication: +! JOB = integer flag indicating type of call: +! JOB = 1 means the problem is being initialized, and DVCHECK +! is to look for a root at or very near the initial T. +! JOB = 2 means a continuation call to the solver was just +! made, and DVCHECK is to check for a root in the +! relevant part of the step last taken. +! JOB = 3 means a successful step was just taken, and DVCHECK +! is to look for a root in the interval of the step. +! G0 = array of length NG, containing the value of g at T = T0ST. +! G0 is input for JOB >= 2, and output in all cases. +! G1,GX = arrays of length NG for work space. +! IRT = completion flag: +! IRT = 0 means no root was found. +! IRT = -1 means JOB = 1 and a root was found too near to T. +! IRT = 1 means a legitimate root was found (JOB = 2 or 3). +! On return, T0ST is the root location, and Y is the +! corresponding solution vector. +! T0ST = value of T at one endpoint of interval of interest. Only +! roots beyond T0ST in the direction of integration are sought. +! T0ST is input if JOB >= 2, and output in all cases. +! T0ST is updated by DVCHECK, whether a root is found or not. +! TLAST = last value of T returned by the solver (input only). +! TOUTC = copy of TOUT(input only). +! IRFND = input flag showing whether the last step taken had a root. +! IRFND = 1 if it did, = 0 if not. +! ITASKC = copy of ITASK (input only). +! NGC = copy of NG (input only). +! .. +! .. FIRST EXECUTABLE STATEMENT DVCHECK +! .. + IRT = 0 + JROOT(1:NGC) = 0 + HMING = (ABS(TN)+ABS(H))*UROUND*HUN + + GOTO (10,30,80) JOB + +! Evaluate g at initial T, and check for zero values. +10 CONTINUE + T0ST = TN + CALL G(NEQ,T0ST,Y,NGC,G0) + NGE = 1 + ZROOT = .FALSE. + DO I = 1, NGC + IF (ABS(G0(I))<=ZERO) ZROOT = .TRUE. + END DO + IF (.NOT.ZROOT) GOTO 20 +! g has a zero at T. Look at g at T + (small increment). +! TEMP1 = SIGN(HMING, H) +! T0ST = T0ST + TEMP1 +! TEMP2 = TEMP1 / H + TEMP2 = MAX(HMING/ABS(H),TENTH) + TEMP1 = TEMP2*H + T0ST = T0ST + TEMP1 + Y(1:N) = Y(1:N) + TEMP2*YH(1:N,2) + CALL G(NEQ,T0ST,Y,NGC,G0) + NGE = NGE + 1 + ZROOT = .FALSE. + DO I = 1, NGC + IF (ABS(G0(I))<=ZERO) ZROOT = .TRUE. + END DO + IF (.NOT.ZROOT) GOTO 20 +! g has a zero at T and also close to T. Take error return. + IRT = -1 + RETURN + +20 CONTINUE + RETURN + +30 CONTINUE + IF (IRFND==0) GOTO 70 +! If a root was found on the previous step, evaluate G0 = g(T0ST). + CALL DVINDY_CORE(T0ST,0,YH,NYH,Y,IFLAG) + IF (BOUNDS) THEN + DO I = 1, NDX + Y(IDX(I)) = MAX(Y(IDX(I)),LB(I)) + Y(IDX(I)) = MIN(Y(IDX(I)),UB(I)) + END DO + END IF + CALL G(NEQ,T0ST,Y,NGC,G0) + NGE = NGE + 1 + ZROOT = .FALSE. + DO I = 1, NGC + IF (ABS(G0(I))<=ZERO) ZROOT = .TRUE. + END DO + IF (.NOT.ZROOT) GOTO 70 +! g has a zero at T0ST. Look at g at T + (small increment). + TEMP1 = SIGN(HMING,H) + T0ST = T0ST + TEMP1 + IF ((T0ST-TN)*HZERO) GOTO 60 + JROOT(I) = 1 + ZROOT = .TRUE. +60 END DO + IF (.NOT.ZROOT) GOTO 70 +! g has a zero at T0ST and also close to T0ST. Return root. + IRT = 1 + RETURN +! G0 has no zero components. Proceed to check relevant interval. +70 IF (ABS(TN-TLAST)<=ZERO) GOTO 130 + +80 CONTINUE +! Set T1 to TN or TOUTC, whichever comes first, and get g at T1. + IF (ITASKC==2 .OR. ITASKC==3 .OR. ITASKC==5) GOTO 90 + IF ((TOUTC-TN)*H>=ZERO) GOTO 90 + T1 = TOUTC + IF ((T1-T0ST)*H<=ZERO) GOTO 130 + CALL DVINDY_CORE(T1,0,YH,NYH,Y,IFLAG) + IF (BOUNDS) THEN + DO I = 1, NDX + Y(IDX(I)) = MAX(Y(IDX(I)),LB(I)) + Y(IDX(I)) = MIN(Y(IDX(I)),UB(I)) + END DO + END IF + GOTO 100 +90 T1 = TN + DO I = 1, N + Y(I) = YH(I,1) + END DO +100 CALL G(NEQ,T1,Y,NGC,G1) + NGE = NGE + 1 +! Call DVROOTS to search for root in interval from T0ST to T1. + JFLAG = 0 +110 CONTINUE + CALL DVROOTS(NGC,HMING,JFLAG,T0ST,T1,G0,G1,GX,X) + IF (JFLAG>1) GOTO 120 + CALL DVINDY_CORE(X,0,YH,NYH,Y,IFLAG) + IF (BOUNDS) THEN + DO I = 1, NDX + Y(IDX(I)) = MAX(Y(IDX(I)),LB(I)) + Y(IDX(I)) = MIN(Y(IDX(I)),UB(I)) + END DO + END IF + CALL G(NEQ,X,Y,NGC,GX) + NGE = NGE + 1 + GOTO 110 +120 T0ST = X + CALL DCOPY_F90(NGC,GX,1,G0,1) + IF (JFLAG==4) GOTO 130 +! Found a root. Interpolate to X and return. + CALL DVINDY_CORE(X,0,YH,NYH,Y,IFLAG) + IF (BOUNDS) THEN + DO I = 1, NDX + Y(IDX(I)) = MAX(Y(IDX(I)),LB(I)) + Y(IDX(I)) = MIN(Y(IDX(I)),UB(I)) + END DO + END IF + IRT = 1 + RETURN +130 CONTINUE + RETURN + + END SUBROUTINE DVCHECK +!_______________________________________________________________________ + + SUBROUTINE DVROOTS(NG,HMIN,JFLAG,X0,X1,G0,G1,GX,X) +! .. +! Perform root finding for DVODE_F90. +! .. +! This is essentially subroutine DROOTS from LSODAR. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (IN) :: HMIN + REAL (WP), INTENT (INOUT) :: X, X0, X1 + INTEGER, INTENT (INOUT) :: JFLAG + INTEGER, INTENT (IN) :: NG +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: G0(NG), G1(NG), GX(NG) +! .. +! .. Local Scalars .. + REAL (WP) :: T2, TMAX + INTEGER :: I, IMXOLD, NXLAST + LOGICAL :: SGNCHG, XROOT, ZROOT +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +! .. +! This subroutine finds the leftmost root of a set of arbitrary +! functions gi(x) (i = 1,...,NG) in an interval (X0,X1). Only roots +! of odd multiplicity (i.e. changes of sign of the gi) are found. +! Here the sign of X1 - X0 is arbitrary, but is constant for a given +! problem, and 'leftmost' means nearest to X0.The values of the +! vector-valued function g(x) = (gi, i=1...NG) are communicated +! through the call sequence of DVROOTS. The method used is the +! Illinois algorithm. + +! Reference: +! Kathie L. Hiebert and Lawrence F. Shampine, Implicitly Defined +! Output Points for Solutions of ODEs, Sandia Report SAND/80-0180, +! February 1980. + +! Description of parameters. + +! NG = number of functions gi, or the number of components of +! the vector valued function g(x). Input only. + +! HMIN = resolution parameter in X. Input only. When a root is +! found, it is located only to within an error of HMIN in X. +! Typically, HMIN should be set to something on the order of +! 100 * UROUND * MAX(ABS(X0),ABS(X1)), +! where UROUND is the unit roundoff of the machine. + +! JFLAG = integer flag for input and output communication. + +! On input, set JFLAG = 0 on the first call for the problem, +! and leave it unchanged until the problem is completed. +! (The problem is completed when JFLAG >= 2 on return.) + +! On output, JFLAG has the following values and meanings: +! JFLAG = 1 means DVROOTS needs a value of g(x). Set GX = g(X) +! and call DVROOTS again. +! JFLAG = 2 means a root has been found. The root is +! at X, and GX contains g(X). (Actually, X is the +! rightmost approximation to the root on an interval +! (X0,X1) of size HMIN or less.) +! JFLAG = 3 means X = X1 is a root, with one or more of the gi +! being zero at X1 and no sign changes in (X0,X1). +! GX contains g(X) on output. +! JFLAG = 4 means no roots (of odd multiplicity) were +! found in (X0,X1) (no sign changes). + +! X0,X1 = endpoints of the interval where roots are sought. +! X1 and X0 are input when JFLAG = 0 (first call), and +! must be left unchanged between calls until the problem is +! completed. X0 and X1 must be distinct, but X1 - X0 may be +! of either sign. However, the notion of 'left' and 'right' +! will be used to mean nearer to X0 or X1, respectively. +! When JFLAG >= 2 on return, X0 and X1 are output, and +! are the endpoints of the relevant interval. + +! G0,G1 = arrays of length NG containing the vectors g(X0) and g(X1), +! respectively. When JFLAG = 0, G0 and G1 are input and +! none of the G0(i) should be zero. +! When JFLAG >= 2 on return, G0 and G1 are output. + +! GX = array of length NG containing g(X). GX is input +! when JFLAG = 1, and output when JFLAG >= 2. + +! X = independent variable value. Output only. +! When JFLAG = 1 on output, X is the point at which g(x) +! is to be evaluated and loaded into GX. +! When JFLAG = 2 or 3, X is the root. +! When JFLAG = 4, X is the right endpoint of the interval, X1. + +! JROOT = integer array of length NG. Output only. +! When JFLAG = 2 or 3, JROOT indicates which components +! of g(x) have a root at X. JROOT(i) is 1 if the i-th +! component has a root, and JROOT(i) = 0 otherwise. +! .. +! .. FIRST EXECUTABLE STATEMENT DVROOTS +! .. + IF (JFLAG==1) GOTO 90 +! JFLAG /= 1. Check for change in sign of g or zero at X1. + IMAX = 0 + TMAX = ZERO + ZROOT = .FALSE. + DO 20 I = 1, NG + IF (ABS(G1(I))>ZERO) GOTO 10 + ZROOT = .TRUE. + GOTO 20 +! At this point, G0(i) has been checked and cannot be zero. +10 IF (ABS(SIGN(ONE,G0(I))-SIGN(ONE,G1(I)))<=ZERO) GOTO 20 + T2 = ABS(G1(I)/(G1(I)-G0(I))) + IF (T2<=TMAX) GOTO 20 + TMAX = T2 + IMAX = I +20 END DO + IF (IMAX>0) GOTO 30 + SGNCHG = .FALSE. + GOTO 40 +30 SGNCHG = .TRUE. +40 IF (.NOT.SGNCHG) GOTO 200 +! There is a sign change. Find the first root in the interval. + XROOT = .FALSE. + NXLAST = 0 + LAST = 1 + +! Repeat until the first root in the interval is found. Loop point. +50 CONTINUE + IF (XROOT) GOTO 170 + IF (NXLAST==LAST) GOTO 60 + ALPHA = ONE + GOTO 80 +60 IF (LAST==0) GOTO 70 + ALPHA = HALF*ALPHA + GOTO 80 +70 ALPHA = TWO*ALPHA +80 X2 = X1 - (X1-X0)*G1(IMAX)/(G1(IMAX)-ALPHA*G0(IMAX)) +! IF ((ABS(X2 - X0) < HMIN) .AND. (ABS(X1 - X0) > TEN * & +! HMIN)) X2 = X0 + PT1 * (X1 - X0) + +! If X2 is too close to X0 or X1, adjust it inward, +! by a fractional distance that is between 0.1 and 0.5. + IF (ABS(X2-X0)ZERO) GOTO 100 + ZROOT = .TRUE. + GOTO 110 +! Neither G0(i) nor GX(i) can be zero at this point. +100 IF (ABS(SIGN(ONE,G0(I))-SIGN(ONE,GX(I)))<=ZERO) GOTO 110 + T2 = ABS(GX(I)/(GX(I)-G0(I))) + IF (T2<=TMAX) GOTO 110 + TMAX = T2 + IMAX = I +110 END DO + IF (IMAX>0) GOTO 120 + SGNCHG = .FALSE. + IMAX = IMXOLD + GOTO 130 +120 SGNCHG = .TRUE. +130 NXLAST = LAST + IF (.NOT.SGNCHG) GOTO 140 +! Sign change between X0 and X2, so replace X1 with X2. + X1 = X2 + CALL DCOPY_F90(NG,GX,1,G1,1) + LAST = 1 + XROOT = .FALSE. + GOTO 160 +140 IF (.NOT.ZROOT) GOTO 150 +! Zero value at X2 and no sign change in (X0,X2), so X2 is a root. + X1 = X2 + CALL DCOPY_F90(NG,GX,1,G1,1) + XROOT = .TRUE. + GOTO 160 +! No sign change between X0 and X2. Replace X0 with X2. +150 CONTINUE + CALL DCOPY_F90(NG,GX,1,G0,1) + X0 = X2 + LAST = 0 + XROOT = .FALSE. +160 IF (ABS(X1-X0)<=HMIN) XROOT = .TRUE. + GOTO 50 + +! Return with X1 as the root. Set JROOT. Set X = X1 and GX = G1. +170 JFLAG = 2 + X = X1 + CALL DCOPY_F90(NG,G1,1,GX,1) + DO 190 I = 1, NG + JROOT(I) = 0 + IF (ABS(G1(I))>ZERO) GOTO 180 + JROOT(I) = 1 + GOTO 190 +180 IF (ABS(SIGN(ONE,G0(I))-SIGN(ONE,G1(I)))>ZERO) JROOT(I) = 1 +190 END DO + RETURN + +! No sign change in the interval. Check for zero at right endpoint. +200 IF (.NOT.ZROOT) GOTO 210 + +! Zero value at X1 and no sign change in (X0,X1). Return JFLAG = 3. + X = X1 + CALL DCOPY_F90(NG,G1,1,GX,1) + DO I = 1, NG + JROOT(I) = 0 + IF (ABS(G1(I))<=ZERO) JROOT(I) = 1 + END DO + JFLAG = 3 + RETURN + +! No sign changes in this interval. Set X = X1, return JFLAG = 4. +210 CALL DCOPY_F90(NG,G1,1,GX,1) + X = X1 + JFLAG = 4 + RETURN + + END SUBROUTINE DVROOTS +!_______________________________________________________________________ + + SUBROUTINE DVNRDP(Y,IYDIM,NEQN,NQ) +! .. +! Retract the Nordsieck array (undo prediction). +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: IYDIM, NEQN, NQ +! .. +! .. Array Arguments .. + REAL (WP) :: Y(*) +! .. +! .. Local Scalars .. + INTEGER :: I, J, J1, J2 +! .. +! .. FIRST EXECUTABLE STATEMENT DVNRDP +! .. + DO J1 = 1, NQ + DO J2 = J1, NQ + J = (NQ+J1) - J2 + DO I = 1, NEQN +! Original: +! Y(I,J) = Y(I,J) + Y(I,J+1) + Y(I+(J-1)*IYDIM) = Y(I+(J-1)*IYDIM) + Y(I+J*IYDIM) + END DO + END DO + END DO + RETURN + + END SUBROUTINE DVNRDP +!_______________________________________________________________________ + + SUBROUTINE DVNRDN(Y,IYDIM,NEQN,NQ) +! .. +! Apply the Nordsieck array (predict). +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: IYDIM, NEQN, NQ +! .. +! .. Array Arguments .. + REAL (WP) :: Y(*) +! .. +! .. Local Scalars .. + INTEGER :: I, J, J1, J2 +! .. +! .. FIRST EXECUTABLE STATEMENT DVNRDN +! .. + DO J1 = 1, NQ + DO J2 = J1, NQ + J = (NQ+J1) - J2 + DO I = 1, NEQN +! Original: +! Y(I,J) = Y(I,J) - Y(I,J+1) + Y(I+(J-1)*IYDIM) = Y(I+(J-1)*IYDIM) - Y(I+J*IYDIM) + END DO + END DO + END DO + RETURN + + END SUBROUTINE DVNRDN +!_______________________________________________________________________ + + SUBROUTINE DVNRDS(Y,IYDIM,NEQN,L,RH) +! .. +! Scale the Nordsieck array. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: RH + INTEGER :: IYDIM, L, NEQN +! .. +! .. Array Arguments .. + REAL (WP) :: Y(*) +! .. +! .. Local Scalars .. + REAL (WP) :: R1 + INTEGER :: I, J +! .. +! .. FIRST EXECUTABLE STATEMENT DVNRDS +! .. + R1 = ONE + DO J = 2, L + R1 = R1*RH + DO I = 1, NEQN +! Original: +! Y(I,J) = Y(I,J)*R1 + Y(I+(J-1)*IYDIM) = Y(I+(J-1)*IYDIM)*R1 + END DO + END DO + RETURN + + END SUBROUTINE DVNRDS +!_______________________________________________________________________ + + SUBROUTINE RELEASE_ARRAYS +! .. +! Deallocate any allocated arrays and determine how much storage was +! used (not called by DVODE_F90). +! .. + IMPLICIT NONE +! .. +! .. Local Scalars .. + INTEGER :: IER, II, IR + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ALLOCATED, SIZE +! .. +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! INTEGER INFO +! INTEGER ISIZE +! COMMON /MA48SIZE/ ISIZE +!_______________________________________________________________________ +! .. +! .. FIRST EXECUTABLE STATEMENT RELEASE_ARRAYS +! .. + IR = 0 + IF (ALLOCATED(ACOR)) THEN + IR = IR + SIZE(ACOR) + DEALLOCATE (ACOR,STAT=IER) + CALL CHECK_STAT(IER,880) + END IF + IF (ALLOCATED(CSCALEX)) THEN + IR = IR + SIZE(CSCALEX) + SIZE(RSCALEX) + SIZE(WSCALEX) + DEALLOCATE (CSCALEX,RSCALEX,WSCALEX,STAT=IER) + CALL CHECK_STAT(IER,890) + END IF + IF (ALLOCATED(DTEMP)) THEN + IR = IR + SIZE(DTEMP) + DEALLOCATE (DTEMP,STAT=IER) + CALL CHECK_STAT(IER,900) + END IF + IF (ALLOCATED(EWT)) THEN + IR = IR + SIZE(EWT) + DEALLOCATE (EWT,STAT=IER) + CALL CHECK_STAT(IER,910) + END IF + IF (ALLOCATED(FACDS)) THEN + IR = IR + SIZE(FACDS) + DEALLOCATE (FACDS,STAT=IER) + CALL CHECK_STAT(IER,920) + END IF + IF (ALLOCATED(FPTEMP)) THEN + IR = IR + SIZE(FPTEMP) + DEALLOCATE (FPTEMP,STAT=IER) + CALL CHECK_STAT(IER,930) + END IF + IF (ALLOCATED(FTEMP)) THEN + IR = IR + SIZE(FTEMP) + DEALLOCATE (FTEMP,STAT=IER) + CALL CHECK_STAT(IER,940) + END IF + IF (ALLOCATED(FTEMP1)) THEN + IR = IR + SIZE(FTEMP1) + DEALLOCATE (FTEMP1,STAT=IER) + CALL CHECK_STAT(IER,950) + END IF + IF (ALLOCATED(G0)) THEN + IR = IR + SIZE(G0) + DEALLOCATE (G0,STAT=IER) + CALL CHECK_STAT(IER,960) + END IF + IF (ALLOCATED(G1)) THEN + IR = IR + SIZE(G1) + DEALLOCATE (G1,STAT=IER) + CALL CHECK_STAT(IER,970) + END IF + IF (ALLOCATED(GX)) THEN + IR = IR + SIZE(GX) + DEALLOCATE (GX,STAT=IER) + CALL CHECK_STAT(IER,980) + END IF + IF (ALLOCATED(JMAT)) THEN + IR = IR + SIZE(JMAT) + DEALLOCATE (JMAT,STAT=IER) + CALL CHECK_STAT(IER,990) + END IF + IF (ALLOCATED(LB)) THEN + IR = IR + SIZE(LB) + DEALLOCATE (LB,STAT=IER) + CALL CHECK_STAT(IER,1000) + END IF + IF (ALLOCATED(UB)) THEN + IR = IR + SIZE(UB) + DEALLOCATE (UB,STAT=IER) + CALL CHECK_STAT(IER,1000) + END IF + IF (ALLOCATED(PMAT)) THEN + IR = IR + SIZE(PMAT) + DEALLOCATE (PMAT,STAT=IER) + CALL CHECK_STAT(IER,1010) + END IF + IF (ALLOCATED(RWORK)) THEN + IR = IR + SIZE(RWORK) + DEALLOCATE (RWORK,STAT=IER) + CALL CHECK_STAT(IER,1020) + END IF + IF (ALLOCATED(SAVF)) THEN + IR = IR + SIZE(SAVF) + DEALLOCATE (SAVF,STAT=IER) + CALL CHECK_STAT(IER,1030) + END IF + IF (ALLOCATED(YMAX)) THEN + IR = IR + SIZE(YMAX) + DEALLOCATE (YMAX,STAT=IER) + CALL CHECK_STAT(IER,1040) + END IF + IF (ALLOCATED(WM)) THEN + IR = IR + SIZE(WM) + DEALLOCATE (WM,STAT=IER) + CALL CHECK_STAT(IER,1050) + END IF + IF (ALLOCATED(YHNQP2)) THEN + IR = IR + SIZE(YHNQP2) + DEALLOCATE (YHNQP2,STAT=IER) + CALL CHECK_STAT(IER,1060) + END IF + IF (ALLOCATED(YHTEMP)) THEN + IR = IR + SIZE(YHTEMP) + DEALLOCATE (YHTEMP,STAT=IER) + CALL CHECK_STAT(IER,1070) + END IF + IF (ALLOCATED(YMAX)) THEN + IR = IR + SIZE(YMAX) + DEALLOCATE (YMAX,STAT=IER) + CALL CHECK_STAT(IER,1080) + END IF + IF (ALLOCATED(YNNEG)) THEN + IR = IR + SIZE(YNNEG) + DEALLOCATE (YNNEG,STAT=IER) + CALL CHECK_STAT(IER,1090) + END IF + IF (ALLOCATED(YSCALEDS)) THEN + IR = IR + SIZE(YSCALEDS) + DEALLOCATE (YSCALEDS,STAT=IER) + CALL CHECK_STAT(IER,1100) + END IF + IF (ALLOCATED(YTEMP)) THEN + IR = IR + SIZE(YTEMP) + DEALLOCATE (YTEMP,STAT=IER) + CALL CHECK_STAT(IER,1110) + END IF + IF (ALLOCATED(WKDS)) THEN + IR = IR + SIZE(WKDS) + DEALLOCATE (WKDS,STAT=IER) + CALL CHECK_STAT(IER,1120) + END IF + II = 0 + IF (ALLOCATED(BIGP)) THEN + II = II + SIZE(BIGP) + DEALLOCATE (BIGP,STAT=IER) + CALL CHECK_STAT(IER,1130) + END IF + IF (ALLOCATED(BJGP)) THEN + II = II + SIZE(BJGP) + DEALLOCATE (BJGP,STAT=IER) + CALL CHECK_STAT(IER,1140) + END IF + IF (ALLOCATED(IA)) THEN + II = II + SIZE(IA) + DEALLOCATE (IA,STAT=IER) + CALL CHECK_STAT(IER,1150) + END IF + IF (ALLOCATED(IAB)) THEN + II = II + SIZE(IAB) + DEALLOCATE (IAB,STAT=IER) + CALL CHECK_STAT(IER,1160) + END IF + IF (ALLOCATED(IAN)) THEN + II = II + SIZE(IAN) + DEALLOCATE (IAN,STAT=IER) + CALL CHECK_STAT(IER,1170) + END IF + IF (ALLOCATED(ICN)) THEN + II = II + SIZE(ICN) + DEALLOCATE (ICN,STAT=IER) + CALL CHECK_STAT(IER,1180) + END IF + IF (ALLOCATED(IDX)) THEN + II = II + SIZE(IDX) + DEALLOCATE (IDX,STAT=IER) + CALL CHECK_STAT(IER,1190) + END IF + IF (ALLOCATED(IGP)) THEN + II = II + SIZE(IGP) + DEALLOCATE (IGP,STAT=IER) + CALL CHECK_STAT(IER,1200) + END IF + IF (ALLOCATED(IKEEP28)) THEN + II = II + SIZE(IKEEP28,1)*SIZE(IKEEP28,2) + DEALLOCATE (IKEEP28,STAT=IER) + CALL CHECK_STAT(IER,1210) + END IF + IF (ALLOCATED(INDCOLDS)) THEN + II = II + SIZE(INDCOLDS) + DEALLOCATE (INDCOLDS,STAT=IER) + CALL CHECK_STAT(IER,1220) + END IF + IF (ALLOCATED(INDROWDS)) THEN + II = II + SIZE(INDROWDS) + DEALLOCATE (INDROWDS,STAT=IER) + CALL CHECK_STAT(IER,1230) + END IF + IF (ALLOCATED(IOPTDS)) THEN + II = II + SIZE(IOPTDS) + DEALLOCATE (IOPTDS,STAT=IER) + CALL CHECK_STAT(IER,1240) + END IF + IF (ALLOCATED(IPNTRDS)) THEN + II = II + SIZE(IPNTRDS) + DEALLOCATE (IPNTRDS,STAT=IER) + CALL CHECK_STAT(IER,1250) + END IF + IF (ALLOCATED(IWADS)) THEN + II = II + SIZE(IWADS) + DEALLOCATE (IWADS,STAT=IER) + CALL CHECK_STAT(IER,1260) + END IF + IF (ALLOCATED(IWKDS)) THEN + II = II + SIZE(IWKDS) + DEALLOCATE (IWKDS,STAT=IER) + CALL CHECK_STAT(IER,1270) + END IF + IF (ALLOCATED(IWORK)) THEN + II = II + SIZE(IWORK) + DEALLOCATE (IWORK,STAT=IER) + CALL CHECK_STAT(IER,1280) + END IF + IF (ALLOCATED(IW28)) THEN + II = II + SIZE(IW28,1)*SIZE(IW28,2) + DEALLOCATE (IW28,STAT=IER) + CALL CHECK_STAT(IER,1290) + END IF + IF (ALLOCATED(JA)) THEN + II = II + SIZE(JA) + DEALLOCATE (JA,STAT=IER) + CALL CHECK_STAT(IER,1300) + END IF + IF (ALLOCATED(JAB)) THEN + II = II + SIZE(JAB) + DEALLOCATE (JAB,STAT=IER) + CALL CHECK_STAT(IER,1310) + END IF + IF (ALLOCATED(JAN)) THEN + II = II + SIZE(JAN) + DEALLOCATE (JAN,STAT=IER) + CALL CHECK_STAT(IER,1320) + END IF + IF (ALLOCATED(JATEMP)) THEN + II = II + SIZE(JATEMP) + DEALLOCATE (JATEMP,STAT=IER) + CALL CHECK_STAT(IER,1330) + END IF + IF (ALLOCATED(JGP)) THEN + II = II + SIZE(JGP) + DEALLOCATE (JGP,STAT=IER) + CALL CHECK_STAT(IER,1340) + END IF + IF (ALLOCATED(JPNTRDS)) THEN + II = II + SIZE(JPNTRDS) + DEALLOCATE (JPNTRDS,STAT=IER) + CALL CHECK_STAT(IER,1350) + END IF + IF (ALLOCATED(JROOT)) THEN + II = II + SIZE(JROOT) + DEALLOCATE (JROOT,STAT=IER) + CALL CHECK_STAT(IER,1360) + END IF + IF (ALLOCATED(JVECT)) THEN + II = II + SIZE(JVECT) + DEALLOCATE (JVECT,STAT=IER) + CALL CHECK_STAT(IER,1370) + END IF + IF (ALLOCATED(NGRPDS)) THEN + II = II + SIZE(NGRPDS) + DEALLOCATE (NGRPDS,STAT=IER) + CALL CHECK_STAT(IER,1380) + END IF + IF (ALLOCATED(SUBDS)) THEN + II = II + SIZE(SUBDS) + DEALLOCATE (SUBDS,STAT=IER) + CALL CHECK_STAT(IER,1390) + END IF + IF (ALLOCATED(SUPDS)) THEN + II = II + SIZE(SUPDS) + DEALLOCATE (SUPDS,STAT=IER) + CALL CHECK_STAT(IER,1400) + END IF +!_______________________________________________________________________ +! *****MA48 build change point. Insert these statements. +! IF (MA48_WAS_USED) THEN +! CALL MA48_FINALIZE(FACTORS,CONTROL,INFO) +! IF (INFO /= 0) THEN +! MSG = 'The call to MA48_FINALIZE FAILED.' +! CALL XERRDV(MSG,1740,1,1,II,0,0,ZERO,ZERO) +! END IF +! MSG = 'Size of MA48 deallocated arrays (I1) = ' +! CALL XERRDV(MSG,1750,1,1,ISIZE,0,0,ZERO,ZERO) +! END IF +!_______________________________________________________________________ + +! Print the amount of storage used. + MSG = 'I1 = Total length of REAL arrays used.' + CALL XERRDV(MSG,1760,1,1,IR,0,0,ZERO,ZERO) + MSG = 'I1 = Total length of INTEGER arrays used.' + CALL XERRDV(MSG,1760,1,1,II,0,0,ZERO,ZERO) + +! In case DVODE_F90 is subsequently called: + OPTS_CALLED = .FALSE. + RETURN + + END SUBROUTINE RELEASE_ARRAYS +! End of DVODE_F90 subroutines +!_______________________________________________________________________ + +! Beginning of LINPACK and BLAS subroutines + SUBROUTINE DGEFA_F90(A,LDA,N,IPVT,INFO) +! .. +! Factor a matrix using Gaussian elimination. +! .. +! DGEFA_F90 factors a real(wp) matrix by Gaussian elimination. +! DGEFA_F90 is usually called by DGECO, but it can be called +! directly with a saving in time if RCOND is not needed. +! (Time for DGECO) = (1 + 9/N)*(Time for DGEFA_F90). +! On Entry +! A REAL(KIND=WP)(LDA, N) +! the matrix to be factored. +! LDA INTEGER +! the leading dimension of the array A. +! N INTEGER +! the order of the matrix A. +! On Return +! A an upper triangular matrix and the multipliers +! which were used to obtain it. +! The factorization can be written A = L*U where +! L is a product of permutation and unit lower +! triangular matrices and U is upper triangular. +! IPVT INTEGER(N) +! an integer vector of pivot indices. +! INFO INTEGER +! = 0 normal value. +! = K if U(K,K) == 0.0. This is not an error +! condition for this subroutine, but it does +! indicate that DGESL_F90 or DGEDI will divide +! by zero if called. Use RCOND in DGECO for a +! reliable indication of singularity. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: INFO + INTEGER, INTENT (IN) :: LDA, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: A(LDA,*) + INTEGER, INTENT (INOUT) :: IPVT(*) +! .. +! .. Local Scalars .. + REAL (WP) :: T + INTEGER :: J, K, KP1, L, NM1 +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS +! .. +! .. FIRST EXECUTABLE STATEMENT DGEFA_F90 +! .. + INFO = 0 + NM1 = N - 1 + IF (NM1<1) GOTO 50 + DO K = 1, NM1 + KP1 = K + 1 + +! Find L = pivot index. + +! Original: +! L = IDAMAX_F90(N-K+1,A(K,K),1) + K - 1 + L = IDAMAX_F90(N-K+1,A(K:N,K),1) + K - 1 + IPVT(K) = L + +! Zero pivot implies this column already triangularized. + +! IF (A(L, K) == ZERO) GOTO 40 + IF (ABS(A(L,K))<=ZERO) GOTO 30 + +! Interchange if necessary. + + IF (L==K) GOTO 10 + T = A(L,K) + A(L,K) = A(K,K) + A(K,K) = T +10 CONTINUE + +! Compute multipliers. + + T = -ONE/A(K,K) +! Original: +! CALL DSCAL_F90(N-K,T,A(K+1,K),1) + CALL DSCAL_F90(N-K,T,A(K+1:N,K),1) + +! Row elimination with column indexing. + + DO J = KP1, N + T = A(L,J) + IF (L==K) GOTO 20 + A(L,J) = A(K,J) + A(K,J) = T +20 CONTINUE +! Original: +! CALL DAXPY_F90(N-K,T,A(K+1,K),1,A(K+1,J),1) + CALL DAXPY_F90(N-K,T,A(K+1:N,K),1,A(K+1:N,J),1) + END DO + GOTO 40 +30 CONTINUE + INFO = K +40 CONTINUE + END DO +50 CONTINUE + IPVT(N) = N +! IF (A(N, N) == ZERO) INFO = N + IF (ABS(A(N,N))<=ZERO) INFO = N + RETURN + + END SUBROUTINE DGEFA_F90 +!_______________________________________________________________________ + + SUBROUTINE DGESL_F90(A,LDA,N,IPVT,B,JOB) +! .. +! Solve the real system A*X=B or TRANS(A)*X=B using the factors +! computed by DGECO or DGEFA_F90. +! .. +! DGESL_F90 solves the real(wp) system +! A * X = B or TRANS(A) * X = B +! using the factors computed by DGECO or DGEFA_F90. +! On Entry +! A REAL(KIND=WP)(LDA, N) +! the output from DGECO or DGEFA_F90. +! LDA INTEGER +! the leading dimension of the array A. +! N INTEGER +! the order of the matrix A. +! IPVT INTEGER(N) +! the pivot vector from DGECO or DGEFA_F90. +! B REAL(KIND=WP)(N) +! the right hand side vector. +! JOB INTEGER +! = 0 to solve A*X = B, +! = nonzero to solve TRANS(A)*X = B where +! TRANS(A)is the transpose. +! On Return +! B the solution vector X. +! Error Condition +! A division by zero will occur if the input factor contains a +! zero on the diagonal. Technically this indicates singularity +! but it is often caused by improper arguments or improper +! setting of LDA. It will not occur if the subroutines are +! called correctly and if DGECO has set RCOND > 0.0 or +! DGEFA_F90 has set INFO == 0. +! To compute INVERSE(A) * C where C is a matrix +! with P columns +! CALL DGECO(A,LDA,N,IPVT,RCOND,Z) +! IF (RCOND is too small) GOTO +! DO J = 1, P +! CALL DGESL_F90(A,LDA,N,IPVT,C(1,J),0) +! END DO +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: JOB, LDA, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: A(LDA,*), B(*) + INTEGER, INTENT (INOUT) :: IPVT(*) +! .. +! .. Local Scalars .. + REAL (WP) :: T + INTEGER :: K, KB, L, NM1 +! .. +! .. FIRST EXECUTABLE STATEMENT DGESL_F90 +! .. + NM1 = N - 1 + IF (JOB/=0) GOTO 30 + +! JOB = 0, solve A*X = B. +! First solve L*Y = B. + + IF (NM1<1) GOTO 20 + DO K = 1, NM1 + L = IPVT(K) + T = B(L) + IF (L==K) GOTO 10 + B(L) = B(K) + B(K) = T +10 CONTINUE +! Original: +! CALL DAXPY_F90(N-K,T,A(K+1,K),1,B(K+1),1) + CALL DAXPY_F90(N-K,T,A(K+1:N,K),1,B(K+1:N),1) + END DO +20 CONTINUE + +! Now solve U*X = Y. + + DO KB = 1, N + K = N + 1 - KB + B(K) = B(K)/A(K,K) + T = -B(K) +! Original: +! CALL DAXPY_F90(K-1,T,A(1,K),1,B(1),1) + CALL DAXPY_F90(K-1,T,A(1:K-1,K),1,B(1:K-1),1) + END DO + GOTO 60 +30 CONTINUE + +! JOB /= 0, solve TRANS(A)*X = B. +! First solve TRANS(U)*Y = B. + + DO K = 1, N + T = DDOT_F90(K-1,A(1,K),1,B(1),1) + B(K) = (B(K)-T)/A(K,K) + END DO + +! Now solve TRANS(L)*X = Y. + + IF (NM1<1) GOTO 50 + DO KB = 1, NM1 + K = N - KB + B(K) = B(K) + DDOT_F90(N-K,A(K+1,K),1,B(K+1),1) + L = IPVT(K) + IF (L==K) GOTO 40 + T = B(L) + B(L) = B(K) + B(K) = T +40 CONTINUE + END DO +50 CONTINUE +60 CONTINUE + RETURN + + END SUBROUTINE DGESL_F90 +!_______________________________________________________________________ + + SUBROUTINE DGBFA_F90(ABD,LDA,N,ML,MU,IPVT,INFO) +! .. +! Factor a banded matrix using Gaussian elimination. +! .. +! DGBFA_F90 factors a real(wp) band matrix by elimination. +! DGBFA_F90 is usually called by DGBCO, but it can be called +! directly with a saving in time if RCOND is not needed. +! On Entry +! ABD REAL(KIND=WP)(LDA, N) +! contains the matrix in band storage. The columns +! of the matrix are stored in the columns of ABD and +! the diagonals of the matrix are stored in rows +! ML+1 through 2*ML+MU+1 of ABD. +! See the comments below for details. +! LDA INTEGER +! the leading dimension of the array ABD. +! LDA must be >= 2*ML + MU + 1. +! N INTEGER +! the order of the original matrix. +! ML INTEGER +! number of diagonals below the main diagonal. +! 0 <= ML < N. +! MU INTEGER +! number of diagonals above the main diagonal. +! 0 <= MU < N. +! More efficient if ML <= MU. +! On Return +! ABD an upper triangular matrix in band storage and +! the multipliers which were used to obtain it. +! The factorization can be written A = L*U where +! L is a product of permutation and unit lower +! triangular matrices and U is upper triangular. +! IPVT INTEGER(N) +! an integer vector of pivot indices. +! INFO INTEGER +! = 0 normal value. +! = K if U(K,K) == 0.0. This is not an error +! condition for this subroutine, but it does +! indicate that DGBSL_F90 will divide by zero +! if called. Use RCOND in DGBCO for a reliable +! indication of singularity. +! Band Storage +! If A is a band matrix, the following program segment +! will set up the input. +! ML = (band width below the diagonal) +! MU = (band width above the diagonal) +! M = ML + MU + 1 +! DO J = 1, N +! I1 = MAX(1, J-MU) +! I2 = MIN(N, J+ML) +! DO 10 I = I1, I2 +! K = I - J + M +! ABD(K,J) = A(I,J) +! END DO +! END DO +! This uses rows ML+1 through 2*ML+MU+1 of ABD. +! In addition, the first ML rows in ABD are used for +! elements generated during the triangularization. +! The total number of rows needed in ABD is 2*ML+MU+1. +! The ML+MU by ML+MU upper left triangle and the +! ML by ML lower right triangle are not referenced. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (INOUT) :: INFO + INTEGER, INTENT (IN) :: LDA, ML, MU, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: ABD(LDA,*) + INTEGER, INTENT (INOUT) :: IPVT(*) +! .. +! .. Local Scalars .. + REAL (WP) :: T + INTEGER :: I0, J, J0, J1, JU, JZ, K, KP1, L, LM, M, MM, NM1 +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT DGBFA_F90 +! .. + M = ML + MU + 1 + INFO = 0 + +! Zero initial fill-in columns. + + J0 = MU + 2 + J1 = MIN(N,M) - 1 + IF (J1N) GOTO 20 + IF (ML<1) GOTO 20 + ABD(1:ML,JZ) = ZERO +20 CONTINUE + +! Find L = pivot index. + + LM = MIN(ML,N-K) +! Original: +! L = IDAMAX_F90(LM+1,ABD(M,K),1) + M - 1 + L = IDAMAX_F90(LM+1,ABD(M:M+LM,K),1) + M - 1 + + IPVT(K) = L + K - M + +! Zero pivot implies this column already triangularized. + +! IF (ABD(L, K) == ZERO) GOTO 100 + IF (ABS(ABD(L,K))<=ZERO) GOTO 60 + +! Interchange if necessary. + + IF (L==M) GOTO 30 + T = ABD(L,K) + ABD(L,K) = ABD(M,K) + ABD(M,K) = T +30 CONTINUE + +! Compute multipliers. + + T = -ONE/ABD(M,K) +! Original: +! CALL DSCAL_F90(LM,T,ABD(M+1,K),1) + CALL DSCAL_F90(LM,T,ABD(M+1:M+LM,K),1) + +! Row elimination with column indexing. + + JU = MIN(MAX(JU,MU+IPVT(K)),N) + MM = M + IF (JU 0.0 +! or DGBFA_F90 has set INFO == 0 . +! To compute INVERSE(A) * C where C is a matrix +! with P columns +! CALL DGBCO(ABD,LDA,N,ML,MU,IPVT,RCOND,Z) +! IF (RCOND is too small) GOTO ... +! DO J = 1, P +! CALL DGBSL_F90(ABD,LDA,N,ML,MU,IPVT,C(1,J),0) +! END DO +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: JOB, LDA, ML, MU, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: ABD(LDA,*), B(*) + INTEGER, INTENT (INOUT) :: IPVT(*) +! .. +! .. Local Scalars .. + REAL (WP) :: T + INTEGER :: K, KB, L, LA, LB, LM, M, NM1 +! .. +! .. Intrinsic Functions .. + INTRINSIC MIN +! .. +! .. FIRST EXECUTABLE STATEMENT DGBSL_F90 +! .. + M = MU + ML + 1 + NM1 = N - 1 + IF (JOB/=0) GOTO 30 + +! JOB = 0, solve A*X = B. +! First solve L*Y = B. + + IF (ML==0) GOTO 20 + IF (NM1<1) GOTO 20 + DO K = 1, NM1 + LM = MIN(ML,N-K) + L = IPVT(K) + T = B(L) + IF (L==K) GOTO 10 + B(L) = B(K) + B(K) = T +10 CONTINUE +! Original: +! CALL DAXPY_F90(LM,T,ABD(M+1,K),1,B(K+1),1) + CALL DAXPY_F90(LM,T,ABD(M+1:M+LM,K),1,B(K+1:K+LM),1) + END DO +20 CONTINUE + +! Now solve U*X = Y. + + DO KB = 1, N + K = N + 1 - KB + B(K) = B(K)/ABD(M,K) + LM = MIN(K,M) - 1 + LA = M - LM + LB = K - LM + T = -B(K) +! Original: +! CALL DAXPY_F90(LM,T,ABD(LA,K),1,B(LB),1) + CALL DAXPY_F90(LM,T,ABD(LA:LA+LM-1,K),1,B(LB:LB+LM-1),1) + END DO + GOTO 60 +30 CONTINUE + +! JOB /= 0, solve TRANS(A)*X = B. +! First solve TRANS(U)*Y = B. + + DO K = 1, N + LM = MIN(K,M) - 1 + LA = M - LM + LB = K - LM +! Original: +! T = DDOT_F90(LM,ABD(LA,K),1,B(LB),1) + T = DDOT_F90(LM,ABD(LA:LA+LM-1,K),1,B(LB:LB+LM-1),1) + B(K) = (B(K)-T)/ABD(M,K) + END DO + +! Now solve TRANS(L)*X = Y. + + IF (ML==0) GOTO 50 + IF (NM1<1) GOTO 50 + DO KB = 1, NM1 + K = N - KB + LM = MIN(ML,N-K) +! Original: +! B(K) = B(K) + DDOT_F90(LM,ABD(M+1,K),1,B(K+1),1) + B(K) = B(K) + DDOT_F90(LM,ABD(M+1:M+LM,K),1,B(K+1:K+LM),1) + L = IPVT(K) + IF (L==K) GOTO 40 + T = B(L) + B(L) = B(K) + B(K) = T +40 CONTINUE + END DO +50 CONTINUE +60 CONTINUE + RETURN + + END SUBROUTINE DGBSL_F90 +!_______________________________________________________________________ + + SUBROUTINE DAXPY_F90(N,DA,DX,INCX,DY,INCY) +! .. +! Compute a constant times a vector plus a vector. +! .. +! Description of Parameters +! Input: +! N - number of elements in input vector(s) +! DA - real(wp) scalar multiplier +! DX - real(wp) vector with N elements +! INCX - storage spacing between elements of DX +! DY - real(wp) vector with N elements +! INCY - storage spacing between elements of DY +! Output: +! DY - real(wp) result (unchanged if N <= 0) +! Overwrite real(wp) DY with real(wp) DA*DX + DY. +! For I = 0 to N-1, replace DY(LY+I*INCY) with +! DA*DX(LX+I*INCX) + DY(LY+I*INCY), +! where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, +! and LY is defined in a similar way using INCY. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (IN) :: DA + INTEGER, INTENT (IN) :: INCX, INCY, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: DX(*) + REAL (WP), INTENT (INOUT) :: DY(*) +! .. +! .. Local Scalars .. + INTEGER :: I, IX, IY, M, MP1, NS +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MOD +! .. +! .. FIRST EXECUTABLE STATEMENT DAXPY_F90 +! .. +! IF (N <= 0 .OR. DA == ZERO) RETURN + IF (N<=0 .OR. ABS(DA)<=ZERO) RETURN +! IF (INCX==INCY) IF (INCX-1) 10, 20, 40 + IF (INCX == INCY) THEN + IF (INCX < 1) THEN + GOTO 10 + ELSEIF (INCX == 1) THEN + GOTO 20 + ELSE + GOTO 40 + END IF + END IF + +! Code for unequal or nonpositive increments. + +10 IX = 1 + IY = 1 + IF (INCX<0) IX = (-N+1)*INCX + 1 + IF (INCY<0) IY = (-N+1)*INCY + 1 + DO I = 1, N + DY(IY) = DY(IY) + DA*DX(IX) + IX = IX + INCX + IY = IY + INCY + END DO + RETURN + +! Code for both increments equal to 1. + +! Clean-up loop so remaining vector length is a multiple of 4. + +20 M = MOD(N,4) + IF (M==0) GOTO 30 + DY(1:M) = DY(1:M) + DA*DX(1:M) + IF (N<4) RETURN +30 MP1 = M + 1 + DO I = MP1, N, 4 + DY(I) = DY(I) + DA*DX(I) + DY(I+1) = DY(I+1) + DA*DX(I+1) + DY(I+2) = DY(I+2) + DA*DX(I+2) + DY(I+3) = DY(I+3) + DA*DX(I+3) + END DO + RETURN + +! Code for equal, positive, non-unit increments. + +40 NS = N*INCX + DO I = 1, NS, INCX + DY(I) = DA*DX(I) + DY(I) + END DO + RETURN + + END SUBROUTINE DAXPY_F90 +!_______________________________________________________________________ + + SUBROUTINE DCOPY_F90(N,DX,INCX,DY,INCY) +! .. +! Copy a vector to another vector. +! .. +! Description of Parameters +! Input: +! N - number of elements in input vector(s) +! DX - real(wp) vector with N elements +! INCX - storage spacing between elements of DX +! DY - real(wp) vector with N elements +! INCY - storage spacing between elements of DY +! Output: +! DY - copy of vector DX(unchanged if N <= 0) +! Copy real(wp) DX to real(wp) DY. +! For I = 0 to N-1, copy DX(LX+I*INCX) to DY(LY+I*INCY), +! where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, +! and LY is defined in a similar way using INCY. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: INCX, INCY, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: DX(*) + REAL (WP), INTENT (INOUT) :: DY(*) +! .. +! .. Local Scalars .. + INTEGER :: I, IX, IY, M, MP1, NS +! .. +! .. Intrinsic Functions .. + INTRINSIC MOD +! .. +! .. FIRST EXECUTABLE STATEMENT DCOPY_F90 +! .. + IF (N<=0) RETURN +! IF (INCX==INCY) IF (INCX-1) 10, 20, 40 + IF (INCX == INCY) THEN + IF (INCX < 1) THEN + GOTO 10 + ELSEIF (INCX == 1) THEN + GOTO 20 + ELSE + GOTO 40 + END IF + END IF + +! Code for unequal or nonpositive increments. + +10 IX = 1 + IY = 1 + IF (INCX<0) IX = (-N+1)*INCX + 1 + IF (INCY<0) IY = (-N+1)*INCY + 1 + DO I = 1, N + DY(IY) = DX(IX) + IX = IX + INCX + IY = IY + INCY + END DO + RETURN + +! Code for both increments equal to 1. + +! Clean-up loop so remaining vector length is a multiple of 7. + +20 M = MOD(N,7) + IF (M==0) GOTO 30 + DO I = 1, M + DY(I) = DX(I) + END DO + IF (N<7) RETURN +30 MP1 = M + 1 + DO I = MP1, N, 7 + DY(I) = DX(I) + DY(I+1) = DX(I+1) + DY(I+2) = DX(I+2) + DY(I+3) = DX(I+3) + DY(I+4) = DX(I+4) + DY(I+5) = DX(I+5) + DY(I+6) = DX(I+6) + END DO + RETURN + +! Code for equal, positive, non-unit increments. + +40 NS = N*INCX + DO I = 1, NS, INCX + DY(I) = DX(I) + END DO + RETURN + + END SUBROUTINE DCOPY_F90 +!_______________________________________________________________________ + + FUNCTION DDOT_F90(N,DX,INCX,DY,INCY) +! .. +! Compute the inner product of two vectors. +! .. +! Description of Parameters +! Input: +! N - number of elements in input vector(s) +! DX - real(wp) vector with N elements +! INCX - storage spacing between elements of DX +! DY - real(wp) vector with N elements +! INCY - storage spacing between elements of DY +! Output: +! DDOT_F90 - real(wp) dot product (zero if N <= 0) +! Returns the dot product of real(wp) DX and DY. +! DDOT_F90 = sum for I = 0 to N-1 of DX(LX+I*INCX) * DY(LY+I*INCY), +! where LX = 1 if INCX >= 0, else LX = 1+(1-N)*INCX, and LY is +! defined in a similar way using INCY. +! .. + IMPLICIT NONE +! .. +! .. Function Return Value .. + REAL (WP) :: DDOT_F90 +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: INCX, INCY, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: DX(*), DY(*) +! .. +! .. Local Scalars .. + INTEGER :: I, IX, IY, M, MP1, NS +! .. +! .. Intrinsic Functions .. + INTRINSIC MOD +! .. +! .. FIRST EXECUTABLE STATEMENT DDOT_F90 +! .. + DDOT_F90 = ZERO + IF (N<=0) RETURN +! IF (INCX==INCY) IF (INCX-1) 10, 20, 40 + IF (INCX == INCY) THEN + IF (INCX < 1) THEN + GOTO 10 + ELSEIF (INCX == 1) THEN + GOTO 20 + ELSE + GOTO 40 + END IF + END IF + +! Code for unequal or nonpositive increments. + +10 IX = 1 + IY = 1 + IF (INCX<0) IX = (-N+1)*INCX + 1 + IF (INCY<0) IY = (-N+1)*INCY + 1 + DO I = 1, N + DDOT_F90 = DDOT_F90 + DX(IX)*DY(IY) + IX = IX + INCX + IY = IY + INCY + END DO + RETURN + +! Code for both increments equal to 1. + +! Clean-up loop so remaining vector length is a multiple of 5. + +20 M = MOD(N,5) + IF (M==0) GOTO 30 + DO I = 1, M + DDOT_F90 = DDOT_F90 + DX(I)*DY(I) + END DO + IF (N<5) RETURN +30 MP1 = M + 1 + DO I = MP1, N, 5 + DDOT_F90 = DDOT_F90 + DX(I)*DY(I) + DX(I+1)*DY(I+1) + & + DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4) + END DO + RETURN + +! Code for equal, positive, non-unit increments. + +40 NS = N*INCX + DO I = 1, NS, INCX + DDOT_F90 = DDOT_F90 + DX(I)*DY(I) + END DO + RETURN + + END FUNCTION DDOT_F90 +!_______________________________________________________________________ + + SUBROUTINE DSCAL_F90(N,DA,DX,INCX) +! .. +! Multiply a vector by a constant. +! .. +! Description of Parameters +! Input: +! N - number of elements in input vector(s) +! DA - real(wp) scale factor +! DX - real(wp) vector with N elements +! INCX - storage spacing between elements of DX +! Output: +! DX - real(wp) result (unchanged if N <= 0) +! Replace real(wp) DX by real(wp) DA*DX. +! For I = 0 to N-1, replace DX(IX+I*INCX) with DA * DX(IX+I*INCX), +! where IX = 1 if INCX >= 0, else IX = 1+(1-N)*INCX. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP), INTENT (IN) :: DA + INTEGER, INTENT (IN) :: INCX, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (INOUT) :: DX(*) +! .. +! .. Local Scalars .. + INTEGER :: I, IX, M, MP1 +! .. +! .. Intrinsic Functions .. + INTRINSIC MOD +! .. +! .. FIRST EXECUTABLE STATEMENT DSCAL_F90 +! .. + IF (N<=0) RETURN + IF (INCX==1) GOTO 10 + +! Code for increment not equal to 1. + + IX = 1 + IF (INCX<0) IX = (-N+1)*INCX + 1 + DO I = 1, N + DX(IX) = DA*DX(IX) + IX = IX + INCX + END DO + RETURN + +! Code for increment equal to 1. + +! Clean-up loop so remaining vector length is a multiple of 5. + +10 M = MOD(N,5) + IF (M==0) GOTO 20 + DX(1:M) = DA*DX(1:M) + IF (N<5) RETURN +20 MP1 = M + 1 + DO I = MP1, N, 5 + DX(I) = DA*DX(I) + DX(I+1) = DA*DX(I+1) + DX(I+2) = DA*DX(I+2) + DX(I+3) = DA*DX(I+3) + DX(I+4) = DA*DX(I+4) + END DO + RETURN + + END SUBROUTINE DSCAL_F90 +!_______________________________________________________________________ + + FUNCTION IDAMAX_F90(N,DX,INCX) +! .. +! Find the smallest index of that component of a vector +! having the maximum magnitude. +! .. +! Description of Parameters +! Input: +! N - number of elements in input vector(s) +! DX - real(wp) vector with N elements +! INCX - storage spacing between elements of DX +! Output: +! IDAMAX_F90 - smallest index (zero if N <= 0) +! Find smallest index of maximum magnitude of real(wp) DX. +! IDAMAX_F90 = first I, I = 1 to N, to maximize +! ABS(DX(IX+(I-1)*INCX)), where IX = 1 if INCX >= 0, +! else IX = 1+(1-N)*INCX. +! .. + IMPLICIT NONE +! .. +! .. Function Return Value .. + INTEGER :: IDAMAX_F90 +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: INCX, N +! .. +! .. Array Arguments .. + REAL (WP), INTENT (IN) :: DX(*) +! .. +! .. Local Scalars .. + REAL (WP) :: DMAX, XMAG + INTEGER :: I, IX +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS +! .. +! .. FIRST EXECUTABLE STATEMENT IDAMAX_F90 +! .. + IDAMAX_F90 = 0 + IF (N<=0) RETURN + IDAMAX_F90 = 1 + IF (N==1) RETURN + + IF (INCX==1) GOTO 10 + +! Code for increments not equal to 1. + + IX = 1 + IF (INCX<0) IX = (-N+1)*INCX + 1 + DMAX = ABS(DX(IX)) + IX = IX + INCX + DO I = 2, N + XMAG = ABS(DX(IX)) + IF (XMAG>DMAX) THEN + IDAMAX_F90 = I + DMAX = XMAG + END IF + IX = IX + INCX + END DO + RETURN + +! Code for increments equal to 1. + +10 DMAX = ABS(DX(1)) + DO I = 2, N + XMAG = ABS(DX(I)) + IF (XMAG>DMAX) THEN + IDAMAX_F90 = I + DMAX = XMAG + END IF + END DO + RETURN + + END FUNCTION IDAMAX_F90 +! End of LINPACK and BLAS subroutines +!_______________________________________________________________________ + +! Beginning of MA28 subroutines +! THIS IS A FORTRAN 90 TRANSLATION OF HSL'S F77 MA28. IT IS INTENDED +! ONLY FOR USE IN CONJUNCTION WITH THE ODE SOLVER DVODE_F90 AND IS +! NOT FUNCTIONAL IN A STANDALONE MANNER. PLEASE NOTE THAT MA28 IS NOT +! PUBLIC DOMAIN SOFTWARE BUT HAS BEEN MADE AVAILABLE TO THE NUMERICAL +! ANALYSIS COMMUNITY BY HARWELL. FOR OTHER USE PLEASE CONTACT HARWELL +! AT HTTP://WWW.HSL-LIBRARY.COM OR CONTACT HSL@HYPROTECH.COM IF YOU +! WISH TO SOLVE GENERAL SPARSE LINEAR SYSTEMS. IF YOU FIND A BUG OR +! ENCOUNTER A PROBLEM WITH THE USE OF MA28 WITH DVODE.F90, PLEASE +! CONTACT ONE OF THE AUTHORS OF DVODE_F90: +! G.D. Byrne (gbyrne@wi.rr.com) +! S. Thompson, thompson@radford.edu +! NUMEROUS CHANGES WERE MADE IN CONNECTION WITH DVODE_F90 USAGE. THESE +! INCLUDE USING METCALF'S CONVERTER TO TRANSLATE THE ORIGINAL f77 CODE +! TO F90, MOVING INITIALIZATIONS TO THE DVODE_F90 PRIVATE SECTION, +! ELIMINATION OF HOLLERITHS IN FORMAT STATEMENTS, ELIMINATION OF +! BLOCKDATA, CHANGES IN ARITHMETICAL OPERATOR SYNTAX, AND CONVERSION +! TO UPPER CASE. THIS VERSION OF MA28 IS INTENDED ONLY FOR USE WITH +! DVODE_F90. PLEASE DO NOT MODIFY IT FOR ANY OTHER PURPOSE. IF YOU +! HAVE LICENSED ACCESS TO THE HSL LIBRARY, AN ALTERNATE VERSION OF +! DVODE_F90 BASED ON THE SUCCESSOR TO MA28, MA48, IS AVAILABLE FROM +! THE AUTHORS. PLEASE NOTE THAT THE ALTERNATE VERSION OF DVODE_F90 +! IS NOT SELF CONTAINED SINCE MA48 IS NOT DISTRIBUTED WITH DVODE_F90. +!****************************************************************** +! *****MA28 COPYRIGHT NOTICE***** +! COPYRIGHT (C) 2001 COUNCIL FOR THE CENTRAL LABORATORY +! OF THE RESEARCH COUNCILS +! ALL RIGHTS RESERVED. + +! NONE OF THE COMMENTS IN THIS COPYRIGHT NOTICE BETWEEN THE LINES +! OF ASTERISKS SHALL BE REMOVED OR ALTERED IN ANY WAY. + +! THIS PACKAGE IS INTENDED FOR COMPILATION WITHOUT MODIFICATION, +! SO MOST OF THE EMBEDDED COMMENTS HAVE BEEN REMOVED. + +! ALL USE IS SUBJECT TO LICENCE. IF YOU NEED FURTHER CLARIFICATION, +! PLEASE SEE HTTP://WWW.HSL-LIBRARY.COM OR CONTACT HSL@HYPROTECH.COM + +! PLEASE NOTE THAT: + +! 1. THE PACKAGES MAY ONLY BE USED FOR THE PURPOSES SPECIFIED IN THE +! LICENCE AGREEMENT AND MUST NOT BE COPIED BY THE LICENSEE FOR +! USE BY ANY OTHER PERSONS. USE OF THE PACKAGES IN ANY COMMERCIAL +! APPLICATION SHALL BE SUBJECT TO PRIOR WRITTEN AGREEMENT BETWEEN +! HYPROTECH UK LIMITED AND THE LICENSEE ON SUITABLE TERMS AND +! CONDITIONS, WHICH WILL INCLUDE FINANCIAL CONDITIONS. +! 2. ALL INFORMATION ON THE PACKAGE IS PROVIDED TO THE LICENSEE ON +! THE UNDERSTANDING THAT THE DETAILS THEREOF ARE CONFIDENTIAL. +! 3. ALL PUBLICATIONS ISSUED BY THE LICENSEE THAT INCLUDE RESULTS +! OBTAINED WITH THE HELP OF ONE OR MORE OF THE PACKAGES SHALL +! ACKNOWLEDGE THE USE OF THE PACKAGES. THE LICENSEE WILL NOTIFY +! HSL@HYPROTECH.COM OR HYPROTECH UK LIMITED OF ANY SUCH PUBLICATION. +! 4. THE PACKAGES MAY BE MODIFIED BY OR ON BEHALF OF THE LICENSEE +! FOR SUCH USE IN RESEARCH APPLICATIONS BUT AT NO TIME SHALL SUCH +! PACKAGES OR MODIFICATIONS THEREOF BECOME THE PROPERTY OF THE +! LICENSEE. THE LICENSEE SHALL MAKE AVAILABLE FREE OF CHARGE TO THE +! COPYRIGHT HOLDER FOR ANY PURPOSE ALL INFORMATION RELATING TO +! ANY MODIFICATION. +! 5. NEITHER COUNCIL FOR THE CENTRAL LABORATORY OF THE RESEARCH +! COUNCILS NOR HYPROTECH UK LIMITED SHALL BE LIABLE FOR ANY +! DIRECT OR CONSEQUENTIAL LOSS OR DAMAGE WHATSOEVER ARISING OUT OF +! THE USE OF PACKAGES BY THE LICENSEE. +!****************************************************************** + + SUBROUTINE MA28AD(N,NZ,A,LICN,IRN,LIRN,ICN,U,IKEEP,IW,W,IFLAG) +! .. +! This subroutine performs the LU factorization of A. +! .. +! The parameters are as follows: +! N Order of matrix. Not altered by subroutine. +! NZ Number of non-zeros in input matrix. Not altered by subroutine. +! A Array of length LICN. Holds non-zeros of matrix +! on entry and non-zeros of factors on exit. Reordered by +! MA20AD and MC23AD and altered by MA30AD. +! LICN Length of arrays A and ICN. Not altered by subroutine. +! IRN Array of length LIRN. Holds row indices on input. +! Used as workspace by MA30AD to hold column orientation of +! matrix. +! LIRN Length of array IRN. Not altered by the subroutine. +! ICN Array of length LICN. Holds column indices on entry +! and column indices of decomposed matrix on exit. Reordered +! by MA20AD and MC23AD and altered by MA30AD. +! U Variable set by user to control bias towards numeric or +! sparsity pivoting. U = 1.0 gives partial pivoting +! while U = 0. does not check multipliers at all. Values of U +! greater than one are treated as one while negative values +! are treated as zero. Not altered by subroutine. +! IKEEP Array of length 5*N used as workspace by MA28AD. +! (See later comments.) It is not required to be set on entry +! and, on exit, it contains information about the decomposition. +! It should be preserved between this call and subsequent calls +! to MA28BD or MA30CD. +! IKEEP(I,1),I = 1,N holds the total length of the part of row +! I in the diagonal block. +! Row IKEEP(I,2),I = 1,N of the input matrix is the Ith row in +! pivot order. +! Column IKEEP(I,3),I = 1,N of the input matrix is the Ith +! Column in pivot order. +! IKEEP(I,4),I = 1,N holds the length of the part of row I in +! the L part of the LU decomposition. +! IKEEP(I,5),I = 1,N holds the length of the part of row I in +! the off-diagonal blocks. If there is only one diagonal block, +! IKEEP(1,5) will be set to -1. +! IW Array of length 8*N. If the option NSRCH <= N is used, then +! the length of array IW can be reduced to 7*N. +! W Array length N. Used by MC24AD both as workspace and to return +! growth estimate in W(1). The use of this array by MA28AD is +! thus optional depending on logical variable GROW. +! IFLAG Variable used as error flag by subroutine. A positive or +! zero value on exit indicates success. Possible negative +! values are -1 through -14. + +! Private Variable Information. +! LP, MP Default value 6 (line printer). Unit number for error +! messages and duplicate element warning, respectively. +! NLP, MLP INTEGER. Unit number for messages from MA30AD and +! MC23AD. Set by MA28AD to the value of LP. +! LBLOCK Logical variable with default value .TRUE. If .TRUE., +! MC23AD is used to first permute the matrix to block lower +! triangular form. +! GROW Logical variable with default value .TRUE. If .TRUE., then +! an estimate of the increase in size of matrix elements during +! LU decomposition is given by MC24AD. +! EPS, RMIN, RESID. Variables not referenced by MA28AD. +! IRNCP, ICNCP INTEGER. Set to number of compresses on arrays IRN +! and ICN/A, respectively. +! MINIRN, MINICN INTEGER. Minimum length of arrays IRN and ICN/A, +! respectively, for success on future runs. +! IRANK INTEGER. Estimated rank of matrix. +! MIRNCP, MICNCP, MIRANK, MIRN, MICN INTEGER. Variables used to +! communicate between MA30FD and MA28FD values of +! abovenamed variables with somewhat similar names. +! ABORT1, ABORT2 LOGICAL. Variables with default value .TRUE. +! If .FALSE., then decomposition will be performed even +! if the matrix is structurally or numerically singular, +! respectively. +! ABORTA, ABORTB LOGICAL. Variables used to communicate values +! of ABORT1 and ABORT2 to MA30AD. +! ABORT Logical variable used to communicate value of ABORT1 +! to MC23AD. +! ABORT3 Logical variable. Not referenced by MA28AD. +! IDISP Array of length 2. Used to communicate information +! on decomposition between this call to MA28AD and subsequent +! calls to MA28BD and MA30CD. On exit, IDISP(1) and +! IDISP(2) indicate position in arrays A and ICN of the +! first and last elements in the LU decomposition of the +! diagonal blocks, respectively. +! NUMNZ Structural rank of matrix. +! NUM Number of diagonal blocks. +! LARGE Size of largest diagonal block. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: U + INTEGER :: IFLAG, LICN, LIRN, N, NZ +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), W(N) + INTEGER :: ICN(LICN), IKEEP(N,5), IRN(LIRN), IW(N,8) +! .. +! .. Local Scalars .. + REAL (WP) :: UPRIV + INTEGER :: I, I1, IEND, II, J, J1, J2, JAY, JJ, KNUM, LENGTH, MOVE, & + NEWJ1, NEWPOS + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX +! .. +! .. FIRST EXECUTABLE STATEMENT MA28AD +! .. +! Check that this call was made from DVODE_F90 and, if not, stop. + IF (.NOT.OK_TO_CALL_MA28) THEN + MSG = 'This version of MA28 may be used only in conjunction with DVODE_F90.' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = 'Please refer to the following HSL copyright notice for MA28.' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = '****************************************************************** ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' *****MA28 COPYRIGHT NOTICE***** ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' COPYRIGHT (C) 2001 COUNCIL FOR THE CENTRAL LABORATORY ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' OF THE RESEARCH COUNCILS ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' ALL RIGHTS RESERVED. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' NONE OF THE COMMENTS IN THIS COPYRIGHT NOTICE BETWEEN THE LINES ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' OF ASTERISKS SHALL BE REMOVED OR ALTERED IN ANY WAY. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' THIS PACKAGE IS INTENDED FOR COMPILATION WITHOUT MODIFICATION, ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' SO MOST OF THE EMBEDDED COMMENTS HAVE BEEN REMOVED. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' ALL USE IS SUBJECT TO LICENCE. IF YOU NEED FURTHER CLARIFICATION, ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' PLEASE SEE HTTP://WWW.HSL-LIBRARY.COM OR CONTACT HSL@HYPROTECH.COM ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' PLEASE NOTE THAT: ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' 1. THE PACKAGES MAY ONLY BE USED FOR THE PURPOSES SPECIFIED IN THE ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' LICENCE AGREEMENT AND MUST NOT BE COPIED BY THE LICENSEE FOR ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' USE BY ANY OTHER PERSONS. USE OF THE PACKAGES IN ANY COMMERCIAL ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' APPLICATION SHALL BE SUBJECT TO PRIOR WRITTEN AGREEMENT BETWEEN ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' HYPROTECH UK LIMITED AND THE LICENSEE ON SUITABLE TERMS AND ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' CONDITIONS, WHICH WILL INCLUDE FINANCIAL CONDITIONS. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' 2. ALL INFORMATION ON THE PACKAGE IS PROVIDED TO THE LICENSEE ON ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' THE UNDERSTANDING THAT THE DETAILS THEREOF ARE CONFIDENTIAL. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' 3. ALL PUBLICATIONS ISSUED BY THE LICENSEE THAT INCLUDE RESULTS ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' OBTAINED WITH THE HELP OF ONE OR MORE OF THE PACKAGES SHALL ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' ACKNOWLEDGE THE USE OF THE PACKAGES. THE LICENSEE WILL NOTIFY ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' HSL@HYPROTECH.COM OR HYPROTECH UK LIMITED OF ANY SUCH PUBLICATION. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' 4. THE PACKAGES MAY BE MODIFIED BY OR ON BEHALF OF THE LICENSEE ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' FOR SUCH USE IN RESEARCH APPLICATIONS BUT AT NO TIME SHALL SUCH ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' PACKAGES OR MODIFICATIONS THEREOF BECOME THE PROPERTY OF THE ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' LICENSEE. THE LICENSEE SHALL MAKE AVAILABLE FREE OF CHARGE TO THE ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' COPYRIGHT HOLDER FOR ANY PURPOSE ALL INFORMATION RELATING TO ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' ANY MODIFICATION. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' 5. NEITHER COUNCIL FOR THE CENTRAL LABORATORY OF THE RESEARCH ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' COUNCILS NOR HYPROTECH UK LIMITED SHALL BE LIABLE FOR ANY ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' DIRECT OR CONSEQUENTIAL LOSS OR DAMAGE WHATSOEVER ARISING OUT OF ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = ' THE USE OF PACKAGES BY THE LICENSEE. ' + CALL XERRDV(MSG,1770,1,0,0,0,0,ZERO,ZERO) + MSG = '****************************************************************** ' + CALL XERRDV(MSG,1770,2,0,0,0,0,ZERO,ZERO) + END IF + +! Some initialization and transfer of information between +! common blocks (see earlier comments). + IFLAG = 0 + ABORTA = ABORT1 + ABORTB = ABORT2 + ABORT = ABORT1 + MLP = LP + NLP = LP + TOL1 = TOL + LBIG1 = LBIG + NSRCH1 = NSRCH +! UPRIV private copy of U is used in case it is outside +! range zero to one and is thus altered by MA30AD. + UPRIV = U +! Simple data check on input variables and array dimensions. + IF (N>0) GOTO 10 + IFLAG = -8 + IF (LP/=0) WRITE (LP,90000) N + GOTO 170 +10 IF (NZ>0) GOTO 20 + IFLAG = -9 + IF (LP/=0) WRITE (LP,90001) NZ + GOTO 170 +20 IF (LICN>=NZ) GOTO 30 + IFLAG = -10 + IF (LP/=0) WRITE (LP,90002) LICN + GOTO 170 +30 IF (LIRN>=NZ) GOTO 40 + IFLAG = -11 + IF (LP/=0) WRITE (LP,90003) LIRN + GOTO 170 + +! Data check to see if all indices lie between 1 and N. +40 DO 50 I = 1, NZ + IF (IRN(I)>0 .AND. IRN(I)<=N .AND. ICN(I)>0 .AND. ICN(I)<=N) & + GOTO 50 + IF (IFLAG==0 .AND. LP/=0) WRITE (LP,90004) + IFLAG = -12 + IF (LP/=0) WRITE (LP,90005) I, A(I), IRN(I), ICN(I) +50 END DO + IF (IFLAG<0) GOTO 180 + +! Sort matrix into row order. + CALL MC20AD(N,NZ,A,ICN,IW(1,1),IRN,0) +! Part of IKEEP is used here as a work-array. IKEEP(I,2) is +! the last row to have a non-zero in column I. IKEEP(I,3) +! is the off-set of column I from the start of the row. + IKEEP(1:N,2) = 0 + IKEEP(1:N,1) = 0 + +! Check for duplicate elements summing any such entries +! and printing a warning message on unit MP. +! MOVE is equal to the number of duplicate elements found. + MOVE = 0 +! The loop also calculates the largest element in the matrix, +! THEMAX. + THEMAX = ZERO +! J1 is position in arrays of first non-zero in row. + J1 = IW(1,1) + DO 90 I = 1, N + IEND = NZ + 1 + IF (I/=N) IEND = IW(I+1,1) + LENGTH = IEND - J1 + IF (LENGTH==0) GOTO 90 + J2 = IEND - 1 + NEWJ1 = J1 - MOVE + DO 80 JJ = J1, J2 + J = ICN(JJ) + THEMAX = MAX(THEMAX,ABS(A(JJ))) + IF (IKEEP(J,2)==I) GOTO 70 +! First time column has ocurred in current row. + IKEEP(J,2) = I + IKEEP(J,3) = JJ - MOVE - NEWJ1 + IF (MOVE==0) GOTO 80 +! Shift necessary because of previous duplicate element. + NEWPOS = JJ - MOVE + A(NEWPOS) = A(JJ) + ICN(NEWPOS) = ICN(JJ) + GOTO 80 +! Duplicate element. +70 MOVE = MOVE + 1 + LENGTH = LENGTH - 1 + JAY = IKEEP(J,3) + NEWJ1 + IF (MP/=0) WRITE (MP,90006) I, J, A(JJ) + A(JAY) = A(JAY) + A(JJ) + THEMAX = MAX(THEMAX,ABS(A(JAY))) +80 END DO + IKEEP(I,1) = LENGTH + J1 = IEND +90 END DO + +! KNUM is actual number of non-zeros in matrix with any multiple +! entries counted only once. + KNUM = NZ - MOVE + IF (.NOT.LBLOCK) GOTO 100 + +! Perform block triangularisation. + CALL MC23AD(N,ICN,A,LICN,IKEEP(1,1),IDISP,IKEEP(1,2),IKEEP(1,3), & + IKEEP(1,5),IW(1,3),IW) + IF (IDISP(1)>0) GOTO 130 + IFLAG = -7 + IF (IDISP(1)==-1) IFLAG = -1 + IF (LP/=0) WRITE (LP,90007) + GOTO 170 + +! Block triangularization not requested. +! Move structure to end of data arrays in preparation for MA30AD. +! Also set LENOFF(1) to -1 and set permutation arrays. +100 DO I = 1, KNUM + II = KNUM - I + 1 + NEWPOS = LICN - I + 1 + ICN(NEWPOS) = ICN(II) + A(NEWPOS) = A(II) + END DO + IDISP(1) = 1 + IDISP(2) = LICN - KNUM + 1 + DO I = 1, N + IKEEP(I,2) = I + IKEEP(I,3) = I + END DO + IKEEP(1,5) = -1 +130 IF (LBIG) BIG1 = THEMAX + IF (NSRCH<=N) GOTO 140 + +! Perform LU decomposition on diagonal blocks. + CALL MA30AD(N,ICN,A,LICN,IKEEP(1,1),IKEEP(1,4),IDISP,IKEEP(1,2), & + IKEEP(1,3),IRN,LIRN,IW(1,2),IW(1,3),IW(1,4),IW(1,5),IW(1,6),IW(1,7), & + IW(1,8),IW,UPRIV,IFLAG) + GOTO 150 +! This call if used if NSRCH has been set less than or equal to N. +! In this case, two integer work arrays of length can be saved. +140 CALL MA30AD(N,ICN,A,LICN,IKEEP(1,1),IKEEP(1,4),IDISP,IKEEP(1,2), & + IKEEP(1,3),IRN,LIRN,IW(1,2),IW(1,3),IW(1,4),IW(1,5),IW,IW,IW(1,6),IW,& + UPRIV,IFLAG) + +! Transfer private variable information. +!150 MINIRN = MAX(MIRN,NZ) +! MINICN = MAX(MICN,NZ) +150 MINIRN = MAX(MINIRN,NZ) + MINICN = MAX(MINICN,NZ) +! IRNCP = MIRNCP +! ICNCP = MICNCP +! IRANK = MIRANK +! NDROP = NDROP1 + IF (LBIG) BIG = BIG1 + IF (IFLAG>=0) GOTO 160 + IF (LP/=0) WRITE (LP,90008) + GOTO 170 + +! Reorder off-diagonal blocks according to pivot permutation. +160 I1 = IDISP(1) - 1 + IF (I1/=0) CALL MC22AD(N,ICN,A,I1,IKEEP(1,5),IKEEP(1,2), & + IKEEP(1,3),IW,IRN) + I1 = IDISP(1) + IEND = LICN - I1 + 1 + +! Optionally calculate element growth estimate. + IF (GROW) CALL MC24AD(N,ICN,A(I1),IEND,IKEEP,IKEEP(1,4),W) +! Increment growth estimate by original maximum element. + IF (GROW) W(1) = W(1) + THEMAX + IF (GROW .AND. N>1) W(2) = THEMAX +! Set flag if the only error is due to duplicate elements. + IF (IFLAG>=0 .AND. MOVE/=0) IFLAG = -14 + GOTO 180 +170 IF (LP/=0) WRITE (LP,90009) +180 RETURN +90000 FORMAT (' N is out of range = ',I10) +90001 FORMAT (' NZ is non positive = ',I10) +90002 FORMAT (' LICN is too small = ',I10) +90003 FORMAT (' LIRN is too small = ',I10) +90004 FORMAT (' Error return from MA28AD because indices found out', & + ' of range') +90005 FORMAT (1X,I6,'The element with value ',1P,D22.14, & + ' is out of range with indices ',I8,',',I8) +90006 FORMAT (' Duplicate element in position ',I8,',',I8,' with value ',1P, & + D22.14) +90007 FORMAT (' Error return from MC23AD') +90008 FORMAT (' Error return from MA30AD') +90009 FORMAT (' Error return from MA28AD') + + END SUBROUTINE MA28AD +!_______________________________________________________________________ + + SUBROUTINE MA28BD(N,NZ,A,LICN,IVECT,JVECT,ICN,IKEEP,IW,W,IFLAG) +! .. +! This subroutine factorizes a matrix of a similar sparsity +! pattern to that previously factorized by MA28AD. +! .. +! The parameters are as follows: +! N Order of matrix. Not altered by subroutine. +! NZ Number of non-zeros in input matrix. Not +! altered by subroutine. +! A Array of length LICN. Holds non-zeros of +! matrix on entry and non-zeros of factors on exit. +! Reordered by MA28DD and altered by MA30BD. +! LICN Length of arrays A and ICN. Not altered by +! subroutine. +! IVECT, JVECT Arrays of length NZ. Hold row and column +! indices of non-zeros, respectively. Not altered +! by subroutine. +! ICN Array of length LICN. Same array as output +! from MA28AD. Unchanged by MA28BD. +! IKEEP Array of length 5*N. Same array as output +! from MA28AD. Unchanged by MA28BD. +! IW Array length 5*N. Used as workspace by +! MA28DD and MA30BD. +! W Array of length N. Used as workspace by +! MA28DD, MA30BD and (optionally) MC24AD. +! IFLAG Integer used as error flag, with positive +! or zero value indicating success. + +! Private Variable Information. +! Unless otherwise stated private variables are as in MA28AD. +! Those variables referenced by MA28BD are mentioned below. +! LP, MP Integers used as in MA28AD as unit number for error +! and warning messages, respectively. +! NLP Integer variable used to give value of LP to MA30ED. +! EPS MA30BD will output a positive value +! for IFLAG if any modulus of the ratio of pivot element +! to the largest element in its row (U part only) is less +! than EPS (unless EPS is greater than 1.0 when no action +! takes place). +! RMIN Variable equal to the value of this minimum ratio in +! cases where EPS is less than or equal to 1.0. +! MEPS,MRMIN Variables used by the subroutine to communicate between +! MA28FD and MA30GD. +! IDISP Integer array of length 2. The same as that used by +! MA28AD. Unchanged by MA28BD. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: IFLAG, LICN, N, NZ +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), W(N) + INTEGER :: ICN(LICN), IKEEP(N,5), IVECT(NZ), IW(N,5), JVECT(NZ) +! .. +! .. Local Scalars .. + INTEGER :: I1, IDUP, IEND +! .. +! .. FIRST EXECUTABLE STATEMENT MA28BD +! .. +! Check to see if elements were dropped in previous MA28AD call. + IF (NDROP==0) GOTO 10 + IFLAG = -15 + IF (LP/=0) WRITE (LP,90000) IFLAG, NDROP + GOTO 70 +10 IFLAG = 0 + MEPS = EPS + NLP = LP +! Simple data check on variables. + IF (N>0) GOTO 20 + IFLAG = -11 + IF (LP/=0) WRITE (LP,90001) N + GOTO 60 +20 IF (NZ>0) GOTO 30 + IFLAG = -10 + IF (LP/=0) WRITE (LP,90002) NZ + GOTO 60 +30 IF (LICN>=NZ) GOTO 40 + IFLAG = -9 + IF (LP/=0) WRITE (LP,90003) LICN + GOTO 60 + +40 CALL MA28DD(N,A,LICN,IVECT,JVECT,NZ,ICN,IKEEP(1,1),IKEEP(1,4), & + IKEEP(1,5),IKEEP(1,2),IKEEP(1,3),IW(1,3),IW,W(1),IFLAG) +! THEMAX is largest element in matrix. + THEMAX = W(1) + IF (LBIG) BIG1 = THEMAX +! IDUP equals one if there were duplicate elements, zero otherwise. + IDUP = 0 + IF (IFLAG==(N+1)) IDUP = 1 + IF (IFLAG<0) GOTO 60 + +! Perform row Gauss elimination on the structure received from MA28DD. + CALL MA30BD(N,ICN,A,LICN,IKEEP(1,1),IKEEP(1,4),IDISP,IKEEP(1,2), & + IKEEP(1,3),W,IW,IFLAG) + +! Transfer private variable information. + IF (LBIG) BIG1 = BIG +! RMIN = MRMIN + IF (IFLAG>=0) GOTO 50 + IFLAG = -2 + IF (LP/=0) WRITE (LP,90004) + GOTO 60 + +! Optionally calculate the growth parameter. +50 I1 = IDISP(1) + IEND = LICN - I1 + 1 + IF (GROW) CALL MC24AD(N,ICN,A(I1),IEND,IKEEP,IKEEP(1,4),W) +! Increment estimate by largest element in input matrix. + IF (GROW) W(1) = W(1) + THEMAX + IF (GROW .AND. N>1) W(2) = THEMAX +! Set flag if the only error is due to duplicate elements. + IF (IDUP==1 .AND. IFLAG>=0) IFLAG = -14 + GOTO 70 +60 IF (LP/=0) WRITE (LP,90005) +70 RETURN +90000 FORMAT (' Error return from MA28BD with IFLAG = ',I4/I7, & + ' Entries dropped from structure by MA28AD') +90001 FORMAT (' N is out of range = ',I10) +90002 FORMAT (' NZ is non positive = ',I10) +90003 FORMAT (' LICN is too small = ',I10) +90004 FORMAT (' Error return from MA30BD') +90005 FORMAT (' + Error return from MA28BD') + + END SUBROUTINE MA28BD +!_______________________________________________________________________ + + SUBROUTINE MA28DD(N,A,LICN,IVECT,JVECT,NZ,ICN,LENR,LENRL,LENOFF, & + IP,IQ,IW1,IW,W1,IFLAG) +! .. +! This subroutine need never be called by the user directly. It sorts +! the user's matrix into the structure of the decomposed form and checks +! for the presence of duplicate entries or non-zeros lying outside the +! sparsity pattern of the decomposition. It also calculates the largest +! element in the input matrix. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: W1 + INTEGER :: IFLAG, LICN, N, NZ +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN) + INTEGER :: ICN(LICN), IP(N), IQ(N), IVECT(NZ), IW(N,2), IW1(N,3), & + JVECT(NZ), LENOFF(N), LENR(N), LENRL(N) +! .. +! .. Local Scalars .. + REAL (WP) :: AA + INTEGER :: I, IBLOCK, IDISP2, IDUMMY, II, INEW, IOLD, J1, J2, JCOMP, & + JDUMMY, JJ, JNEW, JOLD, MIDPT + LOGICAL :: BLOCKL +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, IABS, MAX +! .. +! .. FIRST EXECUTABLE STATEMENT MA28DD +! .. + BLOCKL = LENOFF(1) >= 0 +! IW1(I,3) is set to the block in which row I lies and the +! inverse permutations to IP and IQ are set in IW1(:,1) and +! IW1(:,2), respectively. +! Pointers to beginning of the part of row I in diagonal and +! off-diagonal blocks are set in IW(I,2) and IW(I,1), +! respectively. + IBLOCK = 1 + IW(1,1) = 1 + IW(1,2) = IDISP(1) + DO 10 I = 1, N + IW1(I,3) = IBLOCK + IF (IP(I)<0) IBLOCK = IBLOCK + 1 + II = IABS(IP(I)+0) +! II = IABS(IP(I)) + IW1(II,1) = I + JJ = IQ(I) + JJ = IABS(JJ) + IW1(JJ,2) = I + IF (I==1) GOTO 10 + IF (BLOCKL) IW(I,1) = IW(I-1,1) + LENOFF(I-1) + IW(I,2) = IW(I-1,2) + LENR(I-1) +10 END DO +! Place each non-zero in turn into its correct location in +! the A/ICN array. + IDISP2 = IDISP(2) + DO 170 I = 1, NZ +! Necessary to avoid reference to unassigned element of ICN. + IF (I>IDISP2) GOTO 20 + IF (ICN(I)<0) GOTO 170 +20 IOLD = IVECT(I) + JOLD = JVECT(I) + AA = A(I) +! This is a dummy loop for following a chain of interchanges. +! It will be executed NZ TIMES in total. + DO IDUMMY = 1, NZ +! Perform some validity checks on IOLD and JOLD. + IF (IOLD<=N .AND. IOLD>0 .AND. JOLD<=N .AND. JOLD>0) GOTO 30 + IF (LP/=0) WRITE (LP,90000) I, A(I), IOLD, JOLD + IFLAG = -12 + GOTO 180 +30 INEW = IW1(IOLD,1) + JNEW = IW1(JOLD,2) +! Are we in a valid block and is it diagonal or off-diagonal? +! IF (IW1(INEW,3)-IW1(JNEW,3)) 40, 60, 50 +!40 IFLAG = -13 + IF (IW1(INEW,3)-IW1(JNEW,3) == 0) GOTO 60 + IF (IW1(INEW,3)-IW1(JNEW,3) > 0) GOTO 50 + IFLAG = -13 + IF (LP/=0) WRITE (LP,90001) IOLD, JOLD + GOTO 180 +50 J1 = IW(INEW,1) + J2 = J1 + LENOFF(INEW) - 1 + GOTO 110 +! Element is in diagonal block. +60 J1 = IW(INEW,2) + IF (INEW>JNEW) GOTO 70 + J2 = J1 + LENR(INEW) - 1 + J1 = J1 + LENRL(INEW) + GOTO 110 +70 J2 = J1 + LENRL(INEW) +! Binary search of ordered list. Element in L part of row. + DO 100 JDUMMY = 1, N + MIDPT = (J1+J2)/2 + JCOMP = IABS(ICN(MIDPT)+0) +! JCOMP = IABS(ICN(MIDPT)) +! IF (JNEW-JCOMP) 80, 130, 90 +!80 J2 = MIDPT + IF (JNEW-JCOMP == 0) GOTO 130 + IF (JNEW-JCOMP > 0) GOTO 90 + J2 = MIDPT + GOTO 100 +90 J1 = MIDPT +100 END DO + IFLAG = -13 + IF (LP/=0) WRITE (LP,90002) IOLD, JOLD + GOTO 180 +! Linear search. Element in L part of row or off-diagonal blocks. +110 DO MIDPT = J1, J2 + IF (IABS(ICN(MIDPT)+0)==JNEW) GOTO 130 +! IF (IABS(ICN(MIDPT))==JNEW) GOTO 130 + END DO + IFLAG = -13 + IF (LP/=0) WRITE (LP,90002) IOLD, JOLD + GOTO 180 +! Equivalent element of ICN is in position MIDPT. +130 IF (ICN(MIDPT)<0) GOTO 160 + IF (MIDPT>NZ .OR. MIDPT<=I) GOTO 150 + W1 = A(MIDPT) + A(MIDPT) = AA + AA = W1 + IOLD = IVECT(MIDPT) + JOLD = JVECT(MIDPT) + ICN(MIDPT) = -ICN(MIDPT) + END DO +150 A(MIDPT) = AA + ICN(MIDPT) = -ICN(MIDPT) + GOTO 170 +160 A(MIDPT) = A(MIDPT) + AA +! Set flag for duplicate elements. + iflag = n + 1 +170 END DO +! Reset ICN array and zero elements in LU but not in A. +! Also calculate the maximum element of A. +180 W1 = ZERO + DO 200 I = 1, IDISP2 + IF (ICN(I)<0) GOTO 190 + A(I) = ZERO + GOTO 200 +190 ICN(I) = -ICN(I) + W1 = MAX(W1,ABS(A(I))) +200 END DO + RETURN +90000 FORMAT (' Element ',I6,' with value ',D22.14,' has indices ',I8, & + ','/I8,' indices out of range') +90001 FORMAT (' Non-zero ',I7,',',I6,' in zero off-diagonal',' block') +90002 FORMAT (' Element ',I6,',',I6,' was not in LU pattern') + + END SUBROUTINE MA28DD +!_______________________________________________________________________ + + SUBROUTINE MA28CD(N,A,LICN,ICN,IKEEP,RHS,W,MTYPE) +! .. +! This subroutine uses the factors from MA28AD or MA28BD to solve a +! system of equations without iterative refinement. +! .. +! The parameters are: +! N Order of matrix. Not altered by subroutine. +! A array of length LICN. the same array as +! was used in the most recent call to MA28AD or MA28BD. +! LICN Length of arrays A and ICN. Not altered by subroutine. +! ICN Array of length LICN. Same array as output from +! MA28AD. Unchanged by MA30CD. +! IKEEP Array of length 5*N. Same array as output from +! MA28AD. Unchanged by MA30CD. +! RHS Array of length N. On entry, it holds the +! right hand side, on exit, the solution vector. +! W Array of length N. Used as workspace by MA30CD. +! MTYPE Integer used to tell MA30CD to solve the direct +! equation (MTYPE = 1) or its transpose (MTYPE /= 1). + +! Private Variable Information. +! Unless otherwise stated private variables are as in MA28AD. +! Those variables referenced by MA30CD are mentioned below. +! RESID Variable which returns maximum residual of +! equations where pivot was zero. +! MRESID Variable used by MA30CD to communicate between +! MA28FD and MA30HD. +! IDISP Integer array of length 2. The same as that used +! by MA28AD. Unchanged by MA28BD. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: LICN, MTYPE, N +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), RHS(N), W(N) + INTEGER :: ICN(LICN), IKEEP(N,5) +! .. +! .. FIRST EXECUTABLE STATEMENT MA28CD +! .. +! This call performs the solution of the set of equations. + CALL MA30CD(N,ICN,A,LICN,IKEEP(1,1),IKEEP(1,4),IKEEP(1,5),IDISP, & + IKEEP(1,2),IKEEP(1,3),RHS,W,MTYPE) +! Transfer private variable information. + RESID = MRESID + RETURN + + END SUBROUTINE MA28CD +!_______________________________________________________________________ + + SUBROUTINE MA28ID(N,NZ,AORG,IRNORG,ICNORG,LICN,A,ICN,IKEEP,RHS, & + X,R,W,MTYPE,PREC,IFLAG) +! .. +! This subroutine uses the factors from an earlier call to MA28AD or +! MA28BD to solve the system of equations with iterative refinement. +! .. +! The parameters are: +! N Order of the matrix. Not altered by the subroutine. +! NZ Number of entries in the original matrix. Not altered by +! the subroutine. +! For this entry the original matrix must have been saved in +! AORG, IRNORG, ICNORG where entry AORG(K) is in row IRNORG(K) +! and column ICNORG(K),K = 1,...,NZ. Information about the +! factors of A is communicated to this subroutine via the +! parameters LICN, A, ICN and IKEEP where: +! AORG Array of length NZ. Not altered by MA28ID. +! IRNORG Array of length NZ. Not altered by MA28ID. +! ICNORG Array of length NZ. Not altered by MA28ID. +! LICN Length of arrays A and ICN. Not altered by the subroutine. +! A Array of length LICN. It must be unchanged since the +! last call to MA28AD or MA28BD. Not altered by the +! subroutine. +! ICN, IKEEP are the arrays (of lengths LICN and 5*N, respectively) +! of the same names as in the previous all to MA28AD. They +! should be unchanged since this earlier call and they are +! not altered by MA28ID. +! The other parameters are as follows: +! RHS Array of length N. The user must set RHS(I) to contain +! the value of the Ith component of the right hand side. +! Not altered by MA28ID. +! X Array of length N. If an initial guess of the solution +! is given (ISTART equal to 1), then the user must set X(I) +! to contain the value of the Ith component of the estimated +! solution. On exit, X(I) contains the Ith component of the +! solution vector. +! R Array of length N. It need not be set on entry. On exit, +! R(I) contains the Ith component of an estimate of the error +! if MAXIT is greater than 0. +! W Array of length N. Used as workspace by MA28ID. +! MTYPE Must be set to determine whether MA28ID will solve A*X = RHS +! (MTYPE = 1) or AT*X = RHS (MTYPE /= 1). Not altered by MA28ID. +! PREC Should be set by the user to the relative accuracy required. +! The iterative refinement will terminate if the magnitude of +! the largest component of the estimated error relative to the +! largest component in the solution is less than PREC. +! Not altered by MA28ID. +! IFLAG Diagnostic flag which will be set to zero on successful +! exit from MA28ID. Otherwise it will have a non-zero value. +! The non-zero value IFLAG can have on exit from MA28ID are +! -16 indicating that more than MAXIT iteartions are required. +! -17 indicating that more convergence was too slow. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: PREC + INTEGER :: IFLAG, LICN, MTYPE, N, NZ +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), AORG(NZ), R(N), RHS(N), W(N), X(N) + INTEGER :: ICN(LICN), ICNORG(NZ), IKEEP(N,5), IRNORG(NZ) +! .. +! .. Local Scalars .. + REAL (WP) :: CONVER, D, DD + INTEGER :: I, ITERAT, NCOL, NROW +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX +! .. +! .. FIRST EXECUTABLE STATEMENT MA28ID +! .. +! Initialization of NOITER, ERRMAX, and IFLAG. + NOITER = 0 + ERRMAX = ZERO + IFLAG = 0 + +! Jump if a starting vector has been supplied by the user. + + IF (ISTART==1) GOTO 20 + +! Make a copy of the right-hand side vector. + + X(1:N) = RHS(1:N) + +! Find the first solution. + + CALL MA28CD(N,A,LICN,ICN,IKEEP,X,W,MTYPE) + +! Stop the computations if MAXIT = 0. + +20 IF (MAXIT==0) GOTO 160 + +! Calculate the max-norm of the first solution. + + DD = 0.0 + DO I = 1, N + DD = MAX(DD,ABS(X(I))) + END DO + DXMAX = DD + +! Begin the iterative process. + + DO 120 ITERAT = 1, MAXIT + D = DD + +! Calculate the residual vector. + + R(1:N) = RHS(1:N) + IF (MTYPE==1) GOTO 60 + DO I = 1, NZ + NROW = IRNORG(I) + NCOL = ICNORG(I) + R(NCOL) = R(NCOL) - AORG(I)*X(NROW) + END DO + GOTO 80 +! MTYPE = 1. +60 DO I = 1, NZ + NROW = IRNORG(I) + NCOL = ICNORG(I) + R(NROW) = R(NROW) - AORG(I)*X(NCOL) + END DO +80 DRES = 0.0 + +! Find the max-norm of the residual vector. + + DO I = 1, N + DRES = MAX(DRES,ABS(R(I))) + END DO + +! Stop the calculations if the max-norm of +! the residual vector is zero. + +! IF (DRES == 0.0) GOTO 150 + IF (ABS(DRES)<=ZERO) GOTO 150 + +! Calculate the correction vector. + + NOITER = NOITER + 1 + CALL MA28CD(N,A,LICN,ICN,IKEEP,R,W,MTYPE) + +! Find the max-norm of the correction vector. + + DD = 0.0 + DO I = 1, N + DD = MAX(DD,ABS(R(I))) + END DO + +! Check the convergence. + + IF (DD>D*CGCE .AND. ITERAT>=2) GOTO 130 + IF (ABS((DXMAX*TEN+DD)-(DXMAX*TEN))<=ZERO) GOTO 140 + +! Attempt to improve the solution. + + DXMAX = 0.0 + DO I = 1, N + X(I) = X(I) + R(I) + DXMAX = MAX(DXMAX,ABS(X(I))) + END DO + +! Check the stopping criterion. + + IF (DD= 0 or IFLAG = -14) give the minimum size of IRN +! and A/ICN, respectively which would enable a successful run on +! an identical matrix. On an exit with IFLAG equal to -5, MINICN +! gives the minimum value of ICN for success on subsequent runs on +! an identical matrix. In the event of failure with IFLAG = -6,-4, +! -3,-2, OR -1, then MINICN and MINIRN give the minimum value of +! LICN and LIRN, respectively which would be required for a +! successful decomposition up to the point at which the failure +! occurred. +! IRANK Integer variable which gives an upper bound on the rank of +! the matrix. +! ABORT1 is a logical variable with default value .TRUE. If ABORT1 is +! set to .FALSE., then MA28AD will decompose structurally singular +! matrices (including rectangular ones). +! ABORT2 is a logical variable with default value .TRUE. If ABORT2 is +! set to .FALSE., then MA28AD will decompose numerically singular +! matrices. +! IDISP is an integer array of length 2. On output from MA28AD, The +! indices of the diagonal blocks of the factors lie in positions +! IDISP(1) to IDISP(2) of A/ICN. This array must be preserved +! between a call to MA28AD and subsequent calls to MA28BD, +! MA30CD or MA28ID. +! TOL If set to a positive value, then any non-zero whose modulus is +! less than TOL will be dropped from the factorization. The +! factorization will then require less storage but will be +! inaccurate. After a run of MA28AD with TOL positive it is not +! possible to use MA28BD and the user is recommended to use +! MA28ID to obtain the solution. The default value for TOL is 0.0. +! THEMAX On exit from MA28AD, THEMAX will hold the largest entry of +! the original matrix. +! BIG If LBIG has been set to .TRUE., BIG will hold the largest entry +! encountered during the factorization by MA28AD or MA28BD. +! DXMAX On exit from MA28ID, DXMAX will be set to the largest component +! of the solution. +! ERRMAX If MAXIT is positive, ERRMAX will be set to the largest +! component in the estimate of the error. +! DRES On exit from MA28ID, if MAXIT is positive, DRES will be set to +! the largest component of the residual. +! CGCE Used by MA28ID to check the convergence rate. If the ratio of +! successive corrections is not less than CGCE, then we terminate +! since the convergence rate is adjudged too slow. +! NDROP If TOL has been set positive on exit from MA28AD, NDROP will +! hold the number of entries dropped from the data structure. +! MAXIT Maximum number of iterations performed by MA28ID. Default = 16. +! NOITER Set by MA28ID to the number of iterative refinement iterations +! actually used. +! NSRCH If NSRCH is set to a value less than N, then a different pivot +! option will be employed by MA28AD. This may result in different +! fill-in and execution time for MA28AD. If NSRCH is less than or +! equal to N, the workspace array IW can be reduced in length. The +! default value for NSRCH is 32768. +! ISTART If ISTART is set to a value other than zero, then the user +! must supply an estimate of the solution to MA28ID. The default +! value for istart is zero. +! LBIG If LBIG is set to .TRUE., the value of the largest element +! encountered in the factorization by MA28AD or MA28BD is returned +! in BIG. Setting LBIG to .TRUE. will increase the time for MA28AD +! marginally and that for MA28BD by about 20%. The default value +! for LBIG is .FALSE. +!_______________________________________________________________________ + + SUBROUTINE MA30AD(NN,ICN,A,LICN,LENR,LENRL,IDISP,IP,IQ,IRN,LIRN, & + LENC,IFIRST,LASTR,NEXTR,LASTC,NEXTC,IPTR,IPC,U,IFLAG) +! .. +! If the user requires a more convenient data interface then the +! MA28 package should be used. The MA28 subroutines call the MA30 +! subroutines after checking the user's input data and optionally +! using MC23AD to permute the matrix to block triangular form. +! This package of subroutines (MA30AD, MA30BD, MA30CD, and MA30DD) +! performs operations pertinent to the solution of a general +! sparse N by N system of linear equations (i.e., solve +! AX = B). Structually singular matrices are permitted including +! those with row or columns consisting entirely of zeros (i.e., +! including rectangular matrices). It is assumed that the +! non-zeros of the matrix A do not differ widely in size. If +! necessary a prior call of the scaling subroutine MA19AD may be +! made. +! A discussion of the design of these subroutines is given by Duff +! and Reid (ACM TRANS MATH SOFTWARE 5 PP 18-35, 1979(CSS 48)) while +! fuller details of the implementation are given in duff (HARWELL +! REPORT AERE-R 8730, 1977). The additional pivoting option in +! ma30ad and the use of drop tolerances (see private variables for +! MA30ID) were added to the package after joint work with Reid, +! Schaumburg, Wasniewski and Zlatev (Duff, Reid, Schaumburg, +! Wasniewski and Zlatev, HARWELL REPORT CSS 135, 1983). + +! MA30AD performs the LU decomposition of the diagonal blocks of +! the permutation PAQ of a sparse matrix A, where input permutations +! P1 and Q1 are used to define the diagonal blocks. There may be +! non-zeros in the off-diagonal blocks but they are unaffected by +! MA30AD. P and P1 differ only within blocks as do Q and Q1. The +! permutations P1 and Q1 may be found by calling MC23AD or the +! matrix may be treated as a single block by using P1 = Q1 = I. The +! matrix non-zeros should be held compactly by rows, although it +! should be noted that the user can supply the matrix by columns +! to get the LU decomposition of A transpose. + +! This description should also be consulted for further information +! on most of the parameters of MA30BD and MA30CD. +! The parameters are: +! N is an integer variable which must be set by the user to the order +! of the matrix. It is not altered by MA30AD. +! ICN is an integer array of length LICN. Positions IDISP(2) to +! LICN must be set by the user to contain the column indices of +! the non-zeros in the diagonal blocks of P1*A*Q1. Those belonging +! to a single row must be contiguous but the ordering of column +! indices with each row is unimportant. The non-zeros of row I +! precede those of row I+1,I = 1,...,N-1 and no wasted space is +! allowed between the rows. On output the column indices of the +! LU decomposition of PAQ are held in positions IDISP(1) to +! IDISP(2), the rows are in pivotal order, and the column indices +! of the L Part of each row are in pivotal order and precede those +! of U. Again there is no wasted space either within a row or +! between the rows. ICN(1) to ICN(IDISP(1)-1), are neither +! required nor altered. If MC23AD has been called, these will hold +! information about the off-diagonal blocks. +! A Array of length LICN whose entries IDISP(2) to LICN must be set +! by the user to the values of the non-zero entries of the matrix +! in the order indicated by ICN. On output A will hold the LU +! factors of the matrix where again the position in the matrix +! is determined by the corresponding values in ICN. +! A(1) to A(IDISP(1)-1) are neither required nor altered. +! LICN is an integer variable which must be set by the user to the +! length of arrays ICN and A. It must be big enough for A and ICN +! to hold all the non-zeros of L and U and leave some "elbow +! room". It is possible to calculate a minimum value for LICN by +! a preliminary run of MA30AD. The adequacy of the elbow room +! can be judged by the size of the private variable ICNCP. +! It is not altered by MA30AD. +! LENR is an integer array of length N. On input, LENR(I) should +! equal the number of non-zeros in row I,I = 1,...,N of the +! diagonal blocks of P1*A*Q1. On output, LENR(I) will equal the +! total number of non-zeros in row I of L and row I of U. +! LENRL is an integer array of length N. On output from MA30AD, +! LENRL(I) will hold the number of non-zeros in row I of L. +! IDISP is an integer array of length 2. The user should set IDISP(1) +! to be the first available position in A/ICN for the LU +! decomposition while IDISP(2) is set to the position in A/ICN of +! the first non-zero in the diagonal blocks of P1*A*Q1. On output, +! IDISP(1) will be unaltered while IDISP(2) will be set to the +! position in A/ICN of the last non-zero of the LU decomposition. +! IP is an integer array of length N which holds a permutation of +! the integers 1 to N. On input to MA30AD, the absolute value of +! IP(I) must be set to the row of A which is row I of P1*A*Q1. A +! negative value for IP(I) indicates that row I is at the end of a +! diagonal block. On output from MA30AD, IP(I) indicates the row +! of A which is the Ith row in PAQ. IP(I) will still be negative +! for the last row of each block (except the last). +! IQ is an integer array of length N which again holds a +! permutation of the integers 1 to N. On input to MA30AD, IQ(J) +! must be set to the column of A which is column J of P1*A*Q1. On +! output from MA30AD, the absolute value of IQ(J) indicates the +! column of A which is the Jth in PAQ. For rows, I say, in which +! structural or numerical singularity is detected IQ(I) is +! negated. +! IRN is an integer array of length LIRN used as workspace by +! MA30AD. +! LIRN is an integer variable. It should be greater than the +! largest number of non-zeros in a diagonal block of P1*A*Q1 but +! need not be as large as LICN. It is the length of array IRN and +! should be large enough to hold the active part of any block, +! plus some "elbow room", the a posteriori adequacy of which can +! be estimated by examining the size of private variable +! IRNCP. +! LENC, FIRST, LASTR, NEXTR, LASTC, NEXTC are all integer arrays of +! length N which are used as workspace by MA30AD. If NSRCH is +! set to a value less than or equal to N, then arrays LASTC and +! NEXTC are not referenced by MA30AD and so can be dummied in +! the call to MA30AD. +! IPTR, IPC are integer arrays of length N which are used as +! workspace by MA30AD. +! U is a real(kind=wp) variable which should be set by the user +! to a value between 0. and 1.0. If less than zero it is reset +! to zero and if its value is 1.0 or greater it is reset to +! 0.9999 (0.999999999 in D version). It determines the balance +! between pivoting for sparsity and for stability, values near +! zero emphasizing sparsity and values near one emphasizing +! stability. We recommend U = 0.1 as a posible first trial value. +! The stability can be judged by a later call to MC24AD or by +! setting LBIG to .TRUE. +! IFLAG is an integer variable. It will have a non-negative value +! if MA30AD is successful. Negative values indicate error +! conditions while positive values indicate that the matrix has +! been successfully decomposed but is singular. For each non-zero +! value, an appropriate message is output on unit LP. Possible +! non-zero values for IFLAG are +! -1 THe matrix is structurally singular with rank given by IRANK +! in private variables for MA30FD. +! +1 If, however, the user wants the LU decomposition of a +! structurally singular matrix and sets the private variable +! ABORT1 to .FALSE., then, in the event of singularity and a +! successful decomposition, Iflag is returned with the value +1 +! and no message is output. +! -2 The matrix is numerically singular (it may also be structurally +! singular) with estimated rank given by IRANK in private variables +! for MA30FD. +! +2 THE user can choose to continue the decomposition even when a +! zero pivot is encountered by setting private variable +! ABORT2 TO .FALSE. If a singularity is encountered, IFLAG will +! then return with a value of +2, and no message is output if +! the decomposition has been completed successfully. +! -3 LIRN has not been large enough to continue with the +! decomposition. If the stage was zero then private variable +! MINIRN gives the length sufficient to start the decomposition on +! this block. FOr a successful decomposition on this block the +! user should make LIRN slightly (say about N/2) greater than this +! value. +! -4 LICN is not large enough to continue with the decomposition. +! -5 The decomposition has been completed but some of the LU factors +! have been discarded to create enough room in A/ICN to continue +! the decomposition. The variable MINICN in private variables for +! MA30FD. Then gives the size that LICN should be to enable the +! factorization to be successful. If the user sets private +! variable ABORT3 to .TRUE., then the subroutine will exit +! immediately instead of destroying any factors and continuing. +! -6 Both LICN and LIRN are too small. Termination has been caused +! by lack of space in IRN (see error IFLAG = -3), but already +! some of the LU factors in A/ICN have been lost (see error +! IFLAG = -5). MINICN gives the minimum amount of space +! required in A/ICN for decomposition up to this point. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + REAL (WP) :: U + INTEGER :: IFLAG, LICN, LIRN, NN +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN) + INTEGER :: ICN(LICN), IDISP(2), IFIRST(NN), IP(NN), IPC(NN), & + IPTR(NN), IQ(NN), IRN(LIRN), LASTC(NN), LASTR(NN), & + LENC(NN), LENR(NN), LENRL(NN), NEXTC(NN), NEXTR(NN) +! .. +! .. Local Scalars .. + REAL (WP) :: AANEW, AMAX, ANEW, AU, PIVR, PIVRAT, SCALE + INTEGER :: COLUPD, DISPC, I, I1, I2, IACTIV, IBEG, IDISPC, & + IDROP, IDUMMY, IEND, IFILL, IFIR, II, III, IJFIR, IJP1, & + IJPOS, ILAST, INDROW, IOP, IPIV, IPOS, IROWS, ISING, ISRCH,& + ISTART, ISW, ISW1, ITOP, J, J1, J2, JBEG, JCOST, JCOUNT, & + JDIFF, JDUMMY, JEND, JJ, JMORE, JNEW, JNPOS, JOLD, JPIV, & + JPOS, JROOM, JVAL, JZER, JZERO, K, KCOST, KDROP, L, LC, & + LENPIV, LENPP, LL, LR, MOREI, MSRCH, N, NBLOCK, NC, NNM1, & + NR, NUM, NZ, NZ2, NZCOL, NZMIN, NZPC, NZROW, OLDEND, & + OLDPIV, PIVEND, PIVOT, PIVROW, ROWI +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, IABS, MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT MA30AD +! .. + MSRCH = NSRCH + NDROP = 0 + LNPIV(1:10) = 0 + LPIV(1:10) = 0 + MAPIV = 0 + MANPIV = 0 + IAVPIV = 0 + IANPIV = 0 + KOUNTL = 0 + MINIRN = 0 + MINICN = IDISP(1) - 1 + MOREI = 0 + IRANK = NN + IRNCP = 0 + ICNCP = 0 + IFLAG = 0 +! Reset U if necessary. + U = MIN(U,UMAX) +! IBEG is the position of the next pivot row after elimination +! step using it. + U = MAX(U,ZERO) + IBEG = IDISP(1) +! IACTIV is the position of the first entry in the active part +! of A/ICN. + IACTIV = IDISP(2) +! NZROW is current number of non-zeros in active and unprocessed +! part of row file ICN. + NZROW = LICN - IACTIV + 1 + MINICN = NZROW + MINICN + +! Count the number of diagonal blocks and set up pointers to the +! beginnings of the rows. NUM is the number of diagonal blocks. + NUM = 1 + IPTR(1) = IACTIV + IF (NN==1) GOTO 30 + NNM1 = NN - 1 + DO I = 1, NNM1 + IF (IP(I)<0) NUM = NUM + 1 + IPTR(I+1) = IPTR(I) + LENR(I) + END DO +! ILAST is the last row in the previous block. +30 ILAST = 0 + +! *********************************************** +! **** LU decomposition of block NBLOCK **** +! *********************************************** + +! Each pass through this loop performs LU decomposition on one +! of the diagonal blocks. + DO 1070 NBLOCK = 1, NUM + ISTART = ILAST + 1 + DO IROWS = ISTART, NN + IF (IP(IROWS)<0) GOTO 50 + END DO + IROWS = NN +50 ILAST = IROWS +! N is the number of rows in the current block. +! ISTART is the index of the first row in the current block. +! ILAST is the index of the last row in the current block. +! IACTIV is the position of the first entry in the block. +! ITOP is the position of the last entry in the block. + N = ILAST - ISTART + 1 + IF (N/=1) GOTO 100 + +! Code for dealing WITH 1x1 block. + LENRL(ILAST) = 0 + ISING = ISTART + IF (LENR(ILAST)/=0) GOTO 60 +! Block is structurally singular. + IRANK = IRANK - 1 + ISING = -ISING + IF (IFLAG/=2 .AND. IFLAG/=-5) IFLAG = 1 + IF (.NOT.ABORT1) GOTO 90 + IDISP(2) = IACTIV + IFLAG = -1 + IF (LP/=0) WRITE (LP,90000) +! RETURN + GOTO 1190 +60 SCALE = ABS(A(IACTIV)) + IF (ABS(SCALE)<=ZERO) GOTO 70 + IF (LBIG) BIG = MAX(BIG,SCALE) + GOTO 80 +70 ISING = -ISING + IRANK = IRANK - 1 + IPTR(ILAST) = 0 + IF (IFLAG/=-5) IFLAG = 2 + IF (.NOT.ABORT2) GOTO 80 + IDISP(2) = IACTIV + IFLAG = -2 + IF (LP/=0) WRITE (LP,90001) + GOTO 1190 +80 A(IBEG) = A(IACTIV) + ICN(IBEG) = ICN(IACTIV) + IACTIV = IACTIV + 1 + IPTR(ISTART) = 0 + IBEG = IBEG + 1 + NZROW = NZROW - 1 +90 LASTR(ISTART) = ISTART + IPC(ISTART) = -ISING + GOTO 1070 + +! Non-trivial block. +100 ITOP = LICN + IF (ILAST/=NN) ITOP = IPTR(ILAST+1) - 1 + +! Set up column oriented storage. + LENRL(ISTART:ILAST) = 0 + LENC(ISTART:ILAST) = 0 + IF (ITOP-IACTIVJ2) GOTO 160 + DO JJ = J1, J2 + J = ICN(JJ) + IPOS = IPC(J) - 1 + IRN(IPOS) = INDROW + IPC(J) = IPOS + END DO +160 END DO +! DISPC is the lowest indexed active location in the column file. + DISPC = IPC(ISTART) + NZCOL = LIRN - DISPC + 1 + MINIRN = MAX(NZCOL,MINIRN) + NZMIN = 1 + +! Initialize array IFIRST. IFIRST(I) = +/- K indicates that +! row/col K has I non-zeros. If IFIRST(I) = 0, there is no +! row or column with I non-zeros. + IFIRST(1:N) = 0 + +! Compute ordering of row and column counts. +! First run through columns (from column N to column 1). + DO 190 JJ = ISTART, ILAST + J = ILAST - JJ + ISTART + NZ = LENC(J) + IF (NZ/=0) GOTO 180 + IPC(J) = 0 + GOTO 190 +180 IF (NSRCH<=NN) GOTO 190 + ISW = IFIRST(NZ) + IFIRST(NZ) = -J + LASTC(J) = 0 + NEXTC(J) = -ISW + ISW1 = IABS(ISW) + IF (ISW/=0) LASTC(ISW1) = J +190 END DO +! Now run through rows (again from N to 1). + DO 220 II = ISTART, ILAST + I = ILAST - II + ISTART + NZ = LENR(I) + IF (NZ/=0) GOTO 200 + IPTR(I) = 0 + LASTR(I) = 0 + GOTO 220 +200 ISW = IFIRST(NZ) + IFIRST(NZ) = I + IF (ISW>0) GOTO 210 + NEXTR(I) = 0 + LASTR(I) = ISW + GOTO 220 +210 NEXTR(I) = ISW + LASTR(I) = LASTR(ISW) + LASTR(ISW) = I +220 END DO + +! ********************************************** +! **** Start of main elimination loop **** +! ********************************************** + DO 1050 PIVOT = ISTART, ILAST + +! First find the pivot using MARKOWITZ criterion with +! stability control. +! JCOST is the Markowitz cost of the best pivot so far. +! This pivot is in row IPIV and column JPIV. + NZ2 = NZMIN + JCOST = N*N + +! Examine rows/columns in order of ascending count. + DO L = 1, 2 + PIVRAT = ZERO + ISRCH = 1 + LL = L +! A pass with L equal to 2 is only performed in the +! case of singularity. + DO 340 NZ = NZ2, N + IF (JCOST<=(NZ-1)**2) GOTO 430 + IJFIR = IFIRST(NZ) +! IF (IJFIR) 240, 230, 250 +!230 IF (LL==1) NZMIN = NZ + 1 + IF (IJFIR < 0) GOTO 240 + IF (IJFIR > 0) GOTO 250 + IF (LL==1) NZMIN = NZ + 1 + GOTO 340 +240 LL = 2 + IJFIR = -IJFIR + GOTO 300 +250 LL = 2 +! Scan rows with NZ non-zeros. + DO IDUMMY = 1, N + IF (JCOST<=(NZ-1)**2) GOTO 430 + IF (ISRCH>MSRCH) GOTO 430 + IF (IJFIR==0) GOTO 290 +! Row IJFIR is now examined. + I = IJFIR + IJFIR = NEXTR(I) +! First calculate multiplier threshold level. + AMAX = ZERO + J1 = IPTR(I) + LENRL(I) + J2 = IPTR(I) + LENR(I) - 1 + DO JJ = J1, J2 + AMAX = MAX(AMAX,ABS(A(JJ))) + END DO + AU = AMAX*U + ISRCH = ISRCH + 1 +! Scan row for possible pivots. + DO 270 JJ = J1, J2 + IF (ABS(A(JJ))<=AU .AND. L==1) GOTO 270 + J = ICN(JJ) + KCOST = (NZ-1)*(LENC(J)-1) + IF (KCOST>JCOST) GOTO 270 + PIVR = ZERO + IF (ABS(AMAX)>ZERO) PIVR = ABS(A(JJ))/AMAX + IF (KCOST==JCOST .AND. (PIVR<=PIVRAT .OR. NSRCH>NN+1)) & + GOTO 270 +! Best pivot so far is found. + JCOST = KCOST + IJPOS = JJ + IPIV = I + JPIV = J + IF (MSRCH>NN+1 .AND. JCOST<=(NZ-1)**2) GOTO 430 + PIVRAT = PIVR +270 END DO + END DO + +! Columns with NZ non-zeros now examined. +290 IJFIR = IFIRST(NZ) + IJFIR = -LASTR(IJFIR) +300 IF (JCOST<=NZ*(NZ-1)) GOTO 430 + IF (MSRCH<=NN) GOTO 340 + DO 330 IDUMMY = 1, N + IF (IJFIR==0) GOTO 340 + J = IJFIR + IJFIR = NEXTC(IJFIR) + I1 = IPC(J) + I2 = I1 + NZ - 1 +! Scan column J. + DO 320 II = I1, I2 + I = IRN(II) + KCOST = (NZ-1)*(LENR(I)-LENRL(I)-1) + IF (KCOST>=JCOST) GOTO 320 +! Pivot has best Markowitz count so far. Now +! check its suitability on numeric grounds by +! examining the other non-zeros in its row. + J1 = IPTR(I) + LENRL(I) + J2 = IPTR(I) + LENR(I) - 1 +! We need a stability check on singleton columns +! because of possible problems with +! underdetermined systems. + AMAX = ZERO + DO JJ = J1, J2 + AMAX = MAX(AMAX,ABS(A(JJ))) + IF (ICN(JJ)==J) JPOS = JJ + END DO + IF (ABS(A(JPOS))<=AMAX*U .AND. L==1) GOTO 320 + JCOST = KCOST + IPIV = I + JPIV = J + IJPOS = JPOS + IF (ABS(AMAX)>ZERO) PIVRAT = ABS(A(JPOS))/AMAX + IF (JCOST<=NZ*(NZ-1)) GOTO 430 +320 END DO +330 END DO +340 END DO +! In the event of singularity, we must make sure all +! rows and columns are tested. + MSRCH = N + +! Matrix is numerically or structurally singular. Which +! it is will be diagnosed later. + IRANK = IRANK - 1 + END DO +! Assign rest of rows and columns to ordering array. +! Matrix is structurally singular. + IF (IFLAG/=2 .AND. IFLAG/=-5) IFLAG = 1 + IRANK = IRANK - ILAST + PIVOT + 1 + IF (.NOT.ABORT1) GOTO 360 + IDISP(2) = IACTIV + IFLAG = -1 + IF (LP/=0) WRITE (LP,90000) + GOTO 1190 +360 K = PIVOT - 1 + DO 400 I = ISTART, ILAST + IF (LASTR(I)/=0) GOTO 400 + K = K + 1 + LASTR(I) = K + IF (LENRL(I)==0) GOTO 390 + MINICN = MAX(MINICN,NZROW+IBEG-1+MOREI+LENRL(I)) + IF (IACTIV-IBEG>=LENRL(I)) GOTO 370 + CALL MA30DD(A,ICN,IPTR(ISTART),N,IACTIV,ITOP,.TRUE.) +! Check now to see if MA30DD has created enough +! available space. + IF (IACTIV-IBEG>=LENRL(I)) GOTO 370 +! Create more space by destroying previously created +! LU factors. + MOREI = MOREI + IBEG - IDISP(1) + IBEG = IDISP(1) + IF (LP/=0) WRITE (LP,90002) + IFLAG = -5 + IF (ABORT3) GOTO 1160 +370 J1 = IPTR(I) + J2 = J1 + LENRL(I) - 1 + IPTR(I) = 0 + DO JJ = J1, J2 + A(IBEG) = A(JJ) + ICN(IBEG) = ICN(JJ) + ICN(JJ) = 0 + IBEG = IBEG + 1 + END DO + NZROW = NZROW - LENRL(I) +390 IF (K==ILAST) GOTO 410 +400 END DO +410 K = PIVOT - 1 + DO 420 I = ISTART, ILAST + IF (IPC(I)/=0) GOTO 420 + K = K + 1 + IPC(I) = K + IF (K==ILAST) GOTO 1060 +420 END DO + +! The pivot has now been found in position (IPIV,JPIV) +! in location IJPOS in row file. +! Update column and row ordering arrays to correspond +! with removal of the active part of the matrix. +430 ISING = PIVOT + IF (ABS(A(IJPOS))>ZERO) GOTO 440 +! Numerical singularity is recorded here. + ISING = -ISING + IF (IFLAG/=-5) IFLAG = 2 + IF (.NOT.ABORT2) GOTO 440 + IDISP(2) = IACTIV + IFLAG = -2 + IF (LP/=0) WRITE (LP,90001) + GOTO 1190 +440 OLDPIV = IPTR(IPIV) + LENRL(IPIV) + OLDEND = IPTR(IPIV) + LENR(IPIV) - 1 +! Changes to column ordering. + IF (NSRCH<=NN) GOTO 470 + COLUPD = NN + 1 + LENPP = OLDEND - OLDPIV + 1 + IF (LENPP<4) LPIV(1) = LPIV(1) + 1 + IF (LENPP>=4 .AND. LENPP<=6) LPIV(2) = LPIV(2) + 1 + IF (LENPP>=7 .AND. LENPP<=10) LPIV(3) = LPIV(3) + 1 + IF (LENPP>=11 .AND. LENPP<=15) LPIV(4) = LPIV(4) + 1 + IF (LENPP>=16 .AND. LENPP<=20) LPIV(5) = LPIV(5) + 1 + IF (LENPP>=21 .AND. LENPP<=30) LPIV(6) = LPIV(6) + 1 + IF (LENPP>=31 .AND. LENPP<=50) LPIV(7) = LPIV(7) + 1 + IF (LENPP>=51 .AND. LENPP<=70) LPIV(8) = LPIV(8) + 1 + IF (LENPP>=71 .AND. LENPP<=100) LPIV(9) = LPIV(9) + 1 + IF (LENPP>=101) LPIV(10) = LPIV(10) + 1 + MAPIV = MAX(MAPIV,LENPP) + IAVPIV = IAVPIV + LENPP + DO 460 JJ = OLDPIV, OLDEND + J = ICN(JJ) + LC = LASTC(J) + NC = NEXTC(J) + NEXTC(J) = -COLUPD + IF (JJ/=IJPOS) COLUPD = J + IF (NC/=0) LASTC(NC) = LC + IF (LC==0) GOTO 450 + NEXTC(LC) = NC + GOTO 460 +450 NZ = LENC(J) + ISW = IFIRST(NZ) + IF (ISW>0) LASTR(ISW) = -NC + IF (ISW<0) IFIRST(NZ) = -NC +460 END DO +! Changes to row ordering. +470 I1 = IPC(JPIV) + I2 = I1 + LENC(JPIV) - 1 + DO 490 II = I1, I2 + I = IRN(II) + LR = LASTR(I) + NR = NEXTR(I) + IF (NR/=0) LASTR(NR) = LR + IF (LR<=0) GOTO 480 + NEXTR(LR) = NR + GOTO 490 +480 NZ = LENR(I) - LENRL(I) + IF (NR/=0) IFIRST(NZ) = NR + IF (NR==0) IFIRST(NZ) = LR +490 END DO + +! Move pivot to position LENRL+1 in pivot row and move pivot +! row to the beginning of the available storage. The L part +! and the pivot in the old copy of the pivot row is nullified +! while, in the strictly upper triangular part, the column +! indices, J say, are overwritten by the corresponding entry +! of IQ (IQ(J)) and IQ(J) is set to the negative of the +! displacement of the column index from the pivot entry. + IF (OLDPIV==IJPOS) GOTO 500 + AU = A(OLDPIV) + A(OLDPIV) = A(IJPOS) + A(IJPOS) = AU + ICN(IJPOS) = ICN(OLDPIV) + ICN(OLDPIV) = JPIV +! Check to see if there is space immediately available in +! A/ICN to hold new copy of pivot row. +500 MINICN = MAX(MINICN,NZROW+IBEG-1+MOREI+LENR(IPIV)) + IF (IACTIV-IBEG>=LENR(IPIV)) GOTO 510 + CALL MA30DD(A,ICN,IPTR(ISTART),N,IACTIV,ITOP,.TRUE.) + OLDPIV = IPTR(IPIV) + LENRL(IPIV) + OLDEND = IPTR(IPIV) + LENR(IPIV) - 1 +! Check now to see if MA30DD has created enough +! available space. + IF (IACTIV-IBEG>=LENR(IPIV)) GOTO 510 +! Create more space by destroying previously created +! LU factors. + MOREI = MOREI + IBEG - IDISP(1) + IBEG = IDISP(1) + IF (LP/=0) WRITE (LP,90002) + IFLAG = -5 + IF (ABORT3) GOTO 1160 + IF (IACTIV-IBEG>=LENR(IPIV)) GOTO 510 +! There is still not enough room in A/ICN. + IFLAG = -4 + GOTO 1160 +! Copy pivot row and set up IQ array. +510 IJPOS = 0 + J1 = IPTR(IPIV) + + DO JJ = J1, OLDEND + A(IBEG) = A(JJ) + ICN(IBEG) = ICN(JJ) + IF (IJPOS/=0) GOTO 520 + IF (ICN(JJ)==JPIV) IJPOS = IBEG + ICN(JJ) = 0 + GOTO 530 +520 K = IBEG - IJPOS + J = ICN(JJ) + ICN(JJ) = IQ(J) + IQ(J) = -K +530 IBEG = IBEG + 1 + END DO + + IJP1 = IJPOS + 1 + PIVEND = IBEG - 1 + LENPIV = PIVEND - IJPOS + NZROW = NZROW - LENRL(IPIV) - 1 + IPTR(IPIV) = OLDPIV + 1 + IF (LENPIV==0) IPTR(IPIV) = 0 + +! Remove pivot row (including pivot) from column +! oriented file. + DO JJ = IJPOS, PIVEND + J = ICN(JJ) + I1 = IPC(J) + LENC(J) = LENC(J) - 1 +! I2 is last position in new column. + I2 = IPC(J) + LENC(J) - 1 + IF (I2ZERO) AU = -A(JJ)/A(IJPOS) + IF (LBIG) BIG = MAX(BIG,ABS(AU)) + A(JJ) = A(J1) + A(J1) = AU + ICN(JJ) = ICN(J1) + ICN(J1) = JPIV + LENRL(I) = LENRL(I) + 1 + GOTO 590 +580 END DO +! JUMP if pivot row is a singleton. +590 IF (LENPIV==0) GOTO 880 +! Now perform necessary operations on rest of non-pivot +! row I. + ROWI = J1 + 1 + IOP = 0 +! Jump if all the pivot row causes fill-in. + IF (ROWI>IEND) GOTO 670 +! Perform operations on current non-zeros in row I. +! Innermost loop. + LENPP = IEND - ROWI + 1 + IF (LENPP<4) LNPIV(1) = LNPIV(1) + 1 + IF (LENPP>=4 .AND. LENPP<=6) LNPIV(2) = LNPIV(2) + 1 + IF (LENPP>=7 .AND. LENPP<=10) LNPIV(3) = LNPIV(3) + 1 + IF (LENPP>=11 .AND. LENPP<=15) LNPIV(4) = LNPIV(4) + 1 + IF (LENPP>=16 .AND. LENPP<=20) LNPIV(5) = LNPIV(5) + 1 + IF (LENPP>=21 .AND. LENPP<=30) LNPIV(6) = LNPIV(6) + 1 + IF (LENPP>=31 .AND. LENPP<=50) LNPIV(7) = LNPIV(7) + 1 + IF (LENPP>=51 .AND. LENPP<=70) LNPIV(8) = LNPIV(8) + 1 + IF (LENPP>=71 .AND. LENPP<=100) LNPIV(9) = LNPIV(9) + 1 + IF (LENPP>=101) LNPIV(10) = LNPIV(10) + 1 + MANPIV = MAX(MANPIV,LENPP) + IANPIV = IANPIV + LENPP + KOUNTL = KOUNTL + 1 + DO 600 JJ = ROWI, IEND + J = ICN(JJ) + IF (IQ(J)>0) GOTO 600 + IOP = IOP + 1 + PIVROW = IJPOS - IQ(J) + A(JJ) = A(JJ) + AU*A(PIVROW) + IF (LBIG) BIG = MAX(ABS(A(JJ)),BIG) + ICN(PIVROW) = -ICN(PIVROW) + IF (ABS(A(JJ))0) LASTR(ISW) = -NC + IF (ISW<0) IFIRST(NZ) = -NC +650 END DO + ICN(JNEW:IEND) = 0 +! The value of IDROP might be different from that +! calculated earlier because we may now have dropped +! some non-zeros which were not modified by the pivot +! row. + IDROP = IEND + 1 - JNEW + IEND = JNEW - 1 + LENR(I) = LENR(I) - IDROP + NZROW = NZROW - IDROP + NZCOL = NZCOL - IDROP + NDROP = NDROP + IDROP +670 IFILL = LENPIV - IOP +! Jump is if there is no fill-in. + IF (IFILL==0) GOTO 770 +! Now for the fill-in. + MINICN = MAX(MINICN,MOREI+IBEG-1+NZROW+IFILL+LENR(I)) +! See if there is room for fill-in. +! Get maximum space for row I in situ. + DO JDIFF = 1, IFILL + JNPOS = IEND + JDIFF + IF (JNPOS>LICN) GOTO 690 + IF (ICN(JNPOS)/=0) GOTO 690 + END DO +! There is room for all the fill-in after the end of the +! row so it can be left in situ. +! Next available space for fill-in. + IEND = IEND + 1 + GOTO 770 +! JMORE spaces for fill-in are required in front of row. +690 JMORE = IFILL - JDIFF + 1 + I1 = IPTR(I) +! We now look in front of the row to see if there is space +! for the rest of the fill-in. + DO JDIFF = 1, JMORE + JNPOS = I1 - JDIFF + IF (JNPOS=IBEG) GOTO 750 + CALL MA30DD(A,ICN,IPTR(ISTART),N,IACTIV,ITOP,.TRUE.) + I1 = IPTR(I) + IEND = I1 + LENR(I) - 1 + JNPOS = IACTIV - LENR(I) - IFILL + IF (JNPOS>=IBEG) GOTO 750 +! No space available so try to create some by throwing +! away previous LU decomposition. + MOREI = MOREI + IBEG - IDISP(1) - LENPIV - 1 + IF (LP/=0) WRITE (LP,90002) + IFLAG = -5 + IF (ABORT3) GOTO 1160 +! Keep record of current pivot row. + IBEG = IDISP(1) + ICN(IBEG) = JPIV + A(IBEG) = A(IJPOS) + IJPOS = IBEG + DO JJ = IJP1, PIVEND + IBEG = IBEG + 1 + A(IBEG) = A(JJ) + ICN(IBEG) = ICN(JJ) + END DO + IJP1 = IJPOS + 1 + PIVEND = IBEG + IBEG = IBEG + 1 + IF (JNPOS>=IBEG) GOTO 750 +! This still does not give enough room. + IFLAG = -4 + GOTO 1160 +750 IACTIV = MIN(IACTIV,JNPOS) +! Move non-pivot row I. + IPTR(I) = JNPOS + DO JJ = I1, IEND + A(JNPOS) = A(JJ) + ICN(JNPOS) = ICN(JJ) + JNPOS = JNPOS + 1 + ICN(JJ) = 0 + END DO +! First new available space. + IEND = JNPOS +770 NZROW = NZROW + IFILL +! Innermost fill-in loop which also resets ICN. + IDROP = 0 + DO 850 JJ = IJP1, PIVEND + J = ICN(JJ) + IF (J<0) GOTO 840 + ANEW = AU*A(JJ) + AANEW = ABS(ANEW) + IF (AANEW>=TOL) GOTO 780 + IDROP = IDROP + 1 + NDROP = NDROP + 1 + NZROW = NZROW - 1 + MINICN = MINICN - 1 + IFILL = IFILL - 1 + GOTO 850 +780 IF (LBIG) BIG = MAX(AANEW,BIG) + A(IEND) = ANEW + ICN(IEND) = J + IEND = IEND + 1 + +! Put new entry in column file. + MINIRN = MAX(MINIRN,NZCOL+LENC(J)+1) + JEND = IPC(J) + LENC(J) + JROOM = NZPC - III + 1 + LENC(J) + IF (JEND>LIRN) GOTO 790 + IF (IRN(JEND)==0) GOTO 830 +790 IF (JROOM=LENC(J)+1) GOTO 800 +! Column file is not large enough. + GOTO 1170 +! Copy column to beginning of file. +800 JBEG = IPC(J) + JEND = IPC(J) + LENC(J) - 1 + JZERO = DISPC - 1 + DISPC = DISPC - JROOM + IDISPC = DISPC + DO II = JBEG, JEND + IRN(IDISPC) = IRN(II) + IRN(II) = 0 + IDISPC = IDISPC + 1 + END DO + IPC(J) = DISPC + JEND = IDISPC + IRN(JEND:JZERO) = 0 +830 IRN(JEND) = I + NZCOL = NZCOL + 1 + LENC(J) = LENC(J) + 1 +! End of adjustment to column file. + GOTO 850 +840 ICN(JJ) = -J +850 END DO + IF (IDROP==0) GOTO 870 + DO KDROP = 1, IDROP + ICN(IEND) = 0 + IEND = IEND + 1 + END DO +870 LENR(I) = LENR(I) + IFILL +! End of scan of pivot column. +880 END DO + +! Remove pivot column from column oriented storage +! and update row ordering arrays. + I1 = IPC(JPIV) + I2 = IPC(JPIV) + LENC(JPIV) - 1 + NZCOL = NZCOL - LENC(JPIV) + DO 930 II = I1, I2 + I = IRN(II) + IRN(II) = 0 + NZ = LENR(I) - LENRL(I) + IF (NZ/=0) GOTO 890 + LASTR(I) = 0 + GOTO 930 +890 IFIR = IFIRST(NZ) + IFIRST(NZ) = I +! IF (IFIR) 900, 920, 910 +!900 LASTR(I) = IFIR + IF (IFIR == 0) GOTO 920 + IF (IFIR > 0) GOTO 910 + LASTR(I) = IFIR + NEXTR(I) = 0 + GOTO 930 +910 LASTR(I) = LASTR(IFIR) + NEXTR(I) = IFIR + LASTR(IFIR) = I + GOTO 930 +920 LASTR(I) = 0 + NEXTR(I) = 0 + NZMIN = MIN(NZMIN,NZ) +930 END DO +! Restore IQ and nullify U part of old pivot row. +! Record the column permutation in LASTC(JPIV) and +! the row permutation in LASTR(IPIV). +940 IPC(JPIV) = -ISING + LASTR(IPIV) = PIVOT + IF (LENPIV==0) GOTO 1050 + NZROW = NZROW - LENPIV + JVAL = IJP1 + JZER = IPTR(IPIV) + IPTR(IPIV) = 0 + DO JCOUNT = 1, LENPIV + J = ICN(JVAL) + IQ(J) = ICN(JZER) + ICN(JZER) = 0 + JVAL = JVAL + 1 + JZER = JZER + 1 + END DO +! Adjust column ordering arrays. + IF (NSRCH>NN) GOTO 980 + DO 970 JJ = IJP1, PIVEND + J = ICN(JJ) + NZ = LENC(J) + IF (NZ/=0) GOTO 960 + IPC(J) = 0 + GOTO 970 +960 NZMIN = MIN(NZMIN,NZ) +970 END DO + GOTO 1050 +980 JJ = COLUPD + DO 1040 JDUMMY = 1, NN + J = JJ + IF (J==NN+1) GOTO 1050 + JJ = -NEXTC(J) + NZ = LENC(J) + IF (NZ/=0) GOTO 990 + IPC(J) = 0 + GOTO 1040 +990 IFIR = IFIRST(NZ) + LASTC(J) = 0 +! IF (IFIR) 1000, 1010, 1020 +!1000 IFIRST(NZ) = -J + IF (IFIR == 0) GOTO 1010 + IF (IFIR > 0) GOTO 1020 + IFIRST(NZ) = -J + IFIR = -IFIR + LASTC(IFIR) = J + NEXTC(J) = IFIR + GOTO 1040 +1010 IFIRST(NZ) = -J + NEXTC(J) = 0 + GOTO 1030 +1020 LC = -LASTR(IFIR) + LASTR(IFIR) = -J + NEXTC(J) = LC + IF (LC/=0) LASTC(LC) = J +1030 NZMIN = MIN(NZMIN,NZ) +1040 END DO +1050 END DO +! ******************************************** +! **** End of main elimination loop **** +! ******************************************** + +! Reset IACTIV to point to the beginning of the +! next block. +1060 IF (ILAST/=NN) IACTIV = IPTR(ILAST+1) +1070 END DO + +! ******************************************** +! **** End of deomposition of block **** +! ******************************************** + +! Record singularity (if any) in IQ array. + IF (IRANK==NN) GOTO 1090 + DO 1080 I = 1, NN + IF (IPC(I)<0) GOTO 1080 + ISING = IPC(I) + IQ(ISING) = -IQ(ISING) + IPC(I) = -ISING +1080 END DO + +! Run through LU decomposition changing column indices +! to that of new order and permuting LENR and LENRL +! arrays according to pivot permutations. +1090 ISTART = IDISP(1) + IEND = IBEG - 1 + IF (IEND=0) GOTO 20 +! First non-zero of row/col has been located. + J = -ICN(JPOS) + ICN(JPOS) = IPTR(J) + IPTR(J) = KN +20 ICN(KN) = ICN(JPOS) +30 END DO + IACTIV = KN + RETURN + + END SUBROUTINE MA30DD +!_______________________________________________________________________ + + SUBROUTINE MA30BD(N,ICN,A,LICN,LENR,LENRL,IDISP,IP,IQ,W,IW,IFLAG) +! .. +! MA30BD performs the LU decomposition of the diagonal blocks of +! a new matrix PAQ of the same sparsity pattern, using information +! from a previous call to MA30AD. THe entries of the input matrix +! must already be in their final positions in the LU decomposition +! structure. This routine executes about five times faster than +! MA30AD. +! .. +! We now describe the argument list for MA30BD. Consult MA30AD for +! further information on these parameters. +! N is an integer variable set to the order of the matrix. +! ICN is an integer array of length LICN. It should be unchanged +! since the last call to MA30AD. It is not altered by MA30BD. +! A is a real(kind=wp) array of length LICN. The user must set +! entries IDISP(1) to IDISP(2) to contain the entries in the +! diagonal blocks of the matrix PAQ whose column numbers are +! held in ICN, using corresponding positions. Note that some +! zeros may need to be held explicitly. On output entries +! IDISP(1) to IDISP(2) of array A contain the LU decomposition +! of the diagonal blocks of PAQ. Entries A(1) to A(IDISP(1)-1) +! are neither required nor altered by MA30BD. +! LICN is an integer variable which must be set by the user to the +! length of arrays A and ICN. it is not altered by MA30BD. +! LENR, LENRL are integer arrays of length N. They should be +! unchanged since the last call to MA30AD. They are not +! altered by MA30BD. +! IDISP is an integer array of length 2. It should be unchanged +! since the last call to MA30AD. It is not altered by MA30BD. +! IP, IQ are integer arrays of length N. They should be unchanged +! since the last call to MA30AD. They are not altered by +! MA30BD. +! W is a REAL(KIND=WP) array of length N which is used as +! workspace by MA30BD. +! IW is an integer array of length N which is used as workspace +! by MA30BD. +! IFLAG is an integer variable. On output from MA30BD, IFLAG has +! the value zero if the factorization was successful, has the +! value I if pivot I was very small and has the value -I if +! an unexpected singularity was detected at stage I of the +! decomposition. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: IFLAG, LICN, N +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), W(N) + INTEGER :: ICN(LICN), IDISP(2), IP(N), IQ(N), IW(N), LENR(N), LENRL(N) +! .. +! .. Local Scalars .. + REAL (WP) :: AU, ROWMAX + INTEGER :: I, IFIN, ILEND, IPIVJ, ISING, ISTART, J, JAY, JAYJAY, JFIN, & + JJ, PIVPOS + LOGICAL :: STAB +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX +! .. +! .. FIRST EXECUTABLE STATEMENT MA30BD +! .. + STAB = EPS <= ONE + RMIN = EPS + ISING = 0 + IFLAG = 0 + W(1:N) = ZERO +! Set up pointers to the beginning of the rows. + IW(1) = IDISP(1) + IF (N==1) GOTO 30 + DO I = 2, N + IW(I) = IW(I-1) + LENR(I-1) + END DO + +! Start of main loop. +! At step I, row I of A is transformed to row I of LU by +! adding appropriate multiples of rows 1 TO I-1 using row +! Gauss elimination. +30 DO 170 I = 1, N +! ISTART is beginning of row I of A and row I of L. + ISTART = IW(I) +! IFIN is end of row I of A and row I of U. + IFIN = ISTART + LENR(I) - 1 +! ILEND is end of row I of L. + ILEND = ISTART + LENRL(I) - 1 + IF (ISTART>ILEND) GOTO 100 +! Load row I of A into vector W. + DO JJ = ISTART, IFIN + J = ICN(JJ) + W(J) = A(JJ) + END DO + +! Add multiples of appropriate rows of I to I-1 to row I. + DO 80 JJ = ISTART, ILEND + J = ICN(JJ) +! IPIVJ is position of pivot in row J. + IPIVJ = IW(J) + LENRL(J) +! Form multiplier AU. + AU = -W(J)/A(IPIVJ) + IF (LBIG) BIG = MAX(ABS(AU),BIG) + W(J) = AU +! AU * ROW J (U part) is added to row I. + IPIVJ = IPIVJ + 1 + JFIN = IW(J) + LENR(J) - 1 + IF (IPIVJ>JFIN) GOTO 80 +! Innermost loop. + IF (LBIG) GOTO 60 + DO JAYJAY = IPIVJ, JFIN + JAY = ICN(JAYJAY) + W(JAY) = W(JAY) + AU*A(JAYJAY) + END DO + GOTO 80 +60 DO JAYJAY = IPIVJ, JFIN + JAY = ICN(JAYJAY) + W(JAY) = W(JAY) + AU*A(JAYJAY) + BIG = MAX(ABS(W(JAY)),BIG) + END DO +80 END DO + +! Reload W back into A (now LU). + DO JJ = ISTART, IFIN + J = ICN(JJ) + A(JJ) = W(J) + W(J) = ZERO + END DO +! We now perform the stability checks. +100 PIVPOS = ILEND + 1 + IF (IQ(I)>0) GOTO 150 +! Matrix had singularity at this point in MA30AD. +! Is it the first such pivot in current block? + IF (ISING==0) ISING = I +! Does current matrix have a singularity in the same place? + IF (PIVPOS>IFIN) GOTO 110 + IF (ABS(A(PIVPOS))>ZERO) GOTO 180 +! It does, so set ising if it is not the end of the current +! block. Check to see that appropriate part of LU is zero +! or null. +110 IF (ISTART>IFIN) GOTO 130 + DO 120 JJ = ISTART, IFIN + IF (ICN(JJ)ZERO) GOTO 180 +120 END DO +130 IF (PIVPOS<=IFIN) A(PIVPOS) = ONE + IF (IP(I)>0 .AND. I/=N) GOTO 170 +! End of current block. Reset zero pivots and ISING. + DO 140 J = ISING, I + IF ((LENR(J)-LENRL(J))==0) GOTO 140 + JJ = IW(J) + LENRL(J) + A(JJ) = ZERO +140 END DO + ISING = 0 + GOTO 170 +! Matrix had non-zero pivot in MA30AD at this stage. +150 IF (PIVPOS>IFIN) GOTO 180 + IF (ABS(A(PIVPOS))<=ZERO) GOTO 180 + IF (.NOT.STAB) GOTO 170 + ROWMAX = ZERO + DO JJ = PIVPOS, IFIN + ROWMAX = MAX(ROWMAX,ABS(A(JJ))) + END DO + IF (ABS(A(PIVPOS))/ROWMAX>=RMIN) GOTO 170 + IFLAG = I + RMIN = ABS(A(PIVPOS))/ROWMAX +! End of main loop. +170 END DO + + GOTO 190 +! Error return +180 IF (LP/=0) WRITE (LP,90000) I + IFLAG = -I + +190 RETURN +90000 FORMAT (' Error return from MA30BD; Singularity detected in',' row', & + I8) + + END SUBROUTINE MA30BD +!_______________________________________________________________________ + + SUBROUTINE MA30CD(N,ICN,A,LICN,LENR,LENRL,LENOFF,IDISP,IP,IQ,X, & + W,MTYPE) +! .. +! MA30CD uses the factors produced by MA30AD or MA30BD to solve +! AX = B or A TRANSPOSE X = B when the matrix P1*A*Q1(PAQ) is +! block lower triangular (including the case of only one diagonal +! block). +! .. +! We now describe the argument list for MA30CD. +! N is an integer variable set to the order of the matrix. It is +! not altered by the subroutine. +! ICN is an integer array of length LICN. Entries IDISP(1) to +! IDISP(2) should be unchanged since the last call to MA30AD. If +! the matrix has more than one diagonal block, then column indices +! corresponding to non-zeros in sub-diagonal blocks of PAQ must +! appear in positions 1 to IDISP(1)-1. For the same row those +! entries must be contiguous, with those in row I preceding those +! in row I+1 (I=1,...,N-1) and no wasted space between rows. +! Entries may be in any order within each row. It is not altered +! by MA30CD. +! A is a REAL(KIND=WP) array of length LICN. Entries IDISP(1) to +! IDISP(2) should be unchanged since the last call to MA30AD or +! MA30BD. If the matrix has more than one diagonal block, then +! the values of the non-zeros in sub-diagonal blocks must be in +! positions 1 to IDISP(1)-1 in the order given by ICN. It is not +! altered by MA30CD. +! LICN is an integer variable set to the size of arrays ICN and A. +! It is not altered by MA30CD. +! LENR, LENRL are integer arrays of length N which should be +! unchanged since the last call to MA30AD. They are not +! altered by MA30CD. +! LENOFF is an integer array of length N. If the matrix PAQ (or +! P1*A*Q1) has more than one diagonal block, then LENOFF(I), +! I=1,...,N should be set to the number of non-zeros in row I of +! the matrix PAQ which are in sub-diagonal blocks. If there is +! only one diagonal block then LENOFF(1) may be set TO -1, in +! which case the other entries of LENOFF are never accessed. +! It is not altered by ma30cd. +! IDISP is an integer array of length 2 which should be unchanged +! since the last call to MA30AD. It is not altered by MA30CD. +! IP, IQ are integer arrays of length N which should be unchanged +! since the last call to MA30AD. They are not altered by +! MA30CD. +! X is a REAL(KIND=WP) array of length N. It must be set by the +! user to the values of the right hand side vector B for the +! equations being solved. ON exit from MA30CD it will be equal +! to the solution X required. +! W is a REAL(KIND=WP) array of length N which is used as +! workspace by MA30CD. +! MTYPE is an integer variable which must be set by the user. If +! MTYPE = 1, then the solution to the system AX = B is returned. +! Any other value for mtype will return the solution to the +! system A TRANSPOSE X = B. It is not altered by MA30CD. +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: LICN, MTYPE, N +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), W(N), X(N) + INTEGER :: ICN(LICN), IDISP(2), IP(N), IQ(N), LENOFF(N), LENR(N), & + LENRL(N) +! .. +! .. Local Scalars .. + REAL (WP) :: WI, WII + INTEGER :: I, IB, IBACK, IBLEND, IBLOCK, IEND, IFIRST, II, III, & + ILAST, J, J1, J2, J3, JJ, JPIV, JPIVP1, K, LJ1, LJ2, LT, LTEND, & + NUMBLK + LOGICAL :: NEG, NOBLOC +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, IABS, MAX +! .. +! .. FIRST EXECUTABLE STATEMENT MA30CD +! .. +! The final value of RESID is the maximum residual for an +! inconsistent set of equations. + RESID = ZERO +! NOBLOC is .TRUE. if subroutine block has been used +! previously and is .FALSE. otherwise. The value .FALSE. +! means that LENOFF will not be subsequently accessed. + NOBLOC = LENOFF(1) < 0 + IF (MTYPE/=1) GOTO 140 + +! We now solve A * X = B. +! NEG is used to indicate when the last row in a block +! has been reached. It is then set to .TRUE. whereafter +! back substitution is performed on the block. + NEG = .FALSE. +! IP(N) is negated so that the last row of the last +! block can be recognised. It is reset to its positive +! value on exit. + IP(N) = -IP(N) +! Preorder VECTOR ... W(I) = X(IP(I)) + DO II = 1, N + I = IP(II) + I = IABS(I) + W(II) = X(I) + END DO +! LT holds the position of the first non-zero in the current +! row of the off-diagonal blocks. + LT = 1 +! IFIRST holds the index of the first row in the current block. + IFIRST = 1 +! IBLOCK holds the position of the first non-zero in the current +! row of the LU decomposition of the diagonal blocks. + IBLOCK = IDISP(1) +! If I is not the last row of a block, then a pass through this +! loop adds the inner product of row I of the off-diagonal blocks +! and W to W and performs forward elimination using row I of the +! LU decomposition. If I is the last row of a block then, after +! performing these aforementioned operations, back substitution +! is performed using the rows of the block. + DO 120 I = 1, N + WI = W(I) + IF (NOBLOC) GOTO 30 + IF (LENOFF(I)==0) GOTO 30 +! Operations using lower triangular blocks. +! LTEND is the end of row I in the off-diagonal blocks. + LTEND = LT + LENOFF(I) - 1 + DO JJ = LT, LTEND + J = ICN(JJ) + WI = WI - A(JJ)*W(J) + END DO +! LT is set the beginning of the next off-diagonal row. + LT = LTEND + 1 +! Set NEG to .TRUE. if we are on the last row of the block. +30 IF (IP(I)<0) NEG = .TRUE. + IF (LENRL(I)==0) GOTO 50 +! Forward elimination phase. +! IEND is the end of the L part of row I in the LU decomposition. + IEND = IBLOCK + LENRL(I) - 1 + DO JJ = IBLOCK, IEND + J = ICN(JJ) + WI = WI + A(JJ)*W(J) + END DO +! IBLOCK is adjusted to point to the start of the next row. +50 IBLOCK = IBLOCK + LENR(I) + W(I) = WI + IF (.NOT.NEG) GOTO 120 +! Back substitution phase. +! J1 is position in A/ICN after end of block beginning in +! row IFIRST and ending in row I. + J1 = IBLOCK +! Are there any singularities in this block? If not, continue +! with the back substitution. + IB = I + IF (IQ(I)>0) GOTO 70 + DO III = IFIRST, I + IB = I - III + IFIRST + IF (IQ(IB)>0) GOTO 70 + J1 = J1 - LENR(IB) + RESID = MAX(RESID,ABS(W(IB))) + W(IB) = ZERO + END DO +! Entire block is singular. + GOTO 110 +! Each pass through this loop performs the back substitution +! operations for a single row, starting at the end of the +! block and working through it in reverse order. +70 DO III = IFIRST, IB + II = IB - III + IFIRST +! J2 is end of row II. + J2 = J1 - 1 +! J1 is beginning of row II. + J1 = J1 - LENR(II) +! JPIV is the position of the pivot in row II. + JPIV = J1 + LENRL(II) + JPIVP1 = JPIV + 1 +! JUMP if row II of U has no non-zeros. + IF (J2KMAX) GOTO 40 + KOR = KMAX + DO KDUMMY = KLO, KMAX +! Items KOR,KOR+1,...,KMAX are in order. + ACE = A(KOR-1) + ICE = INUM(KOR-1) + DO K = KOR, KMAX + IK = INUM(K) + IF (IABS(ICE)<=IABS(IK)) GOTO 20 + INUM(K-1) = IK + A(K-1) = A(K) + END DO + K = KMAX + 1 +20 INUM(K-1) = ICE + A(K-1) = ACE + KOR = KOR - 1 + END DO +! Next column. +40 KMAX = KLO - 2 + END DO + RETURN + + END SUBROUTINE MC20BD +!_______________________________________________________________________ + + SUBROUTINE MC21A(N,ICN,LICN,IP,LENR,IPERM,NUMNZ,IW) +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: LICN, N, NUMNZ +! .. +! .. Array Arguments .. + INTEGER :: ICN(LICN), IP(N), IPERM(N), IW(N,4), LENR(N) +! .. +! .. FIRST EXECUTABLE STATEMENT MA21A +! .. + CALL MC21B(N,ICN,LICN,IP,LENR,IPERM,NUMNZ,IW(1,1),IW(1,2), & + IW(1,3),IW(1,4)) + RETURN + + END SUBROUTINE MC21A +!_______________________________________________________________________ + + SUBROUTINE MC21B(N,ICN,LICN,IP,LENR,IPERM,NUMNZ,PR,ARP,CV,OUT) +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. +! .. + INTEGER :: LICN, N, NUMNZ +! .. +! .. Array Arguments .. + INTEGER :: ARP(N), CV(N), ICN(LICN), IP(N), IPERM(N), LENR(N), & + OUT(N), PR(N) +! .. +! .. Local Scalars .. + INTEGER :: I, II, IN1, IN2, IOUTK, J, J1, JORD, K, KK +! .. +! .. FIRST EXECUTABLE STATEMENT MA21B +! .. +! PR(I) is the previous row to I in the depth first search. +! It is used as a work array in the sorting algorithm. +! Elements (IPERM(I),I) I=1,...,N are non-zero at the +! end of the algorithm unless N assignments have not +! been made, in which case (IPERM(I),I) will be zero +! for N-NUMNZ entries. +! CV(I) is the most recent row extension at which column I +! was visited. +! ARP(I) is one less than the number of non-zeros in row I +! which have not been scanned when looking for a cheap +! assignment. +! OUT(I) is one less than the number of non-zeros in row I +! which have not been scanned during one pass through +! the main loop. + +! Initialization of arrays. + DO I = 1, N + ARP(I) = LENR(I) - 1 + CV(I) = 0 + IPERM(I) = 0 + END DO + NUMNZ = 0 + +! Main loop. +! Each pass round this loop either results in a new +! assignment or gives a row with no assignment. + DO 100 JORD = 1, N + J = JORD + PR(J) = -1 + DO 70 K = 1, JORD +! Look for a cheap assignment. + IN1 = ARP(J) + IF (IN1<0) GOTO 30 + IN2 = IP(J) + LENR(J) - 1 + IN1 = IN2 - IN1 + DO II = IN1, IN2 + I = ICN(II) + IF (IPERM(I)==0) GOTO 80 + END DO +! No cheap assignment in row. + ARP(J) = -1 +! Begin looking for assignment chain starting with row J. +30 OUT(J) = LENR(J) - 1 +! Inner loop. Extends chain by one or backtracks. + DO KK = 1, JORD + IN1 = OUT(J) + IF (IN1<0) GOTO 50 + IN2 = IP(J) + LENR(J) - 1 + IN1 = IN2 - IN1 +! Forward scan. + DO 40 II = IN1, IN2 + I = ICN(II) + IF (CV(I)==JORD) GOTO 40 +! Column I has not yet been accessed during this pass. + J1 = J + J = IPERM(I) + CV(I) = JORD + PR(J) = J1 + OUT(J1) = IN2 - II - 1 + GOTO 70 +40 END DO + +! Backtracking step. +50 J = PR(J) + IF (J==-1) GOTO 100 + END DO + +70 END DO + +! New assignment is made. +80 IPERM(I) = J + ARP(J) = IN2 - II - 1 + NUMNZ = NUMNZ + 1 + DO K = 1, JORD + J = PR(J) + IF (J==-1) GOTO 100 + II = IP(J) + LENR(J) - OUT(J) - 2 + I = ICN(II) + IPERM(I) = J + END DO + +100 END DO + +! If matrix is structurally singular, we now complete the +! permutation IPERM. + IF (NUMNZ==N) RETURN + ARP(1:N) = 0 + K = 0 + DO 130 I = 1, N + IF (IPERM(I)/=0) GOTO 120 + K = K + 1 + OUT(K) = I + GOTO 130 +120 J = IPERM(I) + ARP(J) = I +130 END DO + K = 0 + DO 140 I = 1, N + IF (ARP(I)/=0) GOTO 140 + K = K + 1 + IOUTK = OUT(K) + IPERM(IOUTK) = I +140 END DO + RETURN + + END SUBROUTINE MC21B +!_______________________________________________________________________ + + SUBROUTINE MC22AD(N,ICN,A,NZ,LENROW,IP,IQ,IW,IW1) +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: N, NZ +! .. +! .. Array Arguments .. + REAL (WP) :: A(NZ) + INTEGER :: ICN(NZ), IP(N), IQ(N), IW(N,2), IW1(NZ), LENROW(N) +! .. +! .. Local Scalars .. + REAL (WP) :: AVAL + INTEGER :: I, ICHAIN, IOLD, IPOS, J, J2, JJ, JNUM, JVAL, LENGTH, & + NEWPOS +! .. +! .. Intrinsic Functions .. + INTRINSIC IABS +! .. +! .. FIRST EXECUTABLE STATEMENT MA22AD +! .. + IF (NZ<=0) GOTO 90 + IF (N<=0) GOTO 90 +! Set start of row I in IW(I,1) and LENROW(I) in IW(I,2) + IW(1,1) = 1 + IW(1,2) = LENROW(1) + DO I = 2, N + IW(I,1) = IW(I-1,1) + LENROW(I-1) + IW(I,2) = LENROW(I) + END DO +! Permute LENROW according to IP. Set off-sets for new +! position of row IOLD in IW(IOLD,1) and put old row +! indices in IW1 in positions corresponding to the new +! position of this row in A/ICN. + JJ = 1 + DO 30 I = 1, N + IOLD = IP(I) + IOLD = IABS(IOLD) + LENGTH = IW(IOLD,2) + LENROW(I) = LENGTH + IF (LENGTH==0) GOTO 30 + IW(IOLD,1) = IW(IOLD,1) - JJ + J2 = JJ + LENGTH - 1 + DO 20 J = JJ, J2 +20 IW1(J) = IOLD + JJ = J2 + 1 +30 END DO +! Set inverse permutation to IQ in IW(:,2). + DO I = 1, N + IOLD = IQ(I) + IOLD = IABS(IOLD) + IW(IOLD,2) = I + END DO +! Permute A and ICN in place, changing to new column numbers. +! Main loop. +! Each pass through this loop places a closed chain of +! column indices in their new (and final) positions. +! This is recorded by setting the iw1 entry to zero so +! that any which are subsequently encountered during +! this major scan can be bypassed. + DO 80 I = 1, NZ + IOLD = IW1(I) + IF (IOLD==0) GOTO 80 + IPOS = I + JVAL = ICN(I) +! If row IOLD is in same positions after permutation, +! GOTO 150. + IF (IW(IOLD,1)==0) GOTO 70 + AVAL = A(I) +! Chain loop. +! Each pass through this loop places one(permuted) column +! index in its final position, viz. IPOS. + DO ICHAIN = 1, NZ +! NEWPOS is the original position in A/ICN of the +! element to be placed in position IPOS. It is also +! the position of the next element in the chain. + NEWPOS = IPOS + IW(IOLD,1) +! Is chain complete? + IF (NEWPOS==I) GOTO 60 + A(IPOS) = A(NEWPOS) + JNUM = ICN(NEWPOS) + ICN(IPOS) = IW(JNUM,2) + IPOS = NEWPOS + IOLD = IW1(IPOS) + IW1(IPOS) = 0 +! End of chain loop. + END DO +60 A(IPOS) = AVAL +70 ICN(IPOS) = IW(JVAL,2) +! END OF MAIN LOOP +80 END DO +90 RETURN + + END SUBROUTINE MC22AD +!_______________________________________________________________________ + + SUBROUTINE MC23AD(N,ICN,A,LICN,LENR,IDISP,IP,IQ,LENOFF,IW,IW1) +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: LICN, N +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN) + INTEGER :: ICN(LICN), IDISP(2), IP(N), IQ(N), IW(N,5), IW1(N,2), & + LENOFF(N), LENR(N) +! .. +! .. Local Scalars .. + INTEGER :: I, I1, I2, IBEG, IBLOCK, IEND, II, ILEND, INEW, IOLD, & + IROWB, IROWE, J, JJ, JNEW, JNPOS, JOLD, K, LENI, NZ +! .. +! .. Intrinsic Functions .. + INTRINSIC MAX, MIN +! .. +! .. FIRST EXECUTABLE STATEMENT MA23AD +! .. +! Input ... N,ICN ... A,ICN,LENR ... +! Set up pointers IW(:,1) to the beginning of the rows +! and set LENOFF equal to LENR. + IW1(1,1) = 1 + LENOFF(1) = LENR(1) + IF (N==1) GOTO 20 + DO I = 2, N + LENOFF(I) = LENR(I) + IW1(I,1) = IW1(I-1,1) + LENR(I-1) + END DO +! IDISP(1) points to the first position in A/ICN after +! the off-diagonal blocks and untreated rows. +20 IDISP(1) = IW1(N,1) + LENR(N) + +! Find row permutation ip to make diagonal zero-free. + CALL MC21A(N,ICN,LICN,IW1(1,1),LENR,IP,NUMNZ,IW(1,1)) + +! Possible error return for structurally singular matrices. + IF (NUMNZ/=N .AND. ABORT) GOTO 170 + +! IW1(:,2) and LENR are permutations of IW1(:,1) and +! LENR/LENOFF suitable for entry to MC13D since matrix +! with these row pointer and length arrays has maximum +! number of non-zeros on the diagonal. + DO II = 1, N + I = IP(II) + IW1(II,2) = IW1(I,1) + LENR(II) = LENOFF(I) + END DO + +! Find symmetric permutation IQ to block lower triangular form. + CALL MC13D(N,ICN,LICN,IW1(1,2),LENR,IQ,IW(1,4),NUM,IW) + + IF (NUM/=1) GOTO 60 + +! Action taken if matrix is irreducible: The +! whole matrix is just moved to the end of the storage. + DO I = 1, N + LENR(I) = LENOFF(I) + IP(I) = I + IQ(I) = I + END DO + LENOFF(1) = -1 +! IDISP(1) is the first position after the last element in +! the off-diagonal blocks and untreated rows. + NZ = IDISP(1) - 1 + IDISP(1) = 1 +! IDISP(2) Is the position in A/ICN of the first element +! in the diagonal blocks. + IDISP(2) = LICN - NZ + 1 + LARGE = N + IF (NZ==LICN) GOTO 200 + DO K = 1, NZ + J = NZ - K + 1 + JJ = LICN - K + 1 + A(JJ) = A(J) + ICN(JJ) = ICN(J) + END DO + GOTO 200 + +! Data structure reordered. + +! Form composite row permutation, IP(I) = IP(IQ(I)). +60 DO II = 1, N + I = IQ(II) + IW(II,1) = IP(I) + END DO + DO I = 1, N + IP(I) = IW(I,1) + END DO + +! Run through blocks in reverse order separating diagonal +! blocks which are moved to the end of the storage. Elements +! in off-diagonal blocks are left in place unless a compress +! is necessary. + +! IBEG indicates the lowest value of J for which ICN(J) has +! been set to zero when element in position J was moved to +! the diagonal block part of storage. + IBEG = LICN + 1 +! IEND is the position of the first element of those treated +! rows which are in diagonal blocks. + IEND = LICN + 1 +! LARGE is the dimension of the largest block encountered +! so far. + LARGE = 0 + +! NUM is the number of diagonal blocks. + DO K = 1, NUM + IBLOCK = NUM - K + 1 +! I1 is first row (in permuted form) of block IBLOCK. +! I2 is last row (in permuted form) of block IBLOCK. + I1 = IW(IBLOCK,4) + I2 = N + IF (K/=1) I2 = IW(IBLOCK+1,4) - 1 + LARGE = MAX(LARGE,I2-I1+1) +! Go through the rows of block IBLOCK in the reverse order. + DO II = I1, I2 + INEW = I2 - II + I1 +! We now deal with row inew in permuted form (row IOLD +! in original matrix). + IOLD = IP(INEW) +! If there is space to move up diagonal block portion +! of row GOTO 110. + IF (IEND-IDISP(1)>=LENOFF(IOLD)) GOTO 110 + +! In-line compress. +! Moves separated off-diagonal elements and untreated +! rows to front of storage. + JNPOS = IBEG + ILEND = IDISP(1) - 1 + IF (ILENDLICN) GOTO 200 + JNPOS = IBEG + ILEND = IDISP(1) - 1 + DO 160 J = IBEG, ILEND + IF (ICN(J)==0) GOTO 160 + ICN(JNPOS) = ICN(J) + A(JNPOS) = A(J) + JNPOS = JNPOS + 1 +160 END DO +! IDISP(1) is first position after last element of +! off-diagonal blocks. + IDISP(1) = JNPOS + GOTO 200 + +! Error return +170 IF (LP/=0) WRITE (LP,90000) NUMNZ +90000 FORMAT (' Matrix is structurally singular, rank = ',I6) + IDISP(1) = -1 + GOTO 190 +180 IF (LP/=0) WRITE (LP,90001) N +90001 FORMAT (' LICN is not big enough; increase by ',I6) + IDISP(1) = -2 +190 IF (LP/=0) WRITE (LP,90002) +90002 FORMAT (' + Error return from MC23AD') +200 RETURN + + END SUBROUTINE MC23AD +!_______________________________________________________________________ + + SUBROUTINE MC24AD(N,ICN,A,LICN,LENR,LENRL,W) +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: LICN, N +! .. +! .. Array Arguments .. + REAL (WP) :: A(LICN), W(N) + INTEGER :: ICN(LICN), LENR(N), LENRL(N) +! .. +! .. Local Scalars .. + REAL (WP) :: AMAXL, AMAXU, WROWL + INTEGER :: I, J, J0, J1, J2, JJ +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, MAX +! .. +! .. FIRST EXECUTABLE STATEMENT MA24AD +! .. + AMAXL = ZERO + W(1:N) = ZERO + J0 = 1 + DO 60 I = 1, N + IF (LENR(I)==0) GOTO 60 + J2 = J0 + LENR(I) - 1 + IF (LENRL(I)==0) GOTO 30 +! Calculation of 1-norm of L. + J1 = J0 + LENRL(I) - 1 + WROWL = ZERO + DO 20 JJ = J0, J1 +20 WROWL = WROWL + ABS(A(JJ)) +! AMAXL is the maximum norm of columns of L so far found. + AMAXL = MAX(AMAXL,WROWL) + J0 = J1 + 1 +! Calculation of norms of columns of U(MAX-NORMS). +30 J0 = J0 + 1 + IF (J0>J2) GOTO 50 + DO 40 JJ = J0, J2 + J = ICN(JJ) +40 W(J) = MAX(ABS(A(JJ)),W(J)) +50 J0 = J2 + 1 +60 END DO +! AMAXU is set to maximum max-norm of columns of U. + AMAXU = ZERO + DO I = 1, N + AMAXU = MAX(AMAXU,W(I)) + END DO +! GROFAC is MAX U max-norm times MAX L 1-norm. + W(1) = AMAXL*AMAXU + RETURN + + END SUBROUTINE MC24AD +!_______________________________________________________________________ + + SUBROUTINE MC19AD(N,NA,A,IRN,ICN,R,C,W) +! .. +! MC19A was altered to use same precision for R, C, +! and W as is used for other variables in program. +! .. +! REAL A(NA),R(N),C(N),W(N,5) +! R(I) is used to return log(scaling factor for row I). +! C(J) is used to return log(scaling factor for col J). +! W(I,1), W(I,2) hold row,col non-zero counts. +! W(J,3) holds - COL J LOG during execution. +! W(J,4) holds 2-iteration change in W(J,3). +! W(I,5) is used to save average element log for row I. +! INTEGER*2 IRN(NA), ICN(NA) +! IRN(K) gives row number of element in A(K). +! ICN(K) gives col number of element in A(K). +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER :: N, NA +! .. +! .. Array Arguments .. + REAL (WP) :: A(NA), C(N), R(N), W(N,5) + INTEGER :: ICN(NA), IRN(NA) +! .. +! .. Local Scalars .. + REAL (WP) :: E, E1, EM, Q, Q1, QM, S, S1, SM, SMIN, U, V + INTEGER :: I, I1, I2, ITER, J, K, MAXIT +! .. +! .. Intrinsic Functions .. + INTRINSIC ABS, LOG, REAL +! .. +! .. Data Statements .. +! MAXIT is the maximal permitted number of iterations. +! SMIN is used in a convergence test on (residual norm)**2. + DATA MAXIT/100/, SMIN/0.1/ +! .. +! .. FIRST EXECUTABLE STATEMENT MA19AD +! .. +! Check scalar data. + IFAIL = 1 + IF (N<1) GOTO 210 + IFAIL = 2 +! IF (N > 32767)GOTO 230 + IFAIL = 2 + IFAIL = 0 + +! Initialise for accumulation of sums and products. + C(1:N) = ZERO + R(1:N) = ZERO + W(1:N,1:4) = ZERO + IF (NA<=0) GOTO 220 + DO 40 K = 1, NA + U = ABS(A(K)) +! IF (U == ZERO) GOTO 30 + IF (ABS(U-ZERO)<=ZERO) GOTO 40 + U = LOG(U) + I1 = IRN(K) + I2 = ICN(K) + IF (I1>=1 .AND. I1<=N .AND. I2>=1 .AND. I2<=N) GOTO 30 + IF (LP>0) WRITE (LP,90000) K, I1, I2 +90000 FORMAT (' MC19 error. Element ',I5,' is in row ',I5,' and column ', & + I5) + IFAIL = 3 + GOTO 40 +! Count row/col non-zeros and compute rhs vectors. +30 W(I1,1) = W(I1,1) + 1. + W(I2,2) = W(I2,2) + 1. + R(I1) = R(I1) + U + W(I2,3) = W(I2,3) + U +40 END DO + IF (IFAIL==3) GOTO 210 + +! Divide rhs by diagonal matrices. + DO I = 1, N +! IF (W(I,1) == ZERO) W(I,1) = 1. + IF (ABS(W(I,1))<=ZERO) W(I,1) = ONE + R(I) = R(I)/W(I,1) +! SAVE R(I) FOR USE AT END. + W(I,5) = R(I) +! IF (W(I,2) == ZERO) W(I,2) = 1. + IF (ABS(W(I,2))<=ZERO) W(I,2) = ONE + W(I,3) = W(I,3)/W(I,2) + END DO +! SM = SMIN*FLOAT(NA) + SM = SMIN*REAL(NA) +! Sweep to compute initial residual vector. + DO 60 K = 1, NA +! IF (A(K) == ZERO) GOTO 80 + IF (ABS(A(K))<=ZERO) GOTO 60 + I = IRN(K) + J = ICN(K) + R(I) = R(I) - W(J,3)/W(I,1) +60 END DO + +! Initialise iteration. + E = ZERO + Q = 1. + S = ZERO + DO I = 1, N + S = S + W(I,1)*R(I)**2 + END DO + IF (S<=SM) GOTO 160 + +! Iteration loop. + DO ITER = 1, MAXIT +! Sweep through matrix to update residual vector. + DO 80 K = 1, NA +! IF (A(K) == ZERO) GOTO 130 + IF (ABS(A(K))<=ZERO) GOTO 80 + I = ICN(K) + J = IRN(K) + C(I) = C(I) + R(J) +80 END DO + S1 = S + S = ZERO + DO I = 1, N + V = -C(I)/Q + C(I) = V/W(I,2) + S = S + V*C(I) + END DO + E1 = E + E = Q*S/S1 + Q = 1. - E + IF (S<=SM) E = ZERO +! Update residual. + DO I = 1, N + R(I) = R(I)*E*W(I,1) + END DO + IF (S<=SM) GOTO 180 + EM = E*E1 +! Sweep through matrix to update residual vector. + DO 110 K = 1, NA +! IF (A(K) == ZERO) GOTO 152 + IF (ABS(A(K))<=ZERO) GOTO 110 + I = IRN(K) + J = ICN(K) + R(I) = R(I) + C(J) +110 END DO + S1 = S + S = ZERO + DO I = 1, N + V = -R(I)/Q + R(I) = V/W(I,1) + S = S + V*R(I) + END DO + E1 = E + E = Q*S/S1 + Q1 = Q + Q = 1. - E +! Special fixup for last iteration. + IF (S<=SM) Q = 1. +! Update colulm scaling powers. + QM = Q*Q1 + DO I = 1, N + W(I,4) = (EM*W(I,4)+C(I))/QM + W(I,3) = W(I,3) + W(I,4) + END DO + IF (S<=SM) GOTO 160 +! Update residual. + DO I = 1, N + C(I) = C(I)*E*W(I,2) + END DO + END DO +160 DO I = 1, N + R(I) = R(I)*W(I,1) + END DO + +! Sweep through matrix to prepare to get row scaling powers. +180 DO 190 K = 1, NA +! IF (A(K) == ZERO) GOTO 200 + IF (ABS(A(K))<=ZERO) GOTO 190 + I = IRN(K) + J = ICN(K) + R(I) = R(I) + W(J,3) +190 END DO + +! Final conversion to output values. + DO I = 1, N + R(I) = R(I)/W(I,1) - W(I,5) + C(I) = -W(I,3) + END DO + GOTO 220 +210 IF (LP>0) WRITE (LP,90001) IFAIL +90001 FORMAT (' Error return ',I2,' from MC19') +220 RETURN + END SUBROUTINE MC19AD +! End of MA28 subroutines. +!_______________________________________________________________________ +! Beginning of JACSP routines. +! Change Record: +! ST 09-01-05 +! Convert to F90 using Metcalf converter +! Trim trailing blanks using trimem.pl +! Convert arithmetic operators to F90 +! Replace R1MACH by EPSILON +! Run through nag tools suite +! ST 09-17-05 +! Delete RDUM and IDUM +! Change FCN argument list +! ST 09-18-05 +! Add subroutine DGROUPDS to define the IGP and JGP arrays for JACSP +! ST 09-20-05 +! Modify JACSP to produce JACSPDB for sparse, dense, and +! banded Jacobians +!_______________________________________________________________________ + + SUBROUTINE JACSP(FCN,N,T,Y,F,FJAC,NRFJAC,YSCALE,FAC,IOPT,WK, & + LWK,IWK,LIWK,MAXGRP,NGRP,JPNTR,INDROW) + +! BEGIN PROLOGUE JACSP +! DATE WRITTEN 850415 +! CATEGORY NO. D4 +! KEYWORDS NUMERICAL DIFFERENCING, SPARSE JACOBIANS +! AUTHOR SALANE, DOUGLAS E., SANDIA NATIONAL LABORATORIES +! NUMERICAL MATHEMATICS DIVISION,1642 +! ALBUQUERQUE, NM 87185 + +! SUBROUTINE JACSP USES FINITE DIFFERENCES TO COMPUTE THE JACOBIAN OF +! A SPARSE SYSTEM OF N EQUATIONS AND N UNKNOWNS. JACSP IS DESIGNED FOR +! USE IN NUMERICAL METHODS FOR SOLVING NONLINEAR PROBLEMS WHERE THE +! JACOBIAN IS EVALUATED REPEATEDLY AND OFTEN AT NEIGHBORING ARGUMENTS +! (E.G., NEWTON'S METHOD OR A BDF METHOD FOR SOVING STIFF ORDINARY +! DIFFERENTIAL EQUATIONS). JACSP IS INTENDED FOR APPLICATIONS IN WHICH +! THE REQUIRED JACOBIANS ARE LARGE AND SPARSE. + +! TAKING ADVANTAGE OF SPARSITY. + +! SUBROUTINE JACSP TAKES ADVANTAGE OF THE SPARSITY OF A MATRIX TO REDUCE +! THE NUMBER OF FUNCTION EVALUATIONS REQUIRED TO COMPUTE THE JACOBIAN. +! TO REALIZE THIS ADVANTAGE, THE USER MUST PROVIDE JACSP WITH A COLUMN +! GROUPING. THIS MEANS THE USER MUST ASSIGN COLUMNS OF THE JACOBIAN TO +! MUTUALLY EXCLUSIVE GROUPS BASED ON THE SPARSITY OF THE COLUMNS.THE +! DIFFERENCES REQUIRED TO COMPUTE THE NONZERO ELEMENTS IN ALL COLUMNS IN +! A GROUP ARE FORMED USING ONLY ONE ADDITIONAL FUNCTION EVALUATION. FOR +! MORE DETAILS, THE USER IS REFERRED TO THE REPORT BY D.E. SALANE AND +! L.F. SHAMPINE (REF. 1). + +! THE SUBROUTINE DVDSM (REF 2.) IS THE WAY MOST USERS WILL DETERMINE A +! COLUMN GROUPING FOR JACSP. THE USE OF DSM AND JACSP IS DESCRIBED +! LATER IN THE PROLOGUE. + +! STORAGE. + +! JACSP REQUIRES THE USER TO PROVIDE A SPARSE DATA STRUCTURE FOR THE +! JACOBIAN TO BE COMPUTED. JACSP REQUIRES THE USER TO SPECIFY THE +! INDICES OF THE NONZERO ELEMENTS IN A COLUMN PACKED SPARSE DATA +! STRUCTURE (SEE REF.1). THE SUBROUTINE DVDSM( REF 2.) CAN BE USED TO +! ESTABLISH THE SPARSE DATA STRUCTURE REQUIRED BY JACSP. THE USE OF +! JACSP AND DSM IS DESCRIBED LATER IN THE PROLOGUE. + +! ON OUTPUT, SUBROUTINE JACSP WILL RETURN THE JACOBIAN IN ANY ONE OF +! THE FOLLOWING THREE FORMATS SPECIFIED BY THE USER. + +! (1) FULL STORAGE FORMAT.......THE COMPUTED JACOBIAN IS STORED IN +! AN ARRAY WHOSE ROW AND COLUMN DIMENSIONS ARE THE NUMBER OF +! EQUATIONS. + +! (2) BANDED STORAGE FORMAT.....THE COMPUTED MATRIX IS STORED IN A TWO +! DIMENSIONAL ARRAY WHOSE ROW DIMENSION IS +! 2*(NUMBER OF LOWER DIAGONALS) + (THE NUMBER OF UPPER DIAGONALS) +! + 1. THE COLUMN DIMENSION OF THIS ARRAY IS THE NUMBER OF +! EQUATIONS. THIS STORAGE FORMAT IS COMPATIBLE WITH THE LINPACK +! GENERAL BAND MATRIX EQUATION SOLVER. + +! (3) SPARSE STORAGE FORMAT.....THE COMPUTED JACOBIAN IS STORED IN A +! ONE DIMENSIONAL ARRAY WHOSE LENGTH IS THE NUMBER OF NONZERO +! ELEMENTS IN THE JACOBIAN. + +! DESCRIPTION + +! SUBROUTINE PARAMETERS + +! FCN()......A USER-PROVIDED FUNCTION (SEE SUBROUTINE DESCRIPTION). +! N..........THE NUMBER OF EQUATIONS. +! T..........A SCALAR VARIABLE. T IS PROVIDED SO USERS CAN PASS THE +! VALUE OF AN INDEPENDENT VARIALBE TO THE FUNCTION +! EVALUATION ROUTINE. +! Y(*).......AN ARRAY OF DIMENSION N. THE POINT AT WHICH THE +! JACOBIAN IS TO BE EVALUATED. +! F(*).......AN ARRAY OF DIMENSION N. THE EQUATIONS EVALUATED AT +! THE POINT Y. +! FJAC(*,*)..AN ARRAY OF WHOSE DIMENSIONS DEPEND ON THE STORAGE +! FORMAT SELECTED BY THE USER. THE ROW AND COLUMN DIMENSIONS ARE +! SET BY THE USER. IF THE SPARSE MATRIX FORMAT IS SELECTED, FJAC +! SHOULD BE TREATED AS A ONE DIMENSIONAL ARRAY BY THE CALLING +! PROGRAM. FOR FURTHER DETAILS, SEE THE DESCRIPTION OF THE +! PARAMETER IOPT(1) WHICH CONTROLS THE STORAGE FORMAT. + +! NOTE THAT IF THE BANDED OR FULL OPTION IS USED,THE USER SHOULD +! ZERO OUT THOSE POSITIONS OF FJAC THAT WILL NOT BE ACCESSED BY +! JACSP. JACSP DOES NOT ZERO OUT THE MATRIX FJAC BEFORE +! COMPUTING THE JACOBIAN. POSITIONS OF FJAC THAT ARE NOT ASSIGNED +! VALUES BY JACSP WILL BE THE SAME ON EXIT AS ON ENTRY TO JACSP. + +! NRFJAC.....THE NUMBER OF ROWS IN FJAC AND THE LEADING DIMENSION +! OF FJAC (SEE IOPT(1)). +! NCFJAC.....THE NUMBER OF COLUMNS IN FJAC AND THE COLUMN DIMENSION +! OF FJAC (SEE IOPT(1)). +! YSCALE(*)..AN ARRAY OF DIMENSION N. YSCALE(I) CONTAINS A +! REPRESENTATIVE MAGNITUDE FOR Y(I). YSCALE(I) MUST BE POSITIVE. +! YSCALE IS AN OPTIONAL FEATURE OF THE ROUTINE. IF THE USER DOES +! NOT WISH TO PROVIDE YSCALE, IT CAN BE TREATED AS A DUMMY SCALAR +! VARIABLE (SEE IOPT(3)). + +! FAC(*).....AN ARRAY OF DIMENSION N. FAC CONTAINS A PERCENTAGE FOR +! USE IN COMPUTING THE INCREMENT. + +! THE NORMAL WAY TO USE THE CODE IS TO LET JACSP ADJUST FAC VALUES. +! IN THIS CASE FAC IS INITIALIZED BY JACSP. ALSO, THE USER MUST +! NOT ALTER FAC VALUES BETWEEN CALLS TO JACSP. + +! THE USER MAY PROVIDE FAC VALUES IF DESIRED (SEE IOPT(4) FOR +! DETAILS). IF THE USER PROVIDES FAC VALUES, FAC(I) SHOULD BE +! SHOULD BE SET TO A VALUE BETWEEN O AND 1. JACSP WILL NOT PERMIT +! FAC(I) TO BE SET TO A VALUE THAT RESULTS IN TOO SMALL AN +! INCREMENT. JACSP ENSURES THAT +! FACMIN <= FAC(I) <= FACMAX. +! FOR FURTHER DETAILS ON FACMIN AND FACMAX SEE +! THE REPORT(REF.3). + +! IOPT(*)....AN INTEGER ARRAY OF LENGTH 5 FOR USER SELECTED OPTIONS. + +! IOPT(1) CONTROLS THE STORAGE FORMAT. + +! IOPT(1) = 0 INDICATES FULL STORAGE FORMAT. SET BOTH +! NRFJAC = N AMD NCFJAC = N. + +! IOPT(1) = 1 INDICATES BANDED STORAGE FORMAT. SET +! NRFJAC = 2 * ML + MU + 1 WHERE +! ML = NUMBER OF SUB-DIAGONALS BELOW THE MAIN DIAGONAL AND +! MU = NUMBER OF SUPER-DIAGONALS ABOVE THE MAIN DIAGONAL. +! SET NCFJAC = N. + +! IOPT(1) = 2 INDICATES SPARSE STORAGE FORMAT. SET +! NRFJAC = TO THE NUMBER OF NONZEROS IN THE JACOBIAN AND +! SET NCFJAC = 1. + +! IOPT(2) MUST BE SET TO THE BANDWIDTH OF THE MATRIX. +! IOPT(2) NEED ONLY BE PROVIDED IF BANDED STORAGE +! FORMAT IS REQUESTED(I.E., IOPT(1) = 1). + +! IOPT(3) ALLOWS THE USER TO PROVIDE TYPICAL VALUES +! TO BE USED IN COMPUTING INCREMENTS FOR DIFFERENCING. + +! IOPT(3) = 0 INDICATES Y VALUES ARE USED IN COMPUTING +! INCREMENTS. IF IOPT(3) = 0, NO STORAGE IS REQUIRED FOR +! YSCALE AND IT MAY BE TREATED AS A DUMMY VARIABLE. + +! IOPT(3) = 1 INDICATES THAT YSCALE VALUES ARE TO BE USED. +! IF IOPT(3) = 1, THE USER MUST PROVIDE AN ARRAY OF NONZERO +! VALUES IN YSCALE. + +! IOPT(4) ALLOWS THE USER TO PROVIDE THE VALUES USED +! IN THE FAC ARRAY TO COMPUTE INCREMENTS FOR DIFFERENCING. + +! IF IOPT(4) = 0, EACH COMPONENT OF FAC IS +! SET TO THE SQUARE ROOT OF MACHINE UNIT ROUNDOFF ON THE +! FIRST CALL TO JACSP. IOPT(4) IS SET TO ONE ON RETURN. + +! IF IOPT(4) = 1, EACH COMPONENT OF FAC MUST BE SET +! BY THE CALLING ROUTINE. UNLESS THE USER WISHES TO +! INITIALIZE FAC, THE FAC ARRAY SHOULD NOT BE ALTERED +! BETWEEN SUBSEQUENT CALLS TO JACSP. ALSO, THE USER +! SHOULD NOT CHANGE THE VALUE OF IOPT(4) RETURNED BY +! JACSP. + +! IOPT(5) IS NOT USED IN JACSP. + +! WK(*)......A WORK ARRAY OF DIMENSION AT LEAST 3*N +! LWK........THE LENGTH OF THE WORK ARRAY. LWK IS AT LEAST 3*N. +! IWK(*)...AN INTEGER ARRAY OF LENGTH LIWK = 50 + N WHICH GIVES +! DIAGNOSTIC INFORMATION IN POSITIONS 1 THROUGH 50. POSITIONS 51 +! THROUGH 50 + N ARE USED AS INTEGER WORKSPACE. + +! IWK(1) GIVES THE NUMBER OF TIMES THE INCREMENT FOR DIFFERENCING +! (DEL) WAS COMPUTED AND HAD TO BE INCREASED BECAUSE (Y(JCOL)+DEL) +! -Y(JCOL)) WAS TOO SMALL RELATIVE TO Y(JCOL) OR YSCALE(JCOL). + +! IWK(2) GIVES THE NUMBER OF COLUMNS IN WHICH THREE ATTEMPTS WERE +! MADE TO INCREASE A PERCENTAGE FACTOR FOR DIFFERENCING (I.E., A +! COMPONENT IN THE FAC ARRAY) BUT THE COMPUTED DEL REMAINED +! UNACCEPTABLY SMALL RELATIVE TO Y(JCOL) OR YSCALE(JCOL). IN SUCH +! CASES THE PERCENTAGE FACTOR IS SET TO THE SQUARE ROOT OF THE UNIT +! ROUNDOFF OF THE MACHINE. THE FIRST 10 COLUMNS ARE GIVEN IN +! IWK(21),...,IWK(30). + +! IWK(3) GIVES THE NUMBER OF COLUMNS IN WHICH THE COMPUTED DEL WAS +! ZERO TO MACHINE PRECISION BECAUSE Y(JCOL) OR YSCALE(JCOL) WAS +! ZERO. IN SUCH CASES DEL IS SET TO THE SQUARE ROOT OF THE UNIT +! ROUNDOFF. + +! IWK(4) GIVES THE NUMBER OF COLUMNS WHICH HAD TO BE RECOMPUTED +! BECAUSE THE LARGEST DIFFERENCE FORMED IN THE COLUMN WAS VERY +! CLOSE TO ZERO RELATIVE TO SCALE WHERE + +! SCALE = MAX(F(Y),F(Y+DEL)) +! I I + +! AND I DENOTES THE ROW INDEX OF THE LARGEST DIFFERENCE IN THE +! COLUMN CURRENTLY BEING PROCESSED. IWK(31),...,IWK(40) GIVES THE +! FIRST 10 OF THESE COLUMNS. + +! IWK(5) GIVES THE NUMBER OF COLUMNS WHOSE LARGEST DIFFERENCE IS +! CLOSE TO ZERO RELATIVE TO SCALE AFTER THE COLUMN HAS BEEN +! RECOMPUTED. THE FIRST 10 OF THESE COLUMNS ARE GIVEN IN POSITIONS +! IWK(41)...IWK(50). +! IWK(6) GIVES THE NUMBER OF TIMES SCALE INFORMATION WAS NOT +! AVAILABLE FOR USE IN THE ROUNDOFF AND TRUNCATION ERROR TESTS. +! THIS OCCURS WHEN +! MIN(F(Y),F(Y+DEL)) = 0. +! I I +! WHERE I IS THE INDEX OF THE LARGEST DIFFERENCE FOR THE COLUMN +! CURRENTLY BEING PROCESSED. +! IWK(7) GIVES THE NUMBER OF TIMES THE FUNCTION EVALUATION ROUTINE +! WAS CALLED. +! IWK(8) GIVES THE NUMBER OF TIMES A COMPONENT OF THE FAC ARRAY WAS +! REDUCED BECAUSE CHANGES IN FUNCTION VALUES WERE LARGE AND EXCESS +! TRUNCATION ERROR WAS SUSPECTED. IWK(11),...,IWK(20) GIVES THE FIRST +! 10 OF THESE COLUMNS. +! IWK(9) AND IWK(10) ARE NOT USED IN JACSP. +! LIWK.....THE LENGTH OF THE ARRAY IWK. LIWK = 50 + N. +! THE FOLLOWING PARAMETERS MAY BE PROVIDED BY THE USER OR +! INITIALIZED BY THE SUBROUTINE DVDSM (SEE LONG DESCRIPTION +! SECTION OF THE PROLOGUE). +! PARAMETERS CONTAINING COLUMN GROUPING: +! MAXGRP.....THE NUMBER OF DIFFERENT GROUPS TO WHICH THE COLUMNS +! HAVE BEEN ASSIGNED. +! NGRP(*)....AN ARRAY OF LENGTH N THAT CONTAINS THE COLUMN GROUPING. +! NGRP(I) IS THE GROUP TO WHICH THE I-TH COLUMN HAS BEEN +! ASSIGNED. +! (SEE USE OF DSM AND JACSP FOR DETERMINING MAXGRP AND NGRP) +! PARAMETERS CONTAINING THE SPARSE DATA STRUCTURE: +! JPNTR(*)...AN ARRAY OF LENGTH N+1 THAT CONTAINS POINTERS TO +! THE ROW INDICES IN INDROW (SEE INDROW). +! INDROW(*)..AN ARRAY WHOSE LENGTH IS THE NUMBER OF NONZERO ELEMENTS +! IN THE JACOBIAN. INDROW CONTAINS THE ROW INDICES +! STORED IN COLUMN PACKED FORMAT. IN OTHER WORDS, THE +! ROW INDICES OF THE NONZERO ELEMENTS OF A GIVEN +! COLUMN OF THE JACOBIAN ARE STORED CONTIGUOUSLY IN +! INDROW. JPNTR(JCOL) POINTS TO THE ROW INDEX OF THE FIRST +! NONZERO ELEMENT IN THE COLUMN JCOL. JPNTR(JCOL+1) - 1 +! POINTS TO THE ROW INDEX OF THE LAST NONZERO ELEMENT IN +! THE COLUMN JCOL. +! (SEE USE OF DSM AND JACSP FOR DETERMINING INDROW AND JPNTR) +! REQUIRED SUBROUTINES: SUBROUTINE FCN(N,T,Y,F) +! T......AN INDEPENDENT SCALAR VARIABLE WHICH MAY BE USED +! IN EVALUATING F(E.G., F(Y(T),T)). +! Y(*)...AN ARRAY OF DIMENSION N WHICH CONTAINS THE POINT AT +! WHICH THE EQUATIONS ARE TO BE EVALUATED. +! F(*)...AN ARRAY OF DIMENSION N WHICH ON RETURN FROM FCN +! CONTAINS THE EQUATIONS EVALUATED AT Y. + +! LONG DESCRIPTION + +! ROUNDOFF AND TRUNCATION ERRORS. +! SUBROUTINE JACSP TAKES ADVANTAGE OF THE WAY IN WHICH THE JACOBIAN +! IS EVALUATED TO ADJUST INCREMENTS FOR DIFFERENCING TO CONTROL +! ROUNDOFF AND TRUNCATION ERRORS. THE ROUTINE SELDOM REQUIRES MORE +! THAN ONE ADDITIONAL FUNCTION EVALUATION TO COMPUTE A COLUMN OF THE +! JACOBIAN. ALSO, THE ROUTINE RETURNS A VARIETY OF ERROR DIAGNOSTICS +! TO WARN USERS WHEN COMPUTED DERIVATIVES MAY NOT BE ACCURATE. +! WARNING: JACSP CAN NOT GUARANTEE THE ACCURACY OF THE COMPUTED +! DERIVATIVES. IN ORDER O SAVE ON FUNCTION EVALUATIONS, HEURISTIC +! TECNIQUES FOR INCREMENT ADJUSTMENT AND SAFEGUARDING INCREMENTS ARE +! USED. THESE USUALLY WORK WELL. +! WARNING: SOME OF THE DIAGNOSTICS RETURNED CAN ONLY BE INTERPRETED +! WITH A DETAILED KNOWLEDGE OF THE ROUTINE. NEVERTHELESS, THEY ARE +! PROVIDED TO GIVE USERS FULL ACCESS TO THE INFORMATION PRODUCED BY +! THE SUBROUTINE. +! USE OF DSM AND JACSP. +! SUBROUTINE DVDSM CAN BE USED TO DETERMINE THE COLUMN GROUPING (MAXGRP +! AND NGRP(*)) AND THE SPARSE DATA STRUCTURE VARIABLES (JPNTR(*) AND +! INDROW(*)). THE USER CAN CALL DVDSM ONCE TO INITIALIZE +! MAXGRP,NGRP,JPNTR AND INDROW. JACSP MAY THEN BE CALLED REPEATEDLY TO +! EVALUATE THE JACOBIAN. THE FOLLOWING ARE THE IMPORTANT VARIABLES IN +! THE DSM CALLING SEQUENCE. +! SUBROUTINE DVDSM(...,INDROW,INDCOL,NGRP,MAXGRP,..,JPNTR,...) +! ON INPUT, THE USER MUST PROVIDE DSM WITH THE INTEGER ARRAYS INDROW AND +! INDCOL. THE PAIR +! (INDROW(I),INDCOL(I)) +! PROVIDES THE INDEX OF A NONZERO ELEMENT OF THE JACOBIAN. THE LENGTH OF +! INDROW AND INDCOL IS THE NUMBER OF NONZERO ELEMENTS IN THE JACOBIAN. +! NO ORDERING OF THE INDICES FOR NONZERO ELEMENTS IS REQUIRED IN THE +! ARRAYS INDROW AND INDCOL. +! ON RETURN FROM DSM, MAXGRP,NGRP,INDROW,JPNTR ARE INITIALIZED +! FOR INPUT TO JACSP. THE USER MUST NOT CHANGE MAXGRP,NGRP,JPNTR OR +! INDROW AFTER CALLING DSM. +! WE REFER THE READER TO THE IN CODE DOCUMENTATION FOR DSM OR THE REPORT +! BY MORE AND COLEMAN (REF 2.) FOR FURTHER DETAILS. +! REFERENCES +! (1) D.E. SALANE AND L. F. SHAMPINE +! "AN ECONOMICAL AND EFFICIENT ROUTINE FOR COMPUTING +! SPARSE JACOBIANS", REPORT NO. SAND85-0977, SANDIA NATIONAL +! LABORATORIES, ALBUQUERQUE,NM,87185. +! (2) T.F. COLEMAN AND J.J. MORE +! "SOFTWARE FOR ESTIMATING SPARSE JACOBIAN MATRICES" ACM TOMS, +! V10.,N0.3,SEPT. 1984. +! (3) D.E. SALANE AND L. F. SHAMPINE +! "THREE ADAPTIVE ROUTINES FOR FORMING JACOBIANS NUMERICALLY," +! REPORT NO. SAND86- ****, SANDIA NATIONAL LABORATORIES, +! ALBUQUERQUE, NM, 87185. +! REQUIRED FORTRAN INTRINSIC FUNCTIONS: MAX,MIN,ABS,SIGN +! MACHINE DEPENDENT CONSTANT: U (MACHINE UNIT ROUNDOFF) +! JACSP SETS THE REQUIRED MACHINE CONSTANT USING THE F90 +! INTRINSIC EPSILON. +! END PROLOGUE JACSP + + IMPLICIT NONE + +! .. Parameters .. + INTEGER, PARAMETER :: WP = KIND(0.0D0) +! .. +! .. Scalar Arguments .. + REAL (WP) :: T + INTEGER :: LIWK, LWK, MAXGRP, N, NRFJAC +! .. +! .. Array Arguments .. + REAL (WP) :: F(N), FAC(N), FJAC(NRFJAC,*), WK(LWK), Y(N), YSCALE(*) + INTEGER :: INDROW(*), IOPT(5), IWK(LIWK), JPNTR(*), NGRP(N) +! .. +! .. Subroutine Arguments .. + EXTERNAL FCN +! .. +! .. Local Scalars .. + REAL (WP) :: ADIFF, AY, DEL, DELM, DFMJ, DIFF, DMAX, EXPFMN, FACMAX, & + FACMIN, FJACL, FMJ, ONE, P125, P25, P75, P875, PERT, RDEL, RMNFDF, & + RMXFDF, SDF, SF, SGN, T1, T2, U, U3QRT, U7EGT, UEGT, UMEGT, UQRT, & + USQT, ZERO + INTEGER :: IDXL, IDXU, IFLAG1, IFLAG2, IRCMP, IRDEL, IROW, IROWB, & + IROWMX, ITRY, J, JCOL, KT1, KT2, KT3, KT4, KT5, L, NID1, NID2, NID3, & + NID4, NID5, NID6, NIFAC, NT2, NUMGRP +! .. +! .. Intrinsic Functions .. +! INTRINSIC ABS, EPSILON, KIND, MAX, MIN, SIGN, SQRT + INTRINSIC ABS, EPSILON, MAX, MIN, SIGN, SQRT +! .. +! .. Data Statements .. + DATA PERT/2.0E+0_WP/, FACMAX/1.E-1_WP/, EXPFMN/.75E+0_WP/ + DATA ONE/1.0E0_WP/, ZERO/0.0E0_WP/ + DATA P125/.125E+0_WP/, P25/.25E+0_WP/, P75/.75E+0_WP/, P875/.875E+0_WP/ + DATA NIFAC/3/, NID1/10/, NID2/20/, NID3/30/, NID4/40/, NID5/50/ +! .. +! COMPUTE ALGORITHM AND MACHINE CONSTANTS. +! .. +! .. FIRST EXECUTABLE STATEMENT JACSP +! .. + U = EPSILON(ONE) + USQT = SQRT(U) + UEGT = U**P125 + UMEGT = ONE/UEGT + UQRT = U**P25 + U3QRT = U**P75 + U7EGT = U**P875 + FACMIN = U**EXPFMN + + IF (IOPT(4) == 0) THEN + IOPT(4) = 1 + DO 10 J = 1, N + FAC(J) = USQT +10 CONTINUE + END IF + DO 20 J = 1, 50 + IWK(J) = 0 +20 CONTINUE + KT1 = NID1 + KT2 = NID2 + KT3 = NID3 + KT4 = NID4 + KT5 = NID5 + NID6 = LIWK + NT2 = 2*N + DO NUMGRP = 1, MAXGRP +! COMPUTE AND SAVE THE INCREMENTS FOR THE COLUMNS IN GROUP NUMGRP. + IRCMP = 0 + ITRY = 0 + DO 30 J = NID5 + 1, NID6 + IWK(J) = 0 +30 CONTINUE +40 CONTINUE + DO JCOL = 1, N + IF (NGRP(JCOL) == NUMGRP) THEN + WK(N+JCOL) = Y(JCOL) +! COMPUTE DEL. IF DEL IS TOO SMALL INCREASE FAC(JCOL) AND RECOMPUTE +! DEL. NIFAC ATTEMPTS ARE MADE TO INCREASE FAC(JCOL) AND FIND AN +! APPROPRIATE DEL. IF DEL CANT BE FOUND IN THIS MANNER, DEL IS COMPUTED +! WITH FAC(JCOL) SET TO THE SQUARE ROOT OF THE MACHINE PRECISION (USQT). +! IF DEL IS ZERO TO MACHINE PRECISION BECAUSE Y(JCOL) IS ZERO OR +! YSCALE(JCOL) IS ZERO, DEL IS SET TO USQT. + SGN = SIGN(ONE,F(JCOL)) + IRDEL = 0 + IF (IOPT(3) == 1) THEN + AY = ABS(YSCALE(JCOL)) + ELSE + AY = ABS(Y(JCOL)) + END IF + DELM = U7EGT*AY + + DO 50 J = 1, NIFAC + DEL = FAC(JCOL)*AY*SGN +! IF (DEL == ZERO) THEN + IF (ABS(DEL) <= ZERO) THEN + + DEL = USQT*SGN + IF (ITRY == 0) IWK(3) = IWK(3) + 1 + END IF + T1 = Y(JCOL) + DEL + DEL = T1 - Y(JCOL) + IF (ABS(DEL) < DELM) THEN + IF (J >= NIFAC) GOTO 50 + IF (IRDEL == 0) THEN + IRDEL = 1 + IWK(1) = IWK(1) + 1 + END IF + T1 = FAC(JCOL)*UMEGT + FAC(JCOL) = MIN(T1,FACMAX) + ELSE + GOTO 60 + END IF +50 END DO + + FAC(JCOL) = USQT + DEL = USQT*AY*SGN + IWK(2) = IWK(2) + 1 + IF (KT2 < NID3) THEN + KT2 = KT2 + 1 + IWK(KT2) = JCOL + END IF + +60 CONTINUE + WK(NT2+JCOL) = DEL + Y(JCOL) = Y(JCOL) + DEL + END IF + END DO + + IWK(7) = IWK(7) + 1 + CALL FCN(N,T,Y,WK) + DO JCOL = 1, N + IF (NGRP(JCOL) == NUMGRP) Y(JCOL) = WK(N+JCOL) + END DO + +! COMPUTE THE JACOBIAN ENTRIES FOR ALL COLUMNS IN NUMGRP. +! STORE ENTRIES ACCORDING TO SELECTED STORAGE FORMAT. +! USE LARGEST ELEMENTS IN A COLUMN TO DETERMINE SCALING +! INFORMATION FOR ROUNDOFF AND TRUNCATION ERROR TESTS. + DO JCOL = 1, N + IF (NGRP(JCOL) == NUMGRP) THEN + IDXL = JPNTR(JCOL) + IDXU = JPNTR(JCOL+1) - 1 + DMAX = ZERO + RDEL = ONE/WK(NT2+JCOL) + IROWMX = 1 + DO L = IDXL, IDXU + IROW = INDROW(L) + DIFF = WK(IROW) - F(IROW) + ADIFF = ABS(DIFF) + IF (ADIFF >= DMAX) THEN + IROWMX = IROW + DMAX = ADIFF + SF = F(IROW) + SDF = WK(IROW) + END IF + FJACL = DIFF*RDEL + IF (ITRY == 1) WK(IROW) = FJACL + + IF (IOPT(1) == 0) THEN + IF (ITRY == 1) WK(IROW+N) = FJAC(IROW,JCOL) + FJAC(IROW,JCOL) = FJACL + END IF + IF (IOPT(1) == 1) THEN + IROWB = IROW - JCOL + IOPT(2) + IF (ITRY == 1) WK(IROW+N) = FJAC(IROWB,JCOL) + FJAC(IROWB,JCOL) = FJACL + END IF + IF (IOPT(1) == 2) THEN + IF (ITRY == 1) WK(IROW+N) = FJAC(L,1) + FJAC(L,1) = FJACL + END IF + END DO + +! IF A COLUMN IS BEING RECOMPUTED (ITRY=1),THIS SECTION OF THE +! CODE PERFORMS AN EXTRAPOLATION TEST TO ENABLE THE CODE TO +! COMPUTE SMALL DERIVATIVES MORE ACCURATELY. THIS TEST IS ONLY +! PERFORMED ON THOSE COLUMNS WHOSE LARGEST DIFFERENCE IS CLOSE +! TO ZERO RELATIVE TO SCALE. + IF (ITRY == 1) THEN + IFLAG1 = 0 + IFLAG2 = 0 + DO 100 J = NID5 + 1, NID6 + IF (IWK(J) == JCOL) IFLAG1 = 1 +100 CONTINUE + IF (IFLAG1 == 1) THEN + IFLAG1 = 0 + T1 = WK(IROWMX+N) + T2 = WK(IROWMX)*FAC(JCOL) + IF (ABS(T2) < ABS(T1)*PERT) IFLAG2 = 1 + END IF + + IF (IFLAG2 == 1) THEN + IFLAG2 = 0 + T1 = FAC(JCOL)*FAC(JCOL) + FAC(JCOL) = MAX(T1,FACMIN) + DO L = IDXL, IDXU + IROW = INDROW(L) + FJACL = WK(IROW+N) + IF (IOPT(1) == 0) FJAC(IROW,JCOL) = FJACL + IF (IOPT(1) == 1) THEN + IROWB = IROW - JCOL + IOPT(2) + FJAC(IROWB,JCOL) = FJACL + END IF + IF (IOPT(1) == 2) FJAC(L,1) = FJACL + END DO + END IF + END IF + + FMJ = ABS(SF) + DFMJ = ABS(SDF) + RMXFDF = MAX(FMJ,DFMJ) + RMNFDF = MIN(FMJ,DFMJ) + +! IF SCALE INFORMATION IS NOT AVAILABLE, PERFORM NO ROUNDOFF +! OR TRUNCATION ERROR TESTS. IF THE EXTRAPOLATION TEST HAS +! CAUSED FAC(JCOL) TO BE RESET TO ITS PREVIOUS VALUE (IAJAC=1) +! THEN NO FURTHER ROUNDOFF OR TRUNCATION ERROR TESTS ARE +! PERFORMED. +! IF (RMNFDF/=ZERO) THEN + IF (ABS(RMNFDF) > ZERO) THEN +! TEST FOR POSSIBLE ROUNDOFF ERROR (FIRST TEST) +! AND ALSO FOR POSSIBLE SERIOUS ROUNDOFF ERROR (SECOND TEST). + IF (DMAX <= (U3QRT*RMXFDF)) THEN + IF (DMAX <= (U7EGT*RMXFDF)) THEN + IF (ITRY == 0) THEN + T1 = SQRT(FAC(JCOL)) + FAC(JCOL) = MIN(T1,FACMAX) + IRCMP = 1 + IF (KT5 < NID6) THEN + KT5 = KT5 + 1 + IWK(KT5) = JCOL + END IF + IWK(4) = IWK(4) + 1 + IF (KT3 < NID4) THEN + KT3 = KT3 + 1 + IWK(KT3) = JCOL + END IF + ELSE + IWK(5) = IWK(5) + 1 + IF (KT4 < NID5) THEN + KT4 = KT4 + 1 + IWK(KT4) = JCOL + END IF + END IF + ELSE + T1 = UMEGT*FAC(JCOL) + FAC(JCOL) = MIN(T1,FACMAX) + END IF + END IF +! TEST FOR POSSIBLE TRUNCATION ERROR. + IF (DMAX > UQRT*RMXFDF) THEN + T1 = FAC(JCOL)*UEGT + FAC(JCOL) = MAX(T1,FACMIN) + IWK(8) = IWK(8) + 1 + IF (KT1 < NID2) THEN + KT1 = KT1 + 1 + IWK(KT1) = JCOL + END IF + END IF + ELSE + IWK(6) = IWK(6) + 1 + END IF + END IF + END DO + +! IF SERIOUS ROUNDOFF ERROR IS SUSPECTED, RECOMPUTE ALL +! COLUMNS IN GROUP NUMGRP. + IF (IRCMP == 1) THEN + IRCMP = 0 + ITRY = 1 + GOTO 40 + END IF + ITRY = 0 + END DO + RETURN + + END SUBROUTINE JACSP +!_______________________________________________________________________ + + SUBROUTINE DEGR(N,INDROW,JPNTR,INDCOL,IPNTR,NDEG,IWA) + +! GIVEN THE SPARSITY PATTERN OF AN M BY N MATRIX A, +! THIS SUBROUTINE DETERMINES THE DEGREE SEQUENCE FOR +! THE INTERSECTION GRAPH OF THE COLUMNS OF A. +! IN GRAPH-THEORY TERMINOLOGY, THE INTERSECTION GRAPH OF +! THE COLUMNS OF A IS THE LOOPLESS GRAPH G WITH VERTICES +! A(J), J = 1,2,...,N WHERE A(J) IS THE J-TH COLUMN OF A +! AND WITH EDGE (A(I),A(J)) IF AND ONLY IF COLUMNS I AND J +! HAVE A NON-ZERO IN THE SAME ROW POSITION. +! NOTE THAT THE VALUE OF M IS NOT NEEDED BY DEGR AND IS +! THEREFORE NOT PRESENT IN THE SUBROUTINE STATEMENT. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE DEGR(N,INDROW,JPNTR,INDCOL,IPNTR,NDEG,IWA) +! WHERE +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! INDROW IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE ROW +! INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! JPNTR IS AN INTEGER INPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN INDROW. +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(N+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! INDCOL IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE +! COLUMN INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! IPNTR IS AN INTEGER INPUT ARRAY OF LENGTH M + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN INDCOL. +! THE COLUMN INDICES FOR ROW I ARE +! INDCOL(K), K = IPNTR(I),...,IPNTR(I+1)-1. +! NOTE THAT IPNTR(M+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! NDEG IS AN INTEGER OUTPUT ARRAY OF LENGTH N WHICH +! SPECIFIES THE DEGREE SEQUENCE. THE DEGREE OF THE +! J-TH COLUMN OF A IS NDEG(J). +! IWA IS AN INTEGER WORK ARRAY OF LENGTH N. +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: N +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(*), INDROW(*), IPNTR(*), IWA(N), JPNTR(N+1), NDEG(N) +! .. +! .. Local Scalars .. + INTEGER :: IC, IP, IR, JCOL, JP +! .. +! .. FIRST EXECUTABLE STATEMENT DEGR +! .. +! INITIALIZATION BLOCK. + NDEG(1:N) = 0 + IWA(1:N) = 0 + +! COMPUTE THE DEGREE SEQUENCE BY DETERMINING THE CONTRIBUTIONS +! TO THE DEGREES FROM THE CURRENT (JCOL) COLUMN AND FURTHER +! COLUMNS WHICH HAVE NOT YET BEEN CONSIDERED. + DO JCOL = 2, N + IWA(JCOL) = N +! DETERMINE ALL POSITIONS (IR,JCOL) WHICH CORRESPOND +! TO NON-ZEROES IN THE MATRIX. + DO JP = JPNTR(JCOL), JPNTR(JCOL+1) - 1 + IR = INDROW(JP) +! FOR EACH ROW IR, DETERMINE ALL POSITIONS (IR,IC) +! WHICH CORRESPOND TO NON-ZEROES IN THE MATRIX. + DO IP = IPNTR(IR), IPNTR(IR+1) - 1 + IC = INDCOL(IP) +! ARRAY IWA MARKS COLUMNS WHICH HAVE CONTRIBUTED TO +! THE DEGREE COUNT OF COLUMN JCOL. UPDATE THE DEGREE +! COUNTS OF THESE COLUMNS AS WELL AS COLUMN JCOL. + IF (IWA(IC) < JCOL) THEN + IWA(IC) = JCOL + NDEG(IC) = NDEG(IC) + 1 + NDEG(JCOL) = NDEG(JCOL) + 1 + END IF + END DO + END DO + END DO + RETURN + + END SUBROUTINE DEGR +!_______________________________________________________________________ + + SUBROUTINE IDO(M,N,INDROW,JPNTR,INDCOL,IPNTR,NDEG,LIST,MAXCLQ, & + IWA1,IWA2,IWA3,IWA4) + +! SUBROUTINE IDO +! GIVEN THE SPARSITY PATTERN OF AN M BY N MATRIX A, THIS +! SUBROUTINE DETERMINES AN INCIDENCE-DEGREE ORDERING OF THE +! COLUMNS OF A. +! THE INCIDENCE-DEGREE ORDERING IS DEFINED FOR THE LOOPLESS +! GRAPH G WITH VERTICES A(J), J = 1,2,...,N WHERE A(J) IS THE +! J-TH COLUMN OF A AND WITH EDGE (A(I),A(J)) IF AND ONLY IF +! COLUMNS I AND J HAVE A NON-ZERO IN THE SAME ROW POSITION. +! THE INCIDENCE-DEGREE ORDERING IS DETERMINED RECURSIVELY BY +! LETTING LIST(K), K = 1,...,N BE A COLUMN WITH MAXIMAL +! INCIDENCE TO THE SUBGRAPH SPANNED BY THE ORDERED COLUMNS. +! AMONG ALL THE COLUMNS OF MAXIMAL INCIDENCE, IDO CHOOSES A +! COLUMN OF MAXIMAL DEGREE. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE IDO(M,N,INDROW,JPNTR,INDCOL,IPNTR,NDEG,LIST, +! MAXCLQ,IWA1,IWA2,IWA3,IWA4) +! WHERE +! M IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF ROWS OF A. +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! INDROW IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE ROW +! INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! JPNTR IS AN INTEGER INPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN INDROW. +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(N+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! INDCOL IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE +! COLUMN INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! IPNTR IS AN INTEGER INPUT ARRAY OF LENGTH M + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN INDCOL. +! THE COLUMN INDICES FOR ROW I ARE +! INDCOL(K), K = IPNTR(I),...,IPNTR(I+1)-1. +! NOTE THAT IPNTR(M+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! NDEG IS AN INTEGER INPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE DEGREE SEQUENCE. THE DEGREE OF THE J-TH COLUMN +! OF A IS NDEG(J). +! LIST IS AN INTEGER OUTPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE INCIDENCE-DEGREE ORDERING OF THE COLUMNS OF A. THE J-TH +! COLUMN IN THIS ORDER IS LIST(J). +! MAXCLQ IS AN INTEGER OUTPUT VARIABLE SET TO THE SIZE +! OF THE LARGEST CLIQUE FOUND DURING THE ORDERING. +! IWA1,IWA2,IWA3, AND IWA4 ARE INTEGER WORK ARRAYS OF LENGTH N. +! SUBPROGRAMS CALLED +! MINPACK-SUPPLIED ... NUMSRT +! INTRINSIC ... MAX +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: M, MAXCLQ, N +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(*), INDROW(*), IPNTR(M+1), IWA1(0:N-1), IWA2(N), & + IWA3(N), IWA4(N), JPNTR(N+1), LIST(N), NDEG(N) +! .. +! .. Local Scalars .. + INTEGER :: IC, IP, IR, JCOL, JP, MAXINC, MAXLST, NCOMP, NUMINC, NUMLST, & + NUMORD, NUMWGT +! .. +! .. External Subroutines .. +! EXTERNAL NUMSRT +! .. +! .. Intrinsic Functions .. + INTRINSIC MAX +! .. +! .. FIRST EXECUTABLE STATEMENT IDO +! .. +! SORT THE DEGREE SEQUENCE. + CALL NUMSRT(N,N-1,NDEG,-1,IWA4,IWA2,IWA3) + +! INITIALIZATION BLOCK. + +! CREATE A DOUBLY-LINKED LIST TO ACCESS THE INCIDENCES OF THE +! COLUMNS. THE POINTERS FOR THE LINKED LIST ARE AS FOLLOWS. + +! EACH UNORDERED COLUMN IC IS IN A LIST (THE INCIDENCE LIST) +! OF COLUMNS WITH THE SAME INCIDENCE. + +! IWA1(NUMINC) IS THE FIRST COLUMN IN THE NUMINC LIST +! UNLESS IWA1(NUMINC) = 0. IN THIS CASE THERE ARE +! NO COLUMNS IN THE NUMINC LIST. + +! IWA2(IC) IS THE COLUMN BEFORE IC IN THE INCIDENCE LIST +! UNLESS IWA2(IC) = 0. IN THIS CASE IC IS THE FIRST +! COLUMN IN THIS INCIDENCE LIST. + +! IWA3(IC) IS THE COLUMN AFTER IC IN THE INCIDENCE LIST +! UNLESS IWA3(IC) = 0. IN THIS CASE IC IS THE LAST +! COLUMN IN THIS INCIDENCE LIST. + +! IF IC IS AN UN-ORDERED COLUMN, THEN LIST(IC) IS THE +! INCIDENCE OF IC TO THE GRAPH INDUCED BY THE ORDERED +! COLUMNS. IF JCOL IS AN ORDERED COLUMN, THEN LIST(JCOL) +! IS THE INCIDENCE-DEGREE ORDER OF COLUMN JCOL. + + MAXINC = 0 + DO JP = N, 1, -1 + IC = IWA4(JP) + IWA1(N-JP) = 0 + IWA2(IC) = 0 + IWA3(IC) = IWA1(0) + IF (IWA1(0) > 0) IWA2(IWA1(0)) = IC + IWA1(0) = IC + IWA4(JP) = 0 + LIST(JP) = 0 + END DO + +! DETERMINE THE MAXIMAL SEARCH LENGTH FOR THE LIST +! OF COLUMNS OF MAXIMAL INCIDENCE. + MAXLST = 0 + DO IR = 1, M + MAXLST = MAXLST + (IPNTR(IR+1)-IPNTR(IR))**2 + END DO + MAXLST = MAXLST/N + MAXCLQ = 0 + NUMORD = 1 + +! BEGINNING OF ITERATION LOOP. + +30 CONTINUE + +! UPDATE THE SIZE OF THE LARGEST CLIQUE +! FOUND DURING THE ORDERING. + IF (MAXINC == 0) NCOMP = 0 + NCOMP = NCOMP + 1 + IF (MAXINC+1 == NCOMP) MAXCLQ = MAX(MAXCLQ,NCOMP) + +! CHOOSE A COLUMN JCOL OF MAXIMAL DEGREE AMONG THE +! COLUMNS OF MAXIMAL INCIDENCE MAXINC. +40 CONTINUE + JP = IWA1(MAXINC) + IF (JP > 0) GOTO 50 + MAXINC = MAXINC - 1 + GOTO 40 +50 CONTINUE + NUMWGT = -1 + DO NUMLST = 1, MAXLST + IF (NDEG(JP) > NUMWGT) THEN + NUMWGT = NDEG(JP) + JCOL = JP + END IF + JP = IWA3(JP) + IF (JP <= 0) GOTO 70 + END DO +70 CONTINUE + LIST(JCOL) = NUMORD + NUMORD = NUMORD + 1 + +! TERMINATION TEST. + IF (NUMORD > N) GOTO 100 + +! DELETE COLUMN JCOL FROM THE MAXINC LIST. + IF (IWA2(JCOL) == 0) THEN + IWA1(MAXINC) = IWA3(JCOL) + ELSE + IWA3(IWA2(JCOL)) = IWA3(JCOL) + END IF + IF (IWA3(JCOL) > 0) IWA2(IWA3(JCOL)) = IWA2(JCOL) + +! FIND ALL COLUMNS ADJACENT TO COLUMN JCOL. + IWA4(JCOL) = N + +! DETERMINE ALL POSITIONS(IR,JCOL) WHICH CORRESPOND +! TO NON-ZEROES IN THE MATRIX. + DO JP = JPNTR(JCOL), JPNTR(JCOL+1) - 1 + IR = INDROW(JP) +! FOR EACH ROW IR, DETERMINE ALL POSITIONS(IR,IC) +! WHICH CORRESPOND TO NON-ZEROES IN THE MATRIX. + DO IP = IPNTR(IR), IPNTR(IR+1) - 1 + IC = INDCOL(IP) +! ARRAY IWA4 MARKS COLUMNS WHICH ARE ADJACENT TO +! COLUMN JCOL. + IF (IWA4(IC) < NUMORD) THEN + IWA4(IC) = NUMORD +! UPDATE THE POINTERS TO THE CURRENT INCIDENCE LISTS. + NUMINC = LIST(IC) + LIST(IC) = LIST(IC) + 1 + MAXINC = MAX(MAXINC,LIST(IC)) +! DELETE COLUMN IC FROM THE NUMINC LIST. + IF (IWA2(IC) == 0) THEN + IWA1(NUMINC) = IWA3(IC) + ELSE + IWA3(IWA2(IC)) = IWA3(IC) + END IF + IF (IWA3(IC) > 0) IWA2(IWA3(IC)) = IWA2(IC) +! ADD COLUMN IC TO THE NUMINC+1 LIST. + IWA2(IC) = 0 + IWA3(IC) = IWA1(NUMINC+1) + IF (IWA1(NUMINC+1) > 0) IWA2(IWA1(NUMINC+1)) = IC + IWA1(NUMINC+1) = IC + END IF + END DO + END DO +! END OF ITERATION LOOP. + GOTO 30 +100 CONTINUE +! INVERT THE ARRAY LIST. + DO JCOL = 1, N + IWA2(LIST(JCOL)) = JCOL + END DO + LIST(1:N) = IWA2(1:N) + RETURN + + END SUBROUTINE IDO +!_______________________________________________________________________ + + SUBROUTINE NUMSRT(N,NMAX,NUM,MODE,INDEX,LAST,NEXT) + +! GIVEN A SEQUENCE OF INTEGERS, THIS SUBROUTINE GROUPS TOGETHER THOSE +! INDICES WITH THE SAME SEQUENCE VALUE AND, OPTIONALLY, SORTS THE +! SEQUENCE INTO EITHER ASCENDING OR DESCENDING ORDER. THE SEQUENCE +! OF INTEGERS IS DEFINED BY THE ARRAY NUM, AND IT IS ASSUMED THAT THE +! INTEGERS ARE EACH FROM THE SET 0,1,...,NMAX. ON OUTPUT THE INDICES +! K SUCH THAT NUM(K) = L FOR ANY L = 0,1,...,NMAX CAN BE OBTAINED +! FROM THE ARRAYS LAST AND NEXT AS FOLLOWS. +! K = LAST(L) +! WHILE(K /= 0) K = NEXT(K) +! OPTIONALLY, THE SUBROUTINE PRODUCES AN ARRAY INDEX SO THAT +! THE SEQUENCE NUM(INDEX(I)), I = 1,2,...,N IS SORTED. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE NUMSRT(N,NMAX,NUM,MODE,INDEX,LAST,NEXT) +! WHERE +! N IS A POSITIVE INTEGER INPUT VARIABLE. +! NMAX IS A POSITIVE INTEGER INPUT VARIABLE. +! NUM IS AN INPUT ARRAY OF LENGTH N WHICH CONTAINS THE +! SEQUENCE OF INTEGERS TO BE GROUPED AND SORTED. IT +! IS ASSUMED THAT THE INTEGERS ARE EACH FROM THE SET +! 0,1,...,NMAX. +! MODE IS AN INTEGER INPUT VARIABLE. THE SEQUENCE NUM IS +! SORTED IN ASCENDING ORDER IF MODE IS POSITIVE AND IN +! DESCENDING ORDER IF MODE IS NEGATIVE. IF MODE IS 0, +! NO SORTING IS DONE. +! INDEX IS AN INTEGER OUTPUT ARRAY OF LENGTH N SET SO +! THAT THE SEQUENCE +! NUM(INDEX(I)), I = 1,2,...,N +! IS SORTED ACCORDING TO THE SETTING OF MODE. IF MODE +! IS 0, INDEX IS NOT REFERENCED. +! LAST IS AN INTEGER OUTPUT ARRAY OF LENGTH NMAX + 1. THE +! INDEX OF NUM FOR THE LAST OCCURRENCE OF L IS LAST(L) +! FOR ANY L = 0,1,...,NMAX UNLESS LAST(L) = 0. IN +! THIS CASE L DOES NOT APPEAR IN NUM. +! NEXT IS AN INTEGER OUTPUT ARRAY OF LENGTH N. IF +! NUM(K) = L, THEN THE INDEX OF NUM FOR THE PREVIOUS +! OCCURRENCE OF L IS NEXT(K) FOR ANY L = 0,1,...,NMAX +! UNLESS NEXT(K) = 0. IN THIS CASE THERE IS NO PREVIOUS +! OCCURRENCE OF L IN NUM. +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: MODE, N, NMAX +! .. +! .. Array Arguments .. + INTEGER :: INDEX(N), LAST(0:NMAX), NEXT(N), NUM(N) +! .. +! .. Local Scalars .. + INTEGER :: I, J, JINC, JL, JU, K, L +! .. +! .. FIRST EXECUTABLE STATEMENT NUMSRT +! .. +! DETERMINE THE ARRAYS NEXT AND LAST. + LAST(0:NMAX) = 0 + DO K = 1, N + L = NUM(K) + NEXT(K) = LAST(L) + LAST(L) = K + END DO + IF (MODE == 0) RETURN + +! STORE THE POINTERS TO THE SORTED ARRAY IN INDEX. + I = 1 + IF (MODE > 0) THEN + JL = 0 + JU = NMAX + JINC = 1 + ELSE + JL = NMAX + JU = 0 + JINC = -1 + END IF + DO J = JL, JU, JINC + K = LAST(J) +30 CONTINUE + IF (K == 0) GOTO 40 + INDEX(I) = K + I = I + 1 + K = NEXT(K) + GOTO 30 +40 CONTINUE + END DO + RETURN + + END SUBROUTINE NUMSRT +!_______________________________________________________________________ + + SUBROUTINE SEQ(N,INDROW,JPNTR,INDCOL,IPNTR,LIST,NGRP,MAXGRP,IWA) + +! GIVEN THE SPARSITY PATTERN OF AN M BY N MATRIX A, THIS +! SUBROUTINE DETERMINES A CONSISTENT PARTITION OF THE +! COLUMNS OF A BY A SEQUENTIAL ALGORITHM. +! A CONSISTENT PARTITION IS DEFINED IN TERMS OF THE LOOPLESS +! GRAPH G WITH VERTICES A(J), J = 1,2,...,N WHERE A(J) IS THE +! J-TH COLUMN OF A AND WITH EDGE(A(I),A(J)) IF AND ONLY IF +! COLUMNS I AND J HAVE A NON-ZERO IN THE SAME ROW POSITION. +! A PARTITION OF THE COLUMNS OF A INTO GROUPS IS CONSISTENT +! IF THE COLUMNS IN ANY GROUP ARE NOT ADJACENT IN THE GRAPH G. +! IN GRAPH-THEORY TERMINOLOGY, A CONSISTENT PARTITION OF THE +! COLUMNS OF A CORRESPONDS TO A COLORING OF THE GRAPH G. +! THE SUBROUTINE EXAMINES THE COLUMNS IN THE ORDER SPECIFIED +! BY THE ARRAY LIST, AND ASSIGNS THE CURRENT COLUMN TO THE +! GROUP WITH THE SMALLEST POSSIBLE NUMBER. +! NOTE THAT THE VALUE OF M IS NOT NEEDED BY SEQ AND IS +! THEREFORE NOT PRESENT IN THE SUBROUTINE STATEMENT. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE SEQ(N,INDROW,JPNTR,INDCOL,IPNTR,LIST,NGRP,MAXGRP,IWA) +! WHERE +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! INDROW IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE ROW +! INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! JPNTR IS AN INTEGER INPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN INDROW. +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(N+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! INDCOL IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE +! COLUMN INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! IPNTR IS AN INTEGER INPUT ARRAY OF LENGTH M + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN INDCOL. +! THE COLUMN INDICES FOR ROW I ARE +! INDCOL(K), K = IPNTR(I),...,IPNTR(I+1)-1. +! NOTE THAT IPNTR(M+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! LIST IS AN INTEGER INPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE ORDER TO BE USED BY THE SEQUENTIAL ALGORITHM. +! THE J-TH COLUMN IN THIS ORDER IS LIST(J). +! NGRP IS AN INTEGER OUTPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE PARTITION OF THE COLUMNS OF A. COLUMN JCOL BELONGS +! TO GROUP NGRP(JCOL). +! MAXGRP IS AN INTEGER OUTPUT VARIABLE WHICH SPECIFIES THE +! NUMBER OF GROUPS IN THE PARTITION OF THE COLUMNS OF A. +! IWA IS AN INTEGER WORK ARRAY OF LENGTH N. +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: MAXGRP, N +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(*), INDROW(*), IPNTR(*), IWA(N), JPNTR(N+1), & + LIST(N), NGRP(N) +! .. +! .. Local Scalars .. + INTEGER :: IC, IP, IR, J, JCOL, JP +! .. +! .. FIRST EXECUTABLE STATEMENT SEQ +! .. +! INITIALIZATION BLOCK. + MAXGRP = 0 + NGRP(1:N) = N + IWA(1:N) = 0 + +! BEGINNING OF ITERATION LOOP. + + DO J = 1, N + JCOL = LIST(J) +! FIND ALL COLUMNS ADJACENT TO COLUMN JCOL. +! DETERMINE ALL POSITIONS (IR,JCOL) WHICH CORRESPOND +! TO NON-ZEROES IN THE MATRIX. + DO JP = JPNTR(JCOL), JPNTR(JCOL+1) - 1 + IR = INDROW(JP) +! FOR EACH ROW IR, DETERMINE ALL POSITIONS (IR,IC) +! WHICH CORRESPOND TO NON-ZEROES IN THE MATRIX. + DO IP = IPNTR(IR), IPNTR(IR+1) - 1 + IC = INDCOL(IP) +! ARRAY IWA MARKS THE GROUP NUMBERS OF THE +! COLUMNS WHICH ARE ADJACENT TO COLUMN JCOL. + IWA(NGRP(IC)) = J + END DO + END DO +! ASSIGN THE SMALLEST UN-MARKED GROUP NUMBER TO JCOL. + DO JP = 1, MAXGRP + IF (IWA(JP)/=J) GOTO 50 + END DO + MAXGRP = MAXGRP + 1 +50 CONTINUE + NGRP(JCOL) = JP + END DO + +! END OF ITERATION LOOP. + + RETURN + + END SUBROUTINE SEQ +!_______________________________________________________________________ + + SUBROUTINE SETR(M,N,INDROW,JPNTR,INDCOL,IPNTR,IWA) + +! GIVEN A COLUMN-ORIENTED DEFINITION OF THE SPARSITY PATTERN +! OF AN M BY N MATRIX A, THIS SUBROUTINE DETERMINES A +! ROW-ORIENTED DEFINITION OF THE SPARSITY PATTERN OF A. +! ON INPUT THE COLUMN-ORIENTED DEFINITION IS SPECIFIED BY +! THE ARRAYS INDROW AND JPNTR. ON OUTPUT THE ROW-ORIENTED +! DEFINITION IS SPECIFIED BY THE ARRAYS INDCOL AND IPNTR. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE SETR(M,N,INDROW,JPNTR,INDCOL,IPNTR,IWA) +! WHERE +! M IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF ROWS OF A. +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! INDROW IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE ROW +! INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! JPNTR IS AN INTEGER INPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN INDROW. +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(N+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! INDCOL IS AN INTEGER OUTPUT ARRAY WHICH CONTAINS THE +! COLUMN INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! IPNTR IS AN INTEGER OUTPUT ARRAY OF LENGTH M + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN INDCOL. +! THE COLUMN INDICES FOR ROW I ARE +! INDCOL(K), K = IPNTR(I),...,IPNTR(I+1)-1. +! NOTE THAT IPNTR(1) IS SET TO 1 AND THAT IPNTR(M+1)-1 IS +! THEN THE NUMBER OF NON-ZERO ELEMENTS OF THE MATRIX A. +! IWA IS AN INTEGER WORK ARRAY OF LENGTH M. +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: M, N +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(*), INDROW(*), IPNTR(M+1), IWA(M), JPNTR(N+1) +! .. +! .. Local Scalars .. + INTEGER :: IR, JCOL, JP +! .. +! .. FIRST EXECUTABLE STATEMENT SETR +! .. +! STORE IN ARRAY IWA THE COUNTS OF NON-ZEROES IN THE ROWS. + IWA(1:M) = 0 + DO JP = 1, JPNTR(N+1) - 1 + IWA(INDROW(JP)) = IWA(INDROW(JP)) + 1 + END DO + +! SET POINTERS TO THE START OF THE ROWS IN INDCOL. + IPNTR(1) = 1 + DO IR = 1, M + IPNTR(IR+1) = IPNTR(IR) + IWA(IR) + IWA(IR) = IPNTR(IR) + END DO + +! FILL INDCOL. + DO JCOL = 1, N + DO JP = JPNTR(JCOL), JPNTR(JCOL+1) - 1 + IR = INDROW(JP) + INDCOL(IWA(IR)) = JCOL + IWA(IR) = IWA(IR) + 1 + END DO + END DO + RETURN + + END SUBROUTINE SETR +!_______________________________________________________________________ + + SUBROUTINE SLO(N,INDROW,JPNTR,INDCOL,IPNTR,NDEG,LIST,MAXCLQ,IWA1,IWA2, & + IWA3,IWA4) + +! GIVEN THE SPARSITY PATTERN OF AN M BY N MATRIX A, THIS +! SUBROUTINE DETERMINES THE SMALLEST-LAST ORDERING OF THE +! COLUMNS OF A. +! THE SMALLEST-LAST ORDERING IS DEFINED FOR THE LOOPLESS +! GRAPH G WITH VERTICES A(J), J = 1,2,...,N WHERE A(J) IS THE +! J-TH COLUMN OF A AND WITH EDGE(A(I),A(J)) IF AND ONLY IF +! COLUMNS I AND J HAVE A NON-ZERO IN THE SAME ROW POSITION. +! THE SMALLEST-LAST ORDERING IS DETERMINED RECURSIVELY BY +! LETTING LIST(K), K = N,...,1 BE A COLUMN WITH LEAST DEGREE +! IN THE SUBGRAPH SPANNED BY THE UN-ORDERED COLUMNS. +! NOTE THAT THE VALUE OF M IS NOT NEEDED BY SLO AND IS +! THEREFORE NOT PRESENT IN THE SUBROUTINE STATEMENT. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE SLO(N,INDROW,JPNTR,INDCOL,IPNTR,NDEG,LIST, & +! MAXCLQ,IWA1,IWA2,IWA3,IWA4) +! WHERE +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! INDROW IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE ROW +! INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! JPNTR IS AN INTEGER INPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN INDROW. +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(N+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! INDCOL IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE +! COLUMN INDICES FOR THE NON-ZEROES IN THE MATRIX A. +! IPNTR IS AN INTEGER INPUT ARRAY OF LENGTH M + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN INDCOL. +! THE COLUMN INDICES FOR ROW I ARE +! INDCOL(K), K = IPNTR(I),...,IPNTR(I+1)-1. +! NOTE THAT IPNTR(M+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! NDEG IS AN INTEGER INPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE DEGREE SEQUENCE. THE DEGREE OF THE J-TH COLUMN +! OF A IS NDEG(J). +! LIST IS AN INTEGER OUTPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE SMALLEST-LAST ORDERING OF THE COLUMNS OF A. THE J-TH +! COLUMN IN THIS ORDER IS LIST(J). +! MAXCLQ IS AN INTEGER OUTPUT VARIABLE SET TO THE SIZE +! OF THE LARGEST CLIQUE FOUND DURING THE ORDERING. +! IWA1,IWA2,IWA3, AND IWA4 ARE INTEGER WORK ARRAYS OF LENGTH N. +! SUBPROGRAMS CALLED +! INTRINSIC ... MIN +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: MAXCLQ, N +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(*), INDROW(*), IPNTR(*), IWA1(0:N-1), IWA2(N), & + IWA3(N), IWA4(N), JPNTR(N+1), LIST(N), NDEG(N) +! .. +! .. Local Scalars .. + INTEGER :: IC, IP, IR, JCOL, JP, MINDEG, NUMDEG, NUMORD +! .. +! .. Intrinsic Functions .. + INTRINSIC MIN +! .. +! .. FIRST EXECUTABLE STATEMENT SLO +! .. +! INITIALIZATION BLOCK. + MINDEG = N + DO JP = 1, N + IWA1(JP-1) = 0 + IWA4(JP) = N + LIST(JP) = NDEG(JP) + MINDEG = MIN(MINDEG,NDEG(JP)) + END DO + +! CREATE A DOUBLY-LINKED LIST TO ACCESS THE DEGREES OF THE +! COLUMNS. THE POINTERS FOR THE LINKED LIST ARE AS FOLLOWS. + +! EACH UN-ORDERED COLUMN IC IS IN A LIST (THE DEGREE LIST) +! OF COLUMNS WITH THE SAME DEGREE. + +! IWA1(NUMDEG) IS THE FIRST COLUMN IN THE NUMDEG LIST +! UNLESS IWA1(NUMDEG) = 0. IN THIS CASE THERE ARE +! NO COLUMNS IN THE NUMDEG LIST. + +! IWA2(IC) IS THE COLUMN BEFORE IC IN THE DEGREE LIST +! UNLESS IWA2(IC) = 0. IN THIS CASE IC IS THE FIRST +! COLUMN IN THIS DEGREE LIST. + +! IWA3(IC) IS THE COLUMN AFTER IC IN THE DEGREE LIST +! UNLESS IWA3(IC) = 0. IN THIS CASE IC IS THE LAST +! COLUMN IN THIS DEGREE LIST. + +! IF IC IS AN UN-ORDERED COLUMN, THEN LIST(IC) IS THE +! DEGREE OF IC IN THE GRAPH INDUCED BY THE UN-ORDERED +! COLUMNS. IF JCOL IS AN ORDERED COLUMN, THEN LIST(JCOL) +! IS THE SMALLEST-LAST ORDER OF COLUMN JCOL. + + DO JP = 1, N + NUMDEG = NDEG(JP) + IWA2(JP) = 0 + IWA3(JP) = IWA1(NUMDEG) + IF (IWA1(NUMDEG) > 0) IWA2(IWA1(NUMDEG)) = JP + IWA1(NUMDEG) = JP + END DO + MAXCLQ = 0 + NUMORD = N + +! BEGINNING OF ITERATION LOOP. + +30 CONTINUE + +! MARK THE SIZE OF THE LARGEST CLIQUE +! FOUND DURING THE ORDERING. + IF (MINDEG+1 == NUMORD .AND. MAXCLQ == 0) MAXCLQ = NUMORD + +! CHOOSE A COLUMN JCOL OF MINIMAL DEGREE MINDEG. +40 CONTINUE + JCOL = IWA1(MINDEG) + IF (JCOL > 0) GOTO 50 + MINDEG = MINDEG + 1 + GOTO 40 +50 CONTINUE + LIST(JCOL) = NUMORD + NUMORD = NUMORD - 1 + +! TERMINATION TEST. + IF (NUMORD == 0) GOTO 80 + +! DELETE COLUMN JCOL FROM THE MINDEG LIST. + IWA1(MINDEG) = IWA3(JCOL) + IF (IWA3(JCOL) > 0) IWA2(IWA3(JCOL)) = 0 + +! FIND ALL COLUMNS ADJACENT TO COLUMN JCOL. + IWA4(JCOL) = 0 + +! DETERMINE ALL POSITIONS (IR,JCOL) WHICH CORRESPOND +! TO NON-ZEROES IN THE MATRIX. + DO JP = JPNTR(JCOL), JPNTR(JCOL+1) - 1 + IR = INDROW(JP) +! FOR EACH ROW IR, DETERMINE ALL POSITIONS (IR,IC) +! WHICH CORRESPOND TO NON-ZEROES IN THE MATRIX. + DO IP = IPNTR(IR), IPNTR(IR+1) - 1 + IC = INDCOL(IP) +! ARRAY IWA4 MARKS COLUMNS WHICH ARE ADJACENT TO +! COLUMN JCOL. + + IF (IWA4(IC) > NUMORD) THEN + IWA4(IC) = NUMORD +! UPDATE THE POINTERS TO THE CURRENT DEGREE LISTS. + NUMDEG = LIST(IC) + LIST(IC) = LIST(IC) - 1 + MINDEG = MIN(MINDEG,LIST(IC)) +! DELETE COLUMN IC FROM THE NUMDEG LIST. + IF (IWA2(IC) == 0) THEN + IWA1(NUMDEG) = IWA3(IC) + ELSE + IWA3(IWA2(IC)) = IWA3(IC) + END IF + IF (IWA3(IC) > 0) IWA2(IWA3(IC)) = IWA2(IC) +! ADD COLUMN IC TO THE NUMDEG-1 LIST. + IWA2(IC) = 0 + IWA3(IC) = IWA1(NUMDEG-1) + IF (IWA1(NUMDEG-1) > 0) IWA2(IWA1(NUMDEG-1)) = IC + IWA1(NUMDEG-1) = IC + END IF + END DO + END DO +! END OF ITERATION LOOP. + + GOTO 30 +80 CONTINUE + +! INVERT THE ARRAY LIST. + DO JCOL = 1, N + IWA2(LIST(JCOL)) = JCOL + END DO + LIST(1:N) = IWA2(1:N) + RETURN + + END SUBROUTINE SLO +!_______________________________________________________________________ + + SUBROUTINE SRTDAT(N,NNZ,INDROW,INDCOL,JPNTR,IWA) + +! GIVEN THE NON-ZERO ELEMENTS OF AN M BY N MATRIX A IN ARBITRARY +! ORDER AS SPECIFIED BY THEIR ROW AND COLUMN INDICES, THIS SUBROUTINE +! PERMUTES THESE ELEMENTS SO THAT THEIR COLUMN INDICES ARE IN +! NON-DECREASING ORDER. ON INPUT IT IS ASSUMED THAT THE ELEMENTS ARE +! SPECIFIED IN +! INDROW(K),INDCOL(K), K = 1,...,NNZ. +! ON OUTPUT THE ELEMENTS ARE PERMUTED SO THAT INDCOL IS IN +! NON-DECREASING ORDER. IN ADDITION, THE ARRAY JPNTR IS SET SO THAT +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT THE VALUE OF M IS NOT NEEDED BY SRTDAT AND IS THEREFORE +! NOT PRESENT IN THE SUBROUTINE STATEMENT. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE SRTDAT(N,NNZ,INDROW,INDCOL,JPNTR,IWA) +! WHERE +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! NNZ IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF NON-ZERO ELEMENTS OF A. +! INDROW IS AN INTEGER ARRAY OF LENGTH NNZ. ON INPUT INDROW +! MUST CONTAIN THE ROW INDICES OF THE NON-ZERO ELEMENTS OF A. +! ON OUTPUT INDROW IS PERMUTED SO THAT THE CORRESPONDING +! COLUMN INDICES OF INDCOL ARE IN NON-DECREASING ORDER. +! INDCOL IS AN INTEGER ARRAY OF LENGTH NNZ. ON INPUT INDCOL +! MUST CONTAIN THE COLUMN INDICES OF THE NON-ZERO ELEMENTS +! OF A. ON OUTPUT INDCOL IS PERMUTED SO THAT THESE INDICES +! ARE IN NON-DECREASING ORDER. +! JPNTR IS AN INTEGER OUTPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN THE OUTPUT +! INDROW. THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(1) IS SET TO 1 AND THAT JPNTR(N+1)-1 +! IS THEN NNZ. +! IWA IS AN INTEGER WORK ARRAY OF LENGTH N. +! SUBPROGRAMS CALLED - NONE +! INTRINSIC - MAX +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: N, NNZ +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(NNZ), INDROW(NNZ), IWA(N), JPNTR(N+1) +! .. +! .. Local Scalars .. + INTEGER :: I, J, K, L +! .. +! .. Intrinsic Functions .. + INTRINSIC MAX +! .. +! .. FIRST EXECUTABLE STATEMENT SRTDAT +! .. +! STORE IN ARRAY IWA THE COUNTS OF NON-ZEROES IN THE COLUMNS. + IWA(1:N) = 0 + DO K = 1, NNZ + IWA(INDCOL(K)) = IWA(INDCOL(K)) + 1 + END DO + +! SET POINTERS TO THE START OF THE COLUMNS IN INDROW. + JPNTR(1) = 1 + DO J = 1, N + JPNTR(J+1) = JPNTR(J) + IWA(J) + IWA(J) = JPNTR(J) + END DO + K = 1 + +! BEGIN IN-PLACE SORT. +40 CONTINUE + J = INDCOL(K) + IF (K >= JPNTR(J)) THEN +! CURRENT ELEMENT IS IN POSITION. NOW EXAMINE THE +! NEXT ELEMENT OR THE FIRST UN-SORTED ELEMENT IN +! THE J-TH GROUP. + K = MAX(K+1,IWA(J)) + ELSE +! CURRENT ELEMENT IS NOT IN POSITION. PLACE ELEMENT +! IN POSITION AND MAKE THE DISPLACED ELEMENT THE +! CURRENT ELEMENT. + L = IWA(J) + IWA(J) = IWA(J) + 1 + I = INDROW(K) + INDROW(K) = INDROW(L) + INDCOL(K) = INDCOL(L) + INDROW(L) = I + INDCOL(L) = J + END IF + IF (K <= NNZ) GOTO 40 + RETURN + + END SUBROUTINE SRTDAT +!_______________________________________________________________________ + + SUBROUTINE FDJS(M,N,COL,IND,NPNTR,NGRP,NUMGRP,D,FJACD,FJAC) + +! GIVEN A CONSISTENT PARTITION OF THE COLUMNS OF AN M BY N +! JACOBIAN MATRIX INTO GROUPS, THIS SUBROUTINE COMPUTES +! APPROXIMATIONS TO THOSE COLUMNS IN A GIVEN GROUP. THE +! APPROXIMATIONS ARE STORED INTO EITHER A COLUMN-ORIENTED +! OR A ROW-ORIENTED PATTERN. +! A PARTITION IS CONSISTENT IF THE COLUMNS IN ANY GROUP +! DO NOT HAVE A NON-ZERO IN THE SAME ROW POSITION. +! APPROXIMATIONS TO THE COLUMNS OF THE JACOBIAN MATRIX IN A +! GIVEN GROUP CAN BE OBTAINED BY SPECIFYING A DIFFERENCE +! PARAMETER ARRAY D WITH D(JCOL) NON-ZERO IF AND ONLY IF +! JCOL IS A COLUMN IN THE GROUP, AND AN APPROXIMATION TO +! JAC*D WHERE JAC DENOTES THE JACOBIAN MATRIX OF A MAPPING F. +! D CAN BE DEFINED WITH THE FOLLOWING SEGMENT OF CODE. +! DO 10 JCOL = 1, N +! D(JCOL) = 0.0 +! IF (NGRP(JCOL) == NUMGRP) D(JCOL) = ETA(JCOL) +! 10 CONTINUE +! IN THE ABOVE CODE NUMGRP IS THE GIVEN GROUP NUMBER, +! NGRP(JCOL) IS THE GROUP NUMBER OF COLUMN JCOL, AND +! ETA(JCOL) IS THE DIFFERENCE PARAMETER USED TO +! APPROXIMATE COLUMN JCOL OF THE JACOBIAN MATRIX. +! SUITABLE VALUES FOR THE ARRAY ETA MUST BE PROVIDED. +! AS MENTIONED ABOVE, AN APPROXIMATION TO JAC*D MUST +! ALSO BE PROVIDED. FOR EXAMPLE, THE APPROXIMATION +! F(X+D) - F(X) +! CORRESPONDS TO THE FORWARD DIFFERENCE FORMULA AT X. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE FDJS(M,N,COL,IND,NPNTR,NGRP,NUMGRP,D,FJACD,FJAC) +! WHERE +! M IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF ROWS OF THE JACOBIAN MATRIX. +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF THE JACOBIAN MATRIX. +! COL IS A LOGICAL INPUT VARIABLE. IF COL IS SET TRUE, THEN THE +! JACOBIAN APPROXIMATIONS ARE STORED INTO A COLUMN-ORIENTED +! PATTERN. IF COL IS SET FALSE, THEN THE JACOBIAN +! APPROXIMATIONS ARE STORED INTO A ROW-ORIENTED PATTERN. +! IND IS AN INTEGER INPUT ARRAY WHICH CONTAINS THE ROW +! INDICES FOR THE NON-ZEROES IN THE JACOBIAN MATRIX +! IF COL IS TRUE, AND CONTAINS THE COLUMN INDICES FOR +! THE NON-ZEROES IN THE JACOBIAN MATRIX IF COL IS FALSE. +! NPNTR IS AN INTEGER INPUT ARRAY WHICH SPECIFIES THE +! LOCATIONS OF THE ROW INDICES IN IND IF COL IS TRUE, AND +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN IND IF +! COL IS FALSE. IF COL IS TRUE, THE INDICES FOR COLUMN J ARE +! IND(K), K = NPNTR(J),...,NPNTR(J+1)-1. +! IF COL IS FALSE, THE INDICES FOR ROW I ARE +! IND(K), K = NPNTR(I),...,NPNTR(I+1)-1. +! NOTE THAT NPNTR(N+1)-1 IF COL IS TRUE, OR NPNTR(M+1)-1 +! IF COL IS FALSE, IS THEN THE NUMBER OF NON-ZERO ELEMENTS +! OF THE JACOBIAN MATRIX. +! NGRP IS AN INTEGER INPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE PARTITION OF THE COLUMNS OF THE JACOBIAN MATRIX. +! COLUMN JCOL BELONGS TO GROUP NGRP(JCOL). +! NUMGRP IS A POSITIVE INTEGER INPUT VARIABLE SET TO A GROUP +! NUMBER IN THE PARTITION. THE COLUMNS OF THE JACOBIAN +! MATRIX IN THIS GROUP ARE TO BE ESTIMATED ON THIS CALL. +! D IS AN INPUT ARRAY OF LENGTH N WHICH CONTAINS THE +! DIFFERENCE PARAMETER VECTOR FOR THE ESTIMATE OF +! THE JACOBIAN MATRIX COLUMNS IN GROUP NUMGRP. +! FJACD IS AN INPUT ARRAY OF LENGTH M WHICH CONTAINS +! AN APPROXIMATION TO THE DIFFERENCE VECTOR JAC*D, +! WHERE JAC DENOTES THE JACOBIAN MATRIX. +! FJAC IS AN OUTPUT ARRAY OF LENGTH NNZ, WHERE NNZ IS THE +! NUMBER OF ITS NON-ZERO ELEMENTS. AT EACH CALL OF FDJS, +! FJAC IS UPDATED TO INCLUDE THE NON-ZERO ELEMENTS OF THE +! JACOBIAN MATRIX FOR THOSE COLUMNS IN GROUP NUMGRP. FJAC +! SHOULD NOT BE ALTERED BETWEEN SUCCESSIVE CALLS TO FDJS. +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Parameters .. + INTEGER, PARAMETER :: WP = KIND(0.0D0) +! .. +! .. Scalar Arguments .. + INTEGER :: M, N, NUMGRP + LOGICAL :: COL +! .. +! .. Array Arguments .. + REAL (WP) :: D(N), FJAC(*), FJACD(M) + INTEGER :: IND(*), NGRP(N), NPNTR(*) +! .. +! .. Local Scalars .. + INTEGER :: IP, IROW, JCOL, JP +! .. +! .. Intrinsic Functions .. +! INTRINSIC KIND +! .. +! .. FIRST EXECUTABLE STATEMENT FDJS +! .. +! COMPUTE ESTIMATES OF JACOBIAN MATRIX COLUMNS IN GROUP +! NUMGRP. THE ARRAY FJACD MUST CONTAIN AN APPROXIMATION +! TO JAC*D, WHERE JAC DENOTES THE JACOBIAN MATRIX AND D +! IS A DIFFERENCE PARAMETER VECTOR WITH D(JCOL) NON-ZERO +! IF AND ONLY IF JCOL IS A COLUMN IN GROUP NUMGRP. + IF (COL) THEN +! COLUMN ORIENTATION. + DO JCOL = 1, N + IF (NGRP(JCOL) == NUMGRP) THEN + DO JP = NPNTR(JCOL), NPNTR(JCOL+1) - 1 + IROW = IND(JP) + FJAC(JP) = FJACD(IROW)/D(JCOL) + END DO + END IF + END DO + ELSE +! ROW ORIENTATION. + DO IROW = 1, M + DO IP = NPNTR(IROW), NPNTR(IROW+1) - 1 + JCOL = IND(IP) + IF (NGRP(JCOL) == NUMGRP) THEN + FJAC(IP) = FJACD(IROW)/D(JCOL) + GOTO 40 + END IF + END DO +40 CONTINUE + END DO + END IF + RETURN + + END SUBROUTINE FDJS +!_______________________________________________________________________ + + SUBROUTINE DVDSM(M,N,NPAIRS,INDROW,INDCOL,NGRP,MAXGRP,MINGRP,INFO, & + IPNTR, JPNTR,IWA,LIWA) + +! THE PURPOSE OF DSM IS TO DETERMINE AN OPTIMAL OR NEAR- +! OPTIMAL CONSISTENT PARTITION OF THE COLUMNS OF A SPARSE +! M BY N MATRIX A. +! THE SPARSITY PATTERN OF THE MATRIX A IS SPECIFIED BY +! THE ARRAYS INDROW AND INDCOL. ON INPUT THE INDICES +! FOR THE NON-ZERO ELEMENTS OF A ARE +! INDROW(K),INDCOL(K), K = 1,2,...,NPAIRS. +! THE(INDROW,INDCOL) PAIRS MAY BE SPECIFIED IN ANY ORDER. +! DUPLICATE INPUT PAIRS ARE PERMITTED, BUT THE SUBROUTINE +! ELIMINATES THEM. +! THE SUBROUTINE PARTITIONS THE COLUMNS OF A INTO GROUPS +! SUCH THAT COLUMNS IN THE SAME GROUP DO NOT HAVE A +! NON-ZERO IN THE SAME ROW POSITION. A PARTITION OF THE +! COLUMNS OF A WITH THIS PROPERTY IS CONSISTENT WITH THE +! DIRECT DETERMINATION OF A. +! THE SUBROUTINE STATEMENT IS +! SUBROUTINE DVDSM(M,N,NPAIRS,INDROW,INDCOL,NGRP,MAXGRP,MINGRP, +! INFO,IPNTR,JPNTR,IWA,LIWA) +! WHERE +! M IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF ROWS OF A. +! N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER +! OF COLUMNS OF A. +! NPAIRS IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE +! NUMBER OF (INDROW,INDCOL) PAIRS USED TO DESCRIBE THE +! SPARSITY PATTERN OF A. +! INDROW IS AN INTEGER ARRAY OF LENGTH NPAIRS. ON INPUT INDROW +! MUST CONTAIN THE ROW INDICES OF THE NON-ZERO ELEMENTS OF A. +! ON OUTPUT INDROW IS PERMUTED SO THAT THE CORRESPONDING +! COLUMN INDICES ARE IN NON-DECREASING ORDER. THE COLUMN +! INDICES CAN BE RECOVERED FROM THE ARRAY JPNTR. +! INDCOL IS AN INTEGER ARRAY OF LENGTH NPAIRS. ON INPUT INDCOL +! MUST CONTAIN THE COLUMN INDICES OF THE NON-ZERO ELEMENTS OF +! A. ON OUTPUT INDCOL IS PERMUTED SO THAT THE CORRESPONDING +! ROW INDICES ARE IN NON-DECREASING ORDER. THE ROW INDICES +! CAN BE RECOVERED FROM THE ARRAY IPNTR. +! NGRP IS AN INTEGER OUTPUT ARRAY OF LENGTH N WHICH SPECIFIES +! THE PARTITION OF THE COLUMNS OF A. COLUMN JCOL BELONGS +! TO GROUP NGRP(JCOL). +! MAXGRP IS AN INTEGER OUTPUT VARIABLE WHICH SPECIFIES THE +! NUMBER OF GROUPS IN THE PARTITION OF THE COLUMNS OF A. +! MINGRP IS AN INTEGER OUTPUT VARIABLE WHICH SPECIFIES A LOWER +! BOUND FOR THE NUMBER OF GROUPS IN ANY CONSISTENT PARTITION +! OF THE COLUMNS OF A. +! INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS. FOR +! NORMAL TERMINATION INFO = 1. IF M, N, OR NPAIRS IS NOT +! POSITIVE OR LIWA IS LESS THAN MAX(M,6*N), THEN INFO = 0. +! IF THE K-TH ELEMENT OF INDROW IS NOT AN INTEGER BETWEEN +! 1 AND M OR THE K-TH ELEMENT OF INDCOL IS NOT AN INTEGER +! BETWEEN 1 AND N, THEN INFO = -K. +! IPNTR IS AN INTEGER OUTPUT ARRAY OF LENGTH M + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE COLUMN INDICES IN INDCOL. +! THE COLUMN INDICES FOR ROW I ARE +! INDCOL(K), K = IPNTR(I),...,IPNTR(I+1)-1. +! NOTE THAT IPNTR(M+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! JPNTR IS AN INTEGER OUTPUT ARRAY OF LENGTH N + 1 WHICH +! SPECIFIES THE LOCATIONS OF THE ROW INDICES IN INDROW. +! THE ROW INDICES FOR COLUMN J ARE +! INDROW(K), K = JPNTR(J),...,JPNTR(J+1)-1. +! NOTE THAT JPNTR(N+1)-1 IS THEN THE NUMBER OF NON-ZERO +! ELEMENTS OF THE MATRIX A. +! IWA IS AN INTEGER WORK ARRAY OF LENGTH LIWA. +! LIWA IS A POSITIVE INTEGER INPUT VARIABLE NOT LESS THAN +! MAX(M,6*N). +! MINPACK-SUPPLIED ... DEGR,IDO,NUMSRT,SEQ,SETR,SLO,SRTDAT +! INTRINSIC - MAX +! ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JULY 1983. +! THOMAS F. COLEMAN, BURTON S. GARBOW, JORGE J. MORE' + + IMPLICIT NONE + +! .. Scalar Arguments .. + INTEGER :: INFO, LIWA, M, MAXGRP, MINGRP, N, NPAIRS +! .. +! .. Array Arguments .. + INTEGER :: INDCOL(NPAIRS), INDROW(NPAIRS), IPNTR(M+1), IWA(LIWA), & + JPNTR(N+1), NGRP(N) +! .. +! .. Local Scalars .. + INTEGER :: I, IR, J, JP, K, MAXCLQ, NNZ, NUMGRP +! .. +! .. External Subroutines .. +! EXTERNAL DEGR, IDO, NUMSRT, SEQ, SETR, SLO, SRTDAT +! .. +! .. Intrinsic Functions .. + INTRINSIC MAX +! .. +! .. FIRST EXECUTABLE STATEMENT DSM +! .. +! CHECK THE INPUT DATA. + INFO = 0 + IF (M<1 .OR. N<1 .OR. NPAIRS<1 .OR. LIWAM .OR. INDCOL(K)<1 .OR. INDCOL(K)>N) & + RETURN + END DO + INFO = 1 + +! SORT THE DATA STRUCTURE BY COLUMNS. + CALL SRTDAT(N,NPAIRS,INDROW,INDCOL,JPNTR,IWA) + +! COMPRESS THE DATA AND DETERMINE THE NUMBER OF +! NON-ZERO ELEMENTS OF A. + IWA(1:M) = 0 + NNZ = 1 + DO J = 1, N + K = NNZ + DO JP = JPNTR(J), JPNTR(J+1) - 1 + IR = INDROW(JP) + IF (IWA(IR)/=J) THEN + INDROW(NNZ) = IR + NNZ = NNZ + 1 + IWA(IR) = J + END IF + END DO + JPNTR(J) = K + END DO + JPNTR(N+1) = NNZ + +! EXTEND THE DATA STRUCTURE TO ROWS. + CALL SETR(M,N,INDROW,JPNTR,INDCOL,IPNTR,IWA) + +! DETERMINE A LOWER BOUND FOR THE NUMBER OF GROUPS. + MINGRP = 0 + DO I = 1, M + MINGRP = MAX(MINGRP,IPNTR(I+1)-IPNTR(I)) + END DO + +! DETERMINE THE DEGREE SEQUENCE FOR THE INTERSECTION +! GRAPH OF THE COLUMNS OF A. + CALL DEGR(N,INDROW,JPNTR,INDCOL,IPNTR,IWA(5*N+1),IWA(N+1)) + +! COLOR THE INTERSECTION GRAPH OF THE COLUMNS OF A +! WITH THE SMALLEST-LAST (SL) ORDERING. + CALL SLO(N,INDROW,JPNTR,INDCOL,IPNTR,IWA(5*N+1),IWA(4*N+1),MAXCLQ, & + IWA(1),IWA(N+1),IWA(2*N+1),IWA(3*N+1)) + CALL SEQ(N,INDROW,JPNTR,INDCOL,IPNTR,IWA(4*N+1),NGRP,MAXGRP,IWA(N+1)) + MINGRP = MAX(MINGRP,MAXCLQ) + +! EXIT IF THE SMALLEST-LAST ORDERING IS OPTIMAL. + + IF (MAXGRP == MINGRP) RETURN + +! COLOR THE INTERSECTION GRAPH OF THE COLUMNS OF A +! WITH THE INCIDENCE-DEGREE(ID) ORDERING. + CALL IDO(M,N,INDROW,JPNTR,INDCOL,IPNTR,IWA(5*N+1),IWA(4*N+1),MAXCLQ, & + IWA(1),IWA(N+1),IWA(2*N+1),IWA(3*N+1)) + CALL SEQ(N,INDROW,JPNTR,INDCOL,IPNTR,IWA(4*N+1),IWA(1),NUMGRP,IWA(N+1)) + MINGRP = MAX(MINGRP,MAXCLQ) + +! RETAIN THE BETTER OF THE TWO ORDERINGS SO FAR. + IF (NUMGRP < MAXGRP) THEN + MAXGRP = NUMGRP + NGRP(1:N) = IWA(1:N) + +! EXIT IF THE INCIDENCE-DEGREE ORDERING IS OPTIMAL. + IF (MAXGRP == MINGRP) RETURN + END IF + +! COLOR THE INTERSECTION GRAPH OF THE COLUMNS OF A +! WITH THE LARGEST-FIRST (LF) ORDERING. + CALL NUMSRT(N,N-1,IWA(5*N+1),-1,IWA(4*N+1),IWA(2*N+1),IWA(N+1)) + CALL SEQ(N,INDROW,JPNTR,INDCOL,IPNTR,IWA(4*N+1),IWA(1),NUMGRP,IWA(N+1)) + +! RETAIN THE BEST OF THE THREE ORDERINGS AND EXIT. + IF (NUMGRP < MAXGRP) THEN + MAXGRP = NUMGRP + NGRP(1:N) = IWA(1:N) + END IF + RETURN + + END SUBROUTINE DVDSM +!_______________________________________________________________________ + + SUBROUTINE DGROUPDS(N,MAXGRPDS,NGRPDS,IGP,JGP) +! .. +! Construct the column grouping arrays IGP anf JGP needed by DVJACS28 +! from the DSM array NGRPDS. +! .. +! Input: +! +! N = the order of the matrix +! MAXGRPDS = number of groups (from DSM) +! NGRPDS = DSM output array +! Output: +! JGP = array of length N containing the column +! indices by groups +! IGP = pointer array of length NGRP + 1 to the +! locations in JGP of the beginning of +! each group +! .. + IMPLICIT NONE +! .. +! .. Scalar Arguments .. + INTEGER, INTENT (IN) :: N, MAXGRPDS +! .. +! .. Array Arguments .. + INTEGER, INTENT (IN) :: NGRPDS(MAXGRPDS) + INTEGER, INTENT (OUT) :: IGP(MAXGRPDS+1), JGP(N) +! .. +! .. Local Scalars .. + INTEGER :: IGRP, INDEX, JCOL +! .. +! .. FIRST EXECUTABLE STATEMENT DGROUPDS +! .. + IGP(1) = 1 + INDEX = 0 + DO IGRP = 1, MAXGRPDS + IGP(IGRP+1) = IGP(IGRP) + DO JCOL = 1, N + IF (NGRPDS(JCOL) == IGRP) THEN + IGP(IGRP+1) = IGP(IGRP+1) + 1 + INDEX = INDEX + 1 + JGP(INDEX) = JCOL + END IF + END DO + END DO + RETURN + + END SUBROUTINE DGROUPDS +!_______________________________________________________________________ + + SUBROUTINE JACSPDB(FCN,N,T,Y,F,FJAC,NRFJAC,YSCALE,FAC,IOPT, & + WK,LWK,IWK,LIWK,MAXGRP,NGRP,JPNTR,INDROW) + +! This is a modified version of JACSP which does not require the NGRP, +! JPNTR and INDROW sparse pointer arrays in the event a dense or a +! banded matrix is being processed. + +! Refer to the documentation for JACSP for a description of the +! parameters. If the banded option is used, IOPT(5) is used to +! input the lower bandwidth ML in this version. + + IMPLICIT NONE + +! .. Parameters .. + INTEGER, PARAMETER :: WP = KIND(0.0D0) +! .. +! .. Scalar Arguments .. + REAL (WP) :: T + INTEGER :: LIWK, LWK, MAXGRP, N, NRFJAC +! .. +! .. Array Arguments .. + REAL (WP) :: F(N), FAC(N), FJAC(NRFJAC,*), WK(LWK), Y(N), YSCALE(*) + INTEGER :: INDROW(*), IOPT(5), IWK(LIWK), JPNTR(*), NGRP(N) +! .. +! .. Subroutine Arguments .. + EXTERNAL FCN +! .. +! .. Local Scalars .. + REAL (WP) :: ADIFF, AY, DEL, DELM, DFMJ, DIFF, DMAX, EXPFMN, FACMAX, & + FACMIN, FJACL, FMJ, ONE, P125, P25, P75, P875, PERT, RDEL, RMNFDF, & + RMXFDF, SDF, SF, SGN, T1, T2, U, U3QRT, U7EGT, UEGT, UMEGT, UQRT, & + USQT, ZERO + INTEGER :: IDXL, IDXU, IFLAG1, IFLAG2, IRCMP, IRDEL, IROW, IROWB, & + IROWMX, ITRY, J, JCOL, JFIRST, JINC, JLAST, KT1, KT2, KT3, KT4, & + KT5, L, MBAND, ML, MU, NID1, NID2, NID3, NID4, NID5, NID6, & + NIFAC, NT2, NUMGRP +! KT5, L, MBAND, MEB1, ML, MU, NID1, NID2, NID3, NID4, NID5, NID6, & + LOGICAL :: DOTHISBLOCK + CHARACTER (80) :: MSG +! .. +! .. Intrinsic Functions .. +! INTRINSIC ABS, EPSILON, KIND, MAX, MIN, SIGN, SQRT + INTRINSIC ABS, EPSILON, MAX, MIN, SIGN, SQRT +! .. +! .. Data Statements .. + DATA PERT/2.0E+0_WP/, FACMAX/1.E-1_WP/, EXPFMN/.75E+0_WP/ + DATA ONE/1.0E0_WP/, ZERO/0.0E0_WP/ + DATA P125/.125E+0_WP/, P25/.25E+0_WP/, P75/.75E+0_WP/, P875/.875E+0_WP/ + DATA NIFAC/3/, NID1/10/, NID2/20/, NID3/30/, NID4/40/, NID5/50/ +! .. +! COMPUTE ALGORITHM AND MACHINE CONSTANTS. +! .. +! .. FIRST EXECUTABLE STATEMENT JACSPDB +! .. + IF (IOPT(1) == 0 .AND. MAXGRP /= N) THEN + MSG = 'JACSPDB requires that MAXGRP=N for a dense matrix.' + CALL XERRDV(MSG,1800,2,0,0,0,0,ZERO,ZERO) + END IF + + IF (IOPT(1) == 1) THEN + MBAND = IOPT(2) + ML = IOPT(5) + MU = MBAND - ML - 1 +! MEB1 = 2*ML + MU + END IF + + U = EPSILON(ONE) + USQT = SQRT(U) + UEGT = U**P125 + UMEGT = ONE/UEGT + UQRT = U**P25 + U3QRT = U**P75 + U7EGT = U**P875 + FACMIN = U**EXPFMN + + IF (IOPT(4) == 0) THEN + IOPT(4) = 1 + DO 10 J = 1, N + FAC(J) = USQT +10 CONTINUE + END IF + DO 20 J = 1, 50 + IWK(J) = 0 +20 CONTINUE + KT1 = NID1 + KT2 = NID2 + KT3 = NID3 + KT4 = NID4 + KT5 = NID5 + NID6 = LIWK + NT2 = 2*N + + DO NUMGRP = 1, MAXGRP +! COMPUTE AND SAVE THE INCREMENTS FOR THE COLUMNS IN GROUP NUMGRP. + IRCMP = 0 + ITRY = 0 + DO 30 J = NID5 + 1, NID6 + IWK(J) = 0 +30 CONTINUE +40 CONTINUE + +! Note: For banded in DVJAC: +! mba = min(mband,n) +! Groups: j=1,...,mba +! Columns in group j: jj=j,...,n by mband +! Rows in column jj: i1=max(jj-mu,1),...,i2=min(jj+ml,n) + + IF (IOPT(1) == 0 .OR. IOPT(1) == 2) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = NUMGRP + JLAST = N + JINC = MBAND + END IF + DO JCOL = JFIRST, JLAST, JINC + DOTHISBLOCK = .FALSE. + IF (IOPT(1) == 2) THEN + IF (NGRP(JCOL) == NUMGRP) DOTHISBLOCK = .TRUE. + ELSE + IF (IOPT(1) == 0) THEN + IF (JCOL == NUMGRP) DOTHISBLOCK = .TRUE. + ELSE + IF (IOPT(1) == 1) THEN + DOTHISBLOCK = .TRUE. + END IF + END IF + END IF + + IF (DOTHISBLOCK) THEN + WK(N+JCOL) = Y(JCOL) +! COMPUTE DEL. IF DEL IS TOO SMALL INCREASE FAC(JCOL) AND RECOMPUTE +! DEL. NIFAC ATTEMPTS ARE MADE TO INCREASE FAC(JCOL) AND FIND AN +! APPROPRIATE DEL. IF DEL CANT BE FOUND IN THIS MANNER, DEL IS COMPUTED +! WITH FAC(JCOL) SET TO THE SQUARE ROOT OF THE MACHINE PRECISION (USQT). +! IF DEL IS ZERO TO MACHINE PRECISION BECAUSE Y(JCOL) IS ZERO OR +! YSCALE(JCOL) IS ZERO, DEL IS SET TO USQT. + SGN = SIGN(ONE,F(JCOL)) + IRDEL = 0 + IF (IOPT(3) == 1) THEN + AY = ABS(YSCALE(JCOL)) + ELSE + AY = ABS(Y(JCOL)) + END IF + DELM = U7EGT*AY + + DO 50 J = 1, NIFAC + DEL = FAC(JCOL)*AY*SGN + IF (ABS(DEL) <= ZERO) THEN + DEL = USQT*SGN + IF (ITRY == 0) IWK(3) = IWK(3) + 1 + END IF + T1 = Y(JCOL) + DEL + DEL = T1 - Y(JCOL) + IF (ABS(DEL) < DELM) THEN + IF (J >= NIFAC) GOTO 50 + IF (IRDEL == 0) THEN + IRDEL = 1 + IWK(1) = IWK(1) + 1 + END IF + T1 = FAC(JCOL)*UMEGT + FAC(JCOL) = MIN(T1,FACMAX) + ELSE + GOTO 60 + END IF +50 END DO + + FAC(JCOL) = USQT + DEL = USQT*AY*SGN + IWK(2) = IWK(2) + 1 + IF (KT2 < NID3) THEN + KT2 = KT2 + 1 + IWK(KT2) = JCOL + END IF + +60 CONTINUE + WK(NT2+JCOL) = DEL + Y(JCOL) = Y(JCOL) + DEL + END IF + END DO + + IWK(7) = IWK(7) + 1 + CALL FCN(N,T,Y,WK) + + DO JCOL = JFIRST, JLAST, JINC + IF (IOPT(1) == 2) THEN + IF (NGRP(JCOL) == NUMGRP) Y(JCOL) = WK(N+JCOL) + ELSE + IF (IOPT(1) == 0) THEN + IF (JCOL == NUMGRP) Y(JCOL) = WK(N+JCOL) + ELSE + IF (IOPT(1) == 1) Y(JCOL) = WK(N+JCOL) + END IF + END IF + END DO + +! COMPUTE THE JACOBIAN ENTRIES FOR ALL COLUMNS IN NUMGRP. +! STORE ENTRIES ACCORDING TO SELECTED STORAGE FORMAT. +! USE LARGEST ELEMENTS IN A COLUMN TO DETERMINE SCALING +! INFORMATION FOR ROUNDOFF AND TRUNCATION ERROR TESTS. + + DO JCOL = JFIRST, JLAST, JINC + DOTHISBLOCK = .FALSE. + IF (IOPT(1) == 2) THEN + IF (NGRP(JCOL) == NUMGRP) DOTHISBLOCK = .TRUE. + ELSE + IF (IOPT(1) == 0) THEN + IF (JCOL == NUMGRP) DOTHISBLOCK = .TRUE. + ELSE + IF(IOPT(1) == 1) DOTHISBLOCK = .TRUE. + END IF + END IF + IF (DOTHISBLOCK) THEN + IF (IOPT(1) == 2) THEN + IDXL = JPNTR(JCOL) + IDXU = JPNTR(JCOL+1) - 1 + ELSE + IF (IOPT(1) == 0) THEN + IDXL = 1 + IDXU = N + ELSE + IF (IOPT(1) == 1) THEN + IDXL = MAX(JCOL-MU,1) + IDXU = MIN(JCOL+ML,N) + END IF + END IF + END IF + DMAX = ZERO + RDEL = ONE/WK(NT2+JCOL) + IROWMX = 1 + DO L = IDXL, IDXU + IF (IOPT(1) == 2) THEN + IROW = INDROW(L) + ELSE + IF (IOPT(1)==0 .OR. IOPT(1)==1) IROW = L + END IF + DIFF = WK(IROW) - F(IROW) + ADIFF = ABS(DIFF) + IF (ADIFF >= DMAX) THEN + IROWMX = IROW + DMAX = ADIFF + SF = F(IROW) + SDF = WK(IROW) + END IF + FJACL = DIFF*RDEL + IF (ITRY == 1) WK(IROW) = FJACL + + IF (IOPT(1) == 0) THEN + IF (ITRY == 1) WK(IROW+N) = FJAC(IROW,JCOL) + FJAC(IROW,JCOL) = FJACL + END IF + IF (IOPT(1) == 1) THEN + IROWB = IROW - JCOL + IOPT(2) + IF (ITRY == 1) WK(IROW+N) = FJAC(IROWB,JCOL) + FJAC(IROWB,JCOL) = FJACL +! IROWB = JCOL * MEB1 - ML + L +! IF (ITRY == 1) WK(IROW+N) = FJAC(IROWB,1) +! FJAC(IROWB,1) = FJACL + END IF + IF (IOPT(1) == 2) THEN + IF (ITRY == 1) WK(IROW+N) = FJAC(L,1) + FJAC(L,1) = FJACL + END IF + END DO + +! IF A COLUMN IS BEING RECOMPUTED (ITRY=1),THIS SECTION OF THE +! CODE PERFORMS AN EXTRAPOLATION TEST TO ENABLE THE CODE TO +! COMPUTE SMALL DERIVATIVES MORE ACCURATELY. THIS TEST IS ONLY +! PERFORMED ON THOSE COLUMNS WHOSE LARGEST DIFFERENCE IS CLOSE +! TO ZERO RELATIVE TO SCALE. + IF (ITRY == 1) THEN + IFLAG1 = 0 + IFLAG2 = 0 + DO 100 J = NID5 + 1, NID6 + IF (IWK(J) == JCOL) IFLAG1 = 1 +100 CONTINUE + IF (IFLAG1 == 1) THEN + IFLAG1 = 0 + T1 = WK(IROWMX+N) + T2 = WK(IROWMX)*FAC(JCOL) + IF (ABS(T2) < ABS(T1)*PERT) IFLAG2 = 1 + END IF + + IF (IFLAG2 == 1) THEN + IFLAG2 = 0 + T1 = FAC(JCOL)*FAC(JCOL) + FAC(JCOL) = MAX(T1,FACMIN) + DO L = IDXL, IDXU + IF (IOPT(1) == 2) THEN + IROW = INDROW(L) + ELSE + IF (IOPT(1)==0 .OR. IOPT(1)==1) IROW = L + END IF + FJACL = WK(IROW+N) + IF (IOPT(1) == 0) FJAC(IROW,JCOL) = FJACL + IF (IOPT(1) == 1) THEN + IROWB = IROW - JCOL + IOPT(2) + FJAC(IROWB,JCOL) = FJACL +! IROWB = JCOL * MEB1 - ML + L +! FJAC(IROWB,1) = FJACL + END IF + IF (IOPT(1) == 2) FJAC(L,1) = FJACL + END DO + END IF + END IF + + FMJ = ABS(SF) + DFMJ = ABS(SDF) + RMXFDF = MAX(FMJ,DFMJ) + RMNFDF = MIN(FMJ,DFMJ) + +! IF SCALE INFORMATION IS NOT AVAILABLE, PERFORM NO ROUNDOFF +! OR TRUNCATION ERROR TESTS. IF THE EXTRAPOLATION TEST HAS +! CAUSED FAC(JCOL) TO BE RESET TO ITS PREVIOUS VALUE (IAJAC=1) +! THEN NO FURTHER ROUNDOFF OR TRUNCATION ERROR TESTS ARE +! PERFORMED. +! IF (RMNFDF/=ZERO) THEN + IF (ABS(RMNFDF) > ZERO) THEN +! TEST FOR POSSIBLE ROUNDOFF ERROR (FIRST TEST) +! AND ALSO FOR POSSIBLE SERIOUS ROUNDOFF ERROR (SECOND TEST). + IF (DMAX <= (U3QRT*RMXFDF)) THEN + IF (DMAX <= (U7EGT*RMXFDF)) THEN + IF (ITRY == 0) THEN + T1 = SQRT(FAC(JCOL)) + FAC(JCOL) = MIN(T1,FACMAX) + IRCMP = 1 + IF (KT5 < NID6) THEN + KT5 = KT5 + 1 + IWK(KT5) = JCOL + END IF + IWK(4) = IWK(4) + 1 + IF (KT3 < NID4) THEN + KT3 = KT3 + 1 + IWK(KT3) = JCOL + END IF + ELSE + IWK(5) = IWK(5) + 1 + IF (KT4 < NID5) THEN + KT4 = KT4 + 1 + IWK(KT4) = JCOL + END IF + END IF + ELSE + T1 = UMEGT*FAC(JCOL) + FAC(JCOL) = MIN(T1,FACMAX) + END IF + END IF +! TEST FOR POSSIBLE TRUNCATION ERROR. + IF (DMAX > UQRT*RMXFDF) THEN + T1 = FAC(JCOL)*UEGT + FAC(JCOL) = MAX(T1,FACMIN) + IWK(8) = IWK(8) + 1 + IF (KT1 < NID2) THEN + KT1 = KT1 + 1 + IWK(KT1) = JCOL + END IF + END IF + ELSE + IWK(6) = IWK(6) + 1 + END IF + END IF + END DO + +! IF SERIOUS ROUNDOFF ERROR IS SUSPECTED, RECOMPUTE ALL +! COLUMNS IN GROUP NUMGRP. + IF (IRCMP == 1) THEN + IRCMP = 0 + ITRY = 1 + GOTO 40 + END IF + ITRY = 0 + END DO + RETURN + + END SUBROUTINE JACSPDB + +! End of JACSP routines. +!_______________________________________________________________________ +! *****MA48 build change point. Insert the MA48 Jacobian related +! routines DVNLSS48, DVSOLS48, DVPREPS48, and DVJACS48 here. +! filename = jacobian_for_ma48.f90. Insert the following line +! after the first line of this file: +! USE hsl_ma48_double +!_______________________________________________________________________ + +! *****LAPACK build change point. Insert the following line after the +! first line of this file: +! USE lapackd_f90_m +! Include the module lapackd_f90_m.f90 at the beginning of this file +! or as an external module. +!_______________________________________________________________________ + + END MODULE DVODE_F90_M +!_______________________________________________________________________ diff --git a/python/03_henon_heiles/vode/example1.f90 b/python/03_henon_heiles/vode/example1.f90 new file mode 100644 index 0000000..8e45a37 --- /dev/null +++ b/python/03_henon_heiles/vode/example1.f90 @@ -0,0 +1,117 @@ + MODULE EXAMPLE1 + +! DEMONSTRATION PROGRAM FOR THE DVODE_F90 PACKAGE. + +! The following is a simple example problem, with the coding +! needed for its solution by DVODE_F90. The problem is from +! chemical kinetics, and consists of the following three rate +! equations: +! dy1/dt = -.04d0*y1 + 1.d4*y2*y3 +! dy2/dt = .04d0*y1 - 1.d4*y2*y3 - 3.d7*y2**2 +! dy3/dt = 3.d7*y2**2 +! on the interval from t = 0.0d0 to t = 4.d10, with initial +! conditions y1 = 1.0d0, y2 = y3 = 0.0d0. The problem is stiff. + +! The following coding solves this problem with DVODE_F90, +! using a user supplied Jacobian and printing results at +! t = .4, 4.,...,4.d10. It uses ITOL = 2 and ATOL much smaller +! for y2 than y1 or y3 because y2 has much smaller values. At +! the end of the run, statistical quantities of interest are +! printed. (See optional output in the full DVODE description +! below.) Output is written to the file example1.dat. + + + CONTAINS + + SUBROUTINE FEX(NEQ,T,Y,YDOT) + IMPLICIT NONE + INTEGER, INTENT (IN) :: NEQ + DOUBLE PRECISION, INTENT (IN) :: T + DOUBLE PRECISION, INTENT (IN) :: Y(NEQ) + DOUBLE PRECISION, INTENT (OUT) :: YDOT(NEQ) + YDOT(1) = -.04D0*Y(1) + 1.D4*Y(2)*Y(3) + YDOT(3) = 3.E7*Y(2)*Y(2) + YDOT(2) = -YDOT(1) - YDOT(3) + RETURN + END SUBROUTINE FEX + + SUBROUTINE JEX(NEQ,T,Y,ML,MU,PD,NRPD) + IMPLICIT NONE + INTEGER, INTENT (IN) :: NEQ, ML, MU, NRPD + DOUBLE PRECISION, INTENT (IN) :: T + DOUBLE PRECISION, INTENT (IN) :: Y(NEQ) + DOUBLE PRECISION, INTENT (OUT) :: PD(NRPD,NEQ) + PD(1,1) = -.04D0 + PD(1,2) = 1.D4*Y(3) + PD(1,3) = 1.D4*Y(2) + PD(2,1) = .04D0 + PD(2,3) = -PD(1,3) + PD(3,2) = 6.E7*Y(2) + PD(2,2) = -PD(1,2) - PD(3,2) + RETURN + END SUBROUTINE JEX + + END MODULE EXAMPLE1 + +!****************************************************************** + + PROGRAM RUNEXAMPLE1 + + USE DVODE_F90_M + USE EXAMPLE1 + + IMPLICIT NONE + DOUBLE PRECISION ATOL, RTOL, T, TOUT, Y, RSTATS + INTEGER NEQ, ITASK, ISTATE, ISTATS, IOUT, IERROR, I + DIMENSION Y(3), ATOL(3), RSTATS(22), ISTATS(31) + + TYPE (VODE_OPTS) :: OPTIONS + + OPEN (UNIT=6,FILE='example1.dat') + IERROR = 0 + NEQ = 3 + Y(1) = 1.0D0 + Y(2) = 0.0D0 + Y(3) = 0.0D0 + T = 0.0D0 + TOUT = 0.4D0 + RTOL = 1.D-4 + ATOL(1) = 1.D-8 + ATOL(2) = 1.D-14 + ATOL(3) = 1.D-6 + ITASK = 1 + ISTATE = 1 +! OPTIONS = SET_OPTS(DENSE_J=.TRUE.,ABSERR_VECTOR=ATOL,RELERR=RTOL, & +! USER_SUPPLIED_JACOBIAN=.TRUE.) + OPTIONS = SET_NORMAL_OPTS(DENSE_J=.TRUE.,ABSERR_VECTOR=ATOL, & + RELERR=RTOL,USER_SUPPLIED_JACOBIAN=.TRUE.) + DO IOUT = 1, 12 + CALL DVODE_F90(FEX,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,J_FCN=JEX) + CALL GET_STATS(RSTATS,ISTATS) + WRITE (6,90003) T, Y(1), Y(2), Y(3) + DO I = 1, NEQ + IF (Y(I)<0.0D0) IERROR = 1 + END DO + IF (ISTATE<0) THEN + WRITE (6,90004) ISTATE + STOP + END IF + TOUT = TOUT*10.0D0 + END DO + WRITE (6,90000) ISTATS(11), ISTATS(12), ISTATS(13), ISTATS(19), & + ISTATS(20), ISTATS(21), ISTATS(22) + IF (IERROR==1) THEN + WRITE (6,90001) + ELSE + WRITE (6,90002) + END IF +90000 FORMAT (/' No. steps =',I4,' No. f-s =',I4,' No. J-s =',I4, & + ' No. LU-s =',I4/' No. nonlinear iterations =', & + I4/' No. nonlinear convergence failures =', & + I4/' No. error test failures =',I4/) +90001 FORMAT (/' An error occurred.') +90002 FORMAT (/' No errors occurred.') +90003 FORMAT (' At t =',D12.4,' y =',3D14.6) +90004 FORMAT (///' Error halt: ISTATE =',I3) + STOP + END PROGRAM RUNEXAMPLE1 diff --git a/python/03_henon_heiles/vode/example2.f90 b/python/03_henon_heiles/vode/example2.f90 new file mode 100644 index 0000000..fd9abac --- /dev/null +++ b/python/03_henon_heiles/vode/example2.f90 @@ -0,0 +1,118 @@ + MODULE EXAMPLE2 + +! DEMONSTRATION PROGRAM FOR THE DVODE_F90 PACKAGE. + +! The following is a modification of the previous example +! program to illustrate root finding. The problem is from +! chemical kinetics, and consists of the following three +! rate equations: +! dy1/dt = -.04d0*y1 + 1.d4*y2*y3 +! dy2/dt = .04d0*y1 - 1.d4*y2*y3 - 3.d7*y2**2 +! dy3/dt = 3.d7*y2**2 +! on the interval from t = 0.0d0 to t = 4.d10, with initial +! conditions y1 = 1.0d0, y2 = y3 = 0.0d0. The problem is stiff. +! In addition, we want to find the values of t, y1, y2, +! and y3 at which: +! (1) y1 reaches the value 1.d-4, and +! (2) y3 reaches the value 1.d-2. + +! The following coding solves this problem with DVODE_F90 +! using an internally generated dense Jacobian and +! printing results at t = .4, 4., ..., 4.d10, and at the +! computed roots. It uses ITOL = 2 and ATOL much smaller +! for y2 than y1 or y3 because y2 has much smaller values. +! At the end of the run, statistical quantities of interest +! are printed (see optional outputs in the full description +! below). Output is written to the file example2.dat. + + CONTAINS + + SUBROUTINE FEX(NEQ,T,Y,YDOT) + IMPLICIT NONE + INTEGER, INTENT (IN) :: NEQ + DOUBLE PRECISION, INTENT (IN) :: T + DOUBLE PRECISION, INTENT (IN) :: Y(NEQ) + DOUBLE PRECISION, INTENT (OUT) :: YDOT(NEQ) + YDOT(1) = -0.04D0*Y(1) + 1.0D4*Y(2)*Y(3) + YDOT(3) = 3.0D7*Y(2)*Y(2) + YDOT(2) = -YDOT(1) - YDOT(3) + RETURN + END SUBROUTINE FEX + + SUBROUTINE GEX(NEQ,T,Y,NG,GOUT) + IMPLICIT NONE + INTEGER, INTENT (IN) :: NEQ, NG + DOUBLE PRECISION, INTENT (IN) :: T + DOUBLE PRECISION, INTENT (IN) :: Y(NEQ) + DOUBLE PRECISION, INTENT (OUT) :: GOUT(NG) + GOUT(1) = Y(1) - 1.0D-4 + GOUT(2) = Y(3) - 1.0D-2 + RETURN + END SUBROUTINE GEX + + END MODULE EXAMPLE2 + +!****************************************************************** + + PROGRAM RUNEXAMPLE2 + + USE DVODE_F90_M + USE EXAMPLE2 + + IMPLICIT NONE + INTEGER ITASK, ISTATE, NG, NEQ, IOUT, JROOT, ISTATS, IERROR, I + DOUBLE PRECISION ATOL, RTOL, RSTATS, T, TOUT, Y + DIMENSION Y(3), ATOL(3), RSTATS(22), ISTATS(31), JROOT(2) + + TYPE (VODE_OPTS) :: OPTIONS + + OPEN (UNIT=6,FILE='example2.dat') + IERROR = 0 + NEQ = 3 + Y(1) = 1.0D0 + Y(2) = 0.0D0 + Y(3) = 0.0D0 + T = 0.0D0 + TOUT = 0.4D0 + RTOL = 1.0D-4 + ATOL(1) = 1.0D-8 + ATOL(2) = 1.0D-10 + ATOL(3) = 1.0D-8 + ITASK = 1 + ISTATE = 1 + NG = 2 +! OPTIONS = SET_OPTS(DENSE_J=.TRUE.,RELERR=RTOL,ABSERR_VECTOR=ATOL, & +! NEVENTS=NG) + OPTIONS = SET_NORMAL_OPTS(DENSE_J=.TRUE.,RELERR=RTOL, & + ABSERR_VECTOR=ATOL,NEVENTS=NG) + DO 20 IOUT = 1, 12 +10 CONTINUE + CALL DVODE_F90(FEX,NEQ,Y,T,TOUT,ITASK,ISTATE,OPTIONS,G_FCN=GEX) + CALL GET_STATS(RSTATS,ISTATS,NG,JROOT) + WRITE (6,90000) T, Y(1), Y(2), Y(3) + DO I = 1, NEQ + IF (Y(I)<0.0D0) IERROR = 1 + END DO +90000 FORMAT (' At t =',D12.4,' Y =',3D14.6) + IF (ISTATE<0) GO TO 30 + IF (ISTATE==2) GO TO 20 + WRITE (6,90001) JROOT(1), JROOT(2) +90001 FORMAT (5X,' The above line is a root, JROOT =',2I5) + ISTATE = 2 + GO TO 10 +20 TOUT = TOUT*10.0D0 + WRITE (6,90002) ISTATS(11), ISTATS(12), ISTATS(13), ISTATS(10) + IF (IERROR==1) THEN + WRITE (6,90003) + ELSE + WRITE (6,90004) + END IF +90002 FORMAT (/' No. steps =',I4,' No. f-s =',I4,' No. J-s =',I4, & + ' No. g-s =',I4/) + STOP +30 WRITE (6,90005) ISTATE +90003 FORMAT (/' An error occurred.') +90004 FORMAT (/' No errors occurred.') +90005 FORMAT (///' Error halt.. ISTATE =',I3) + STOP + END PROGRAM RUNEXAMPLE2 diff --git a/python/03_henon_heiles/vode/quickstart.tex b/python/03_henon_heiles/vode/quickstart.tex new file mode 100644 index 0000000..9b388be --- /dev/null +++ b/python/03_henon_heiles/vode/quickstart.tex @@ -0,0 +1,95 @@ +\input amstex +\NoBlackBoxes +\nopagenumbers + +\centerline{{\bf Quickstart Instructions for DVODE.F90}} +\vskip 0.50in + +\item{ $\bullet$ } {\bf Compiling DVODE.F90} +\vskip .10cm +\itemitem{ 1. } +DVODE.F90 is a Fortran 90 extension of the f77 DVODE.F +ODE solver of Brown, Byrne, and Hindmarsh from the +LLNL ODEPACK suite. +It incoporates additional options from other solvers +in ODEPACK most notably ones from LSODES and LSODAR. +It also incoporates a Fortran 90 translation of the +Harwell MA28 routines for sparse Jacobians. +The three Fortran 90 modules that constitute DVODE.F90 are +contained in the distribution file vode\_double\_m.f90. +Compile this file using commands appropriate for your +Fortran 90 compiler. +\vskip .05cm +\itemitem{ 2. } +Compile, link, and execute the program example1.f90. The last +line in the output file example1.dat should be: +``No errors occurred.'' +Compare your results in example1.dat with those in +Save\_example1.dat. +\vskip .05cm +\itemitem{ 3. } +Repeat Step 2 for the program example2.f90. +Compare your results in example2.dat with those in +Save\_example2.dat. +\vskip .05cm +\itemitem{ 4. } +Refer to the demonstration programs available from the DVODE.F90 +support page for examples which illustrate the various features +and options available in DVODE.F90. +If your compiler complains about argument and array declarations +in the user subroutines, refer to example1.f90 and example2.f90 +to see how to declare the INTENT of the subroutine arguments. +\vskip .025in + +\item{ $\bullet$ } {\bf Documentation} +\vskip .10cm +\itemitem{ 1. } +The distribution file vode\_double\_m.f90 contains a detailed +documentation prologue for DVODE.F90. +\vskip .05cm +\itemitem{ 2. } +The DVODE.F90 support page contains a break down of the +documentation prologue by major task. +\vskip .025in + +\item{ $\bullet$ } {\bf Changing Working Precision} +\vskip .10cm +\itemitem{ 1. } +The distribution file contains a double precision version of +DVODE.F90. If you wish to use it in single precision change +the selected precision in a statement at the very beginning +of the file. +\vskip .025in + +\item{ $\bullet$ } {\bf Web Support Page} +\vskip .10cm +\itemitem{ 1. } +The source code, demonstration programs, and other +material for DVODE.F90 is located at: +\vskip .05cm +\itemitem{ } +\hskip .50in http://www.radford.edu/$\sim$thompson/vodef90web.html +\vskip .025in + +\item{ $\bullet$ } {\bf Contact Information} +\vskip .10cm +\itemitem{ 1. } +Please contact the authors if you have any questions +concerning DVODE.F90: +\vskip .01cm +\hskip .50in G.D. Byrne +\vskip .01cm +\hskip .50in gdbyrne847\@yahoo.com +\vskip .05cm +\hskip .50in S. Thompson +\vskip .01cm +\hskip .50in thompson\@radford.edu +\vskip .025in +\itemitem{ 2. } +Note that Alan Hindmarsh is not involved with the distribution +or support of DVODE.F90. {\it Please} do not bug him with +questions about DVODE.F90. + +\vfil + +\bye diff --git a/python/03_henon_heiles/vode/vode_f90_license.txt b/python/03_henon_heiles/vode/vode_f90_license.txt new file mode 100644 index 0000000..60e3757 --- /dev/null +++ b/python/03_henon_heiles/vode/vode_f90_license.txt @@ -0,0 +1,40 @@ +! +! If your usage of this software requires a license, the following +! BSD-like license is applicable. If it is not appropriate for your +! usage, please contact the authors. +! +! Copyright (c) 2009 +! S. Thompson, Radford University, Radford, VA +! L.F. Shampine, Southern Methodist University, Dallas, TX +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! Redistributions of source code must retain the above copyright +! notice, this list of conditions, and the following disclaimer. +! +! Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer +! in the documentation and/or other materials provided with the +! distribution. +! +! Neither the name of the organizations nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + diff --git a/python/04_standard_map/Makefile b/python/04_standard_map/Makefile new file mode 100644 index 0000000..e4e38fc --- /dev/null +++ b/python/04_standard_map/Makefile @@ -0,0 +1,13 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py + +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll $(NAME).f90 diff --git a/python/04_standard_map/func.py b/python/04_standard_map/func.py new file mode 100644 index 0000000..ca3da7b --- /dev/null +++ b/python/04_standard_map/func.py @@ -0,0 +1,286 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:47:00 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import newton, bisect +from scipy.linalg import solve_triangular +import scipy +from sklearn.metrics import mean_squared_error +from scipy.integrate import solve_ivp + +from kernels import * # implicit SympGPR +#from kernels_expl_per_q_sq_p import * #explicit SympGPR +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def buildKreg(xin, x0in, hyp, K): + # set up "usual" covariance matrix for GP on regular grid (q,p) + # print(hyp) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0] + N0 = K.shape[1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = f_kern( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + + +def gpsolve(Ky, ft): + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_triangular( + L.T, solve_triangular(L, ft, lower=True, check_finite=False), + lower=False, check_finite=False) + + return L, alpha + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +def nll_chol_reg(hyp, x, y, N): + K = np.empty((N, N)) + buildKreg(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret +# negative log-posterior +def nll_chol(hyp, x, y, N): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +def build_K_expl(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def nll_expl(hyp, x, y, N, ind): + K = np.empty((N, N)) + if ind == 0: + build_K_expl(x, x, np.hstack((hyp[0], 0, hyp[1])), K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Ky = Ky[0:len(y), 0:len(y)] + else: + build_K_expl(x, x, np.hstack((0, hyp[0], hyp[1])), K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Ky = Ky[len(y):2*len(y), len(y):2*len(y)] + + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + nlp_val = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + + return nlp_val + +def guessP(x, y, hypp, xtrainp, ztrainp, Kyinvp, N): + Ntest = 1 + Kstar = np.empty((Ntest, int(len(xtrainp)/2))) + buildKreg(np.hstack((x,y)), xtrainp, hypp, Kstar) + Ef = Kstar.dot(Kyinvp.dot(ztrainp)) + return Ef + +def calcQ(x,y, xtrain, l, Kyinv, ztrain): + # get \Delta q from GP on mixed grid. + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + dq = qGP[1] + return dq + +def Pnewton(P, x, y, l, xtrain, Kyinv, ztrain): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x, P)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = pGP[0] - y + P + # print(pGP[0]) + return f + +def calcP(x,y, l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest): + # as P is given in an implicit relation, use newton to solve for P (Eq. (42)) + # use the GP on regular grid (q,p) for a first guess for P + pgss = guessP([x], [y], hypp, xtrainp, ztrainp, Kyinvp, Ntest) + res, r = newton(Pnewton, pgss, full_output=True, maxiter=205000, disp=True, + args = (np.array([x]), np.array ([y]), l, xtrain, Kyinv, ztrain)) + return res + +def calcP_expl(x,y, l, xtrain, ztrain, Kyinv): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x,y)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + res = -pGP[0] + y + return res + + + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + pdiff = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + pdiff[0,:] = P0map + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + # set new P including Newton for implicit Eq. (42) + # print('nm = ', i, 'N_i = ',k) + # print('pmap = ', pmap[i, k]) + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest) + # if np.mod(pmap[i+1, k], 2*np.pi) > 0.0: + pdiff[i+1, k] = pdiff[i, k] + (pmap[i+1, k] - pmap[i, k]) + # pmap[i+1, k] = np.mod(pmap[i+1, k], 2*np.pi) # only for standard map + # print('nm = ', i, 'N_i = ', k, 'pdiff =', pdiff[i+1,k], 'pdiff -1 = ', pdiff[i, k], 'dp =', (pmap[i+1, k] - pmap[i, k])) + # else: + # pdiff[i+1, k] = pdiff[i,k] + (pmap[i+1, k] - pmap[i, k])# + # pmap[i+1, k] = np.mod(pmap[i+1, k], 2*np.pi) + # print('nm = ', i, 'N_i = ', k, 'pdiff =', pdiff[i+1,k], 'pdiff -1 = ', pdiff[i, k], 'dp =', (pmap[i+1, k] - pmap[i, k])) + + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q (Eq. (43)) + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + # print(dqmap) + # qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2.0*np.pi) + # qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 1) + qmap[i+1, k] = dqmap + qmap[i, k] + return qmap, pmap, pdiff +def applymap(nm, Ntest, l, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv): + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + pdiff = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + pdiff[0,:] = P0map + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + # set new P including Newton for implicit Eq. (42) + # print('nm = ', i, 'N_i = ',k) + # print('pmap = ', pmap[i, k]) + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest) + # if np.mod(pmap[i+1, k], 2*np.pi) > 0.0: + pdiff[i+1, k] = pdiff[i, k] + (pmap[i+1, k] - pmap[i, k]) + pmap[i+1, k] = np.mod(pmap[i+1, k], 2*np.pi) # only for standard map + # print('nm = ', i, 'N_i = ', k, 'pdiff =', pdiff[i+1,k], 'pdiff -1 = ', pdiff[i, k], 'dp =', (pmap[i+1, k] - pmap[i, k])) + # else: + # pdiff[i+1, k] = pdiff[i,k] + (pmap[i+1, k] - pmap[i, k])# + # pmap[i+1, k] = np.mod(pmap[i+1, k], 2*np.pi) + # print('nm = ', i, 'N_i = ', k, 'pdiff =', pdiff[i+1,k], 'pdiff -1 = ', pdiff[i, k], 'dp =', (pmap[i+1, k] - pmap[i, k])) + + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q (Eq. (43)) + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + # print(dqmap) + qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2.0*np.pi) + # qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 1) + # qmap[i+1, k] = dqmap + qmap[i, k] + return qmap, pmap, pdiff + +def applymap_expl(nm, Ntest, l, Q0map, P0map, xtrain, ztrain, Kyinv): + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + pdiff = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + pdiff[0,:] = P0map + + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + # set new P including Newton for implicit Eq (42) + # print('nm = ', i, 'N_i = ',k) + pmap[i+1, k] = calcP_expl(qmap[i,k], pmap[i, k], l, xtrain, ztrain, Kyinv) + pdiff[i+1, k] = pdiff[i, k] + (pmap[i+1, k] - pmap[i, k]) + pmap[i+1, k] = np.mod(pmap[i+1, k], 2*np.pi) + # pmap[i+1, k] = np.mod(pmap[i+1, k], 2*np.pi) + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q (Eq. (43)) + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + # print(dqmap) + # qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2.0*np.pi) + qmap[i+1, k] = dqmap + qmap[i, k] + return qmap, pmap, pdiff + diff --git a/python/04_standard_map/init_func.py b/python/04_standard_map/init_func.py new file mode 100644 index 0000000..15ce043 --- /dev/null +++ b/python/04_standard_map/init_func.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +kern_sqexp_sin = exp(-sin(0.5*x)**2/(2.0*l**2)) + +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern_x = kern_sqexp_sin.subs(x, xa-xb).subs(l, lx) +kern = (kern_x*kern_y).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/04_standard_map/kernels.f90 b/python/04_standard_map/kernels.f90 new file mode 100644 index 0000000..a6a970b --- /dev/null +++ b/python/04_standard_map/kernels.f90 @@ -0,0 +1,231 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0 & + *x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = 0.25d0*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - sin(0.5d0*x_a & + - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp(0.5d0*(-lx**2* & + (y_a - y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly** & + 2))/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly & + **4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = -0.5d0*(y_a - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly & + **2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - & + 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx**2*ly**2) +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0.25d0*(y_a - y_b)*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - & + sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)** & + 2)/(lx**2*ly**2))/(lx**4*ly**2) +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))/ly**6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0.5d0*(1.0d0*ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2 & + *(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly & + **2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx** & + 2*ly**4) +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a & + - 0.5d0*x_b)**2/lx**2)*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = 1.0d0*(y_a - y_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-0.5d0*lx**4*cos(1.0d0*x_a - 1.0d0*x_b) + lx**2*( & + 0.75d0*cos(1.0d0*x_a - 1.0d0*x_b) + 0.5d0)*sin(0.5d0*x_a - 0.5d0* & + x_b)**2 - 0.25d0*sin(0.5d0*x_a - 0.5d0*x_b)**4*cos(0.5d0*x_a - & + 0.5d0*x_b)**2)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2*(y_a & + - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))* & + sin(0.5d0*x_a - 0.5d0*x_b)**2/(lx**3*ly**4) +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = (lx**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2)*(y_a - & + y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - & + 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/(lx**5*ly**2) +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0.25d0*(y_a - y_b)**2*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b & + ) - sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)* & + exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0* & + x_b)**2)/(lx**2*ly**2))/(lx**4*ly**3) +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + 5.0d0*ly**2*(y_a - y_b)**2 - 1.0d0*( & + y_a - y_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = (ly**2 - 0.5d0*(y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b) & + /(lx**2*ly**5) +end function diff --git a/python/04_standard_map/kernels.py b/python/04_standard_map/kernels.py new file mode 100644 index 0000000..06f1f13 --- /dev/null +++ b/python/04_standard_map/kernels.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Aug 9 22:30:35 2020 + +@author: manal +""" + +def __bootstrap__(): + global __bootstrap__, __loader__, __file__ + import sys, pkg_resources, imp + __file__ = pkg_resources.resource_filename(__name__,'kernels.cpython-38-x86_64-linux-gnu.so') + __loader__ = None; del __bootstrap__, __loader__ + imp.load_dynamic(__name__,__file__) +__bootstrap__() \ No newline at end of file diff --git a/python/04_standard_map/kernels_expl_per_q_sq_p.f90 b/python/04_standard_map/kernels_expl_per_q_sq_p.f90 new file mode 100644 index 0000000..d0c12b0 --- /dev/null +++ b/python/04_standard_map/kernels_expl_per_q_sq_p.f90 @@ -0,0 +1,206 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp((-0.5d0*y_a**2 + y_a*y_b - 0.5d0*y_b**2)/ly**2) + exp( & + -0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -1.0d0/2.0d0*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)* & + sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = (-y_a + y_b)*exp(0.5d0*(-y_a**2 + 2.0d0*y_a*y_b - y_b**2)/ly & + **2)/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = (1.0d0/2.0d0)*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2 & + )*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = (y_a - y_b)*exp(0.5d0*(-y_a**2 + 2.0d0*y_a*y_b - y_b**2)/ly & + **2)/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = ((1.0d0/4.0d0)*lx**2*cos(x_a - x_b) - 1.0d0/16.0d0*sin( & + x_a - x_b)**2)*exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/lx & + **4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = (ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-y_a**2 + 2.0d0*y_a* & + y_b - y_b**2)/ly**2)/ly**4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = 0 +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0 +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(0.5d0*(-y_a & + **2 + 2.0d0*y_a*y_b - y_b**2)/ly**2)/ly**6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0 +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = exp(-0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)*sin(0.5d0* & + x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = (y_a**2 - 2*y_a*y_b + y_b**2)*exp(0.5d0*(-y_a**2 + 2.0d0*y_a & + *y_b - y_b**2)/ly**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (1.0d0/4.0d0)*(-2*lx**4*cos(x_a - x_b) + lx**2*(3*cos( & + x_a - x_b) + 2)*sin(0.5d0*x_a - 0.5d0*x_b)**2 - sin(0.5d0*x_a - & + 0.5d0*x_b)**4*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp(-0.5d0*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2/lx**2)/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 0 +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = 0 +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0 +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2*ly**4 + ly**2*(y_a**2 - 2*y_a*y_b + y_b**2 + 4*( & + y_a - y_b)**2) + (y_a - y_b)**2*(-y_a**2 + 2*y_a*y_b - y_b**2))* & + exp(0.5d0*(-y_a**2 + 2.0d0*y_a*y_b - y_b**2)/ly**2)/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = 0 +end function diff --git a/python/04_standard_map/main.py b/python/04_standard_map/main.py new file mode 100644 index 0000000..dfec8b6 --- /dev/null +++ b/python/04_standard_map/main.py @@ -0,0 +1,210 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 29 11:42:52 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import minimize +from func import (build_K, buildKreg, applymap, nll_chol, applymap_expl, nll_expl) +import tkinter +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from sklearn.metrics import mean_squared_error +import scipy +import ghalton +import random +#%% init parameters +k = 1.0 # stochasticity parameter +N = 20 #training data +nm = 1000 # map applications +Ntest = 18 # test data +sig2_n = 1e-8 #noise**2 in observations + +def StandardMap(x, k): + outJ = (x[1] + k*np.sin(x[0])) + outth = (x[0] + outJ) + return [outth, outJ] + + +def StandardMapIterate(k, nm, N, X0): + f = np.zeros((2, N, nm)) + f[:,:, 0] = X0 + for i in range(0, N): + for l in range(0, nm-1): + temp = StandardMap(f[:, i, l], k) + f[:, i, l+1] = temp + return f + +# sample initial data points from halton sequence +seq = ghalton.Halton(2) +X0 = seq.get(N)*np.array([2*np.pi, 2*np.pi]) +fmap = StandardMapIterate(k, 2, N, X0.T) + +#set training data +q = fmap[0, :, 0] +p = fmap[1, :, 0] +Q = fmap[0, :, 1] +P = fmap[1, :, 1] + + +zqtrain = Q - q +zptrain = p - P + +xtrain = q.flatten() +ytrain = P.flatten() +xtrain = np.hstack((q, P)).T +ztrain = np.concatenate((zptrain.flatten(), zqtrain.flatten())) + +#define boundaries for test data +qminmap = np.array(0) +qmaxmap = np.array(2*np.pi) +pminmap = np.array(0) +pmaxmap = np.array(2*np.pi) + +#set initial conditions (q0, p0) randomly for test data +random.seed(2) +q0map = np.linspace(qminmap,qmaxmap,Ntest) +p0map = np.linspace(pminmap,pmaxmap,Ntest) +q0map = random.sample(list(q0map), Ntest) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map) +P0map = np.array(p0map) +X0test = np.stack((Q0map, P0map)).T + +# calculate reference data (test) +yinttest = StandardMapIterate(k, nm, Ntest, X0test.T) + +#%% set up GP +# as indicated in Algorithm 1: Semi-implicit symplectic GP map +#hyperparameter optimization of length scales (lq, lp) +method = 'implicit' +if method == 'implicit': + log10l0 = np.array((-1, -1), dtype = float) + + # Step 1: Usual GP regression of P over (q,p) + #fit GP + hyperparameter optimization to have a first guess for newton for P + xtrainp = np.hstack((q, p)) + ztrainp = P-p + + sigp = 2*np.amax(np.abs(ztrainp))**2 + + def nll_transform2(log10hyp, sig, sig2n, x, y, N): + hyp = 10**log10hyp + return nll_chol(np.hstack((hyp, sig, [sig2n])), x, y, N) + res = minimize(nll_transform2, np.array((log10l0)), args = (sigp, sig2_n, xtrainp, ztrainp.T.flatten(), N), method='L-BFGS-B', bounds = ((-10, 1), (-10, 1))) + + lp = 10**res.x + hypp = np.hstack((lp, sigp)) + print('Optimized lengthscales for regular GP: lq =', "{:.2f}".format(lp[0]), 'lp = ', "{:.2f}".format(lp[1])) + # build K and its inverse + Kp = np.zeros((N, N)) + buildKreg(xtrainp, xtrainp, hypp, Kp) + Kyinvp = scipy.linalg.inv(Kp + sig2_n*np.eye(Kp.shape[0])) + #%% + # Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) according to Eq. 41 + # hyperparameter optimization for lengthscales (lq, lp) and GP fitting + sig = 2*np.amax(np.abs(ztrain))**2 + log10l0 = np.array((0,0), dtype = float) + def nll_transform(log10hyp, sig, sig2n, x, y, N): + hyp = 10**log10hyp + out = nll_chol(np.hstack((hyp, sig, [sig2n])), x, y, N) + return out + + res = minimize(nll_transform, np.array((0,-1)), args = (sig, sig2_n, xtrain, ztrain.T.flatten(), 2*N), method='L-BFGS-B', tol= 1e-8, bounds = ((-2, 2), (-2, 2)))#, + + sol1 = 10**res.x + + l = [np.abs(sol1[0]), np.abs(sol1[1])] + print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1]), 'sig = ', "{:.2f}".format(sig)) + + #%% + #build K(x,x') and regularized inverse with sig2_n + # K(x,x') corresponds to L(q,P,q',P') given in Eq. (38) + hyp = np.hstack((l, sig)) + K = np.empty((2*N, 2*N)) + build_K(xtrain, xtrain, hyp, K) + Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + + # caluate training error + Eftrain = K.dot(Kyinv.dot(ztrain)) + outtrain = mean_squared_error(ztrain, Eftrain) + print('training error', "{:.1e}".format(outtrain)) + + #%% Application of symplectic map + outq, outp, pdiff = applymap( + nm, Ntest, hyp, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv) +#%% +elif method == 'explicit': + # use kernels_expl_per_q_sq_p.pyd for sum kernel in func.py + + # Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) according to Eq. 41 + # hyperparameter optimization for lengthscales (lq, lp) and GP fitting + # this is the explicit method + # lq and lp are trained separately + sig = 2*np.amax(np.abs(ztrain))**2 + log10l0 = np.array((1), dtype = float) + def nll_transform_expl(log10hyp, sig, sig2n, x, y, N, ind): + hyp = 10**log10hyp + # hyp = log10hyp + # print(hyp) + out = nll_expl(np.hstack((hyp, sig, [sig2n])), x, y, N, ind) + return out#[0]#, out[1]/out[0] + + #log 10 -> BFGS + res_lq = minimize(nll_transform_expl, np.array((1)), args = (sig, 1e-8, xtrain, zptrain.T.flatten(), 2*N, 0), method='L-BFGS-B')#, bounds = (-2, 2))#, + res_lp = minimize(nll_transform_expl, np.array((1)), args = (sig, 1e-8, xtrain, zqtrain.T.flatten(), 2*N, 1), method='L-BFGS-B')#, bounds = ((-2, 2), (-2, 2)),tol= 1e-8)#, + + + sol1 = 10**res_lq.x + sol2 = 10**res_lp.x + print(res_lq.success) + print(res_lp.success) + l = np.hstack((np.abs(sol1), np.abs(sol2))) + print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1])) + hyp = np.hstack((l, sig)) + K = np.empty((2*N, 2*N)) + build_K(xtrain, xtrain, hyp, K) + Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + + # caluate training error + Eftrain = K.dot(Kyinv.dot(ztrain)) + outtrain = mean_squared_error(ztrain, Eftrain) + print('training error', "{:.1e}".format(outtrain)) + + outq, outp, pdiff = applymap_expl( + nm, Ntest, hyp, Q0map, P0map, xtrain, ztrain, Kyinv) +#%% +pmap = np.mod(outp-np.pi, 2*np.pi) +qmap = np.mod(outq, 2*np.pi) +fmap = np.stack((qmap, pmap)) + +X0test = np.vstack((Q0map, P0map)).T +outmap = StandardMapIterate(k, nm, Ntest, X0test.T) +fintmap = np.stack((np.mod(outmap[0], 2*np.pi), np.mod(outmap[1]-np.pi, 2*np.pi))) +#%% plot results + +plt.figure(figsize = [10,3]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(qmap[:,i], pmap[:,i], 'k^', label = 'GP', markersize = 0.5) +plt.xlabel(r"$\theta$", fontsize = 20) +plt.ylabel(r"I", fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,2) +plt.plot(fintmap[0], fintmap[1], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.xlabel(r"$\theta$", fontsize = 20) +plt.ylabel(r"I", fontsize = 20) +plt.tight_layout() + +plt.subplot(1,3,3) +plt.plot(fintmap[0], fintmap[1], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +for i in range(0, Ntest): + plt.plot(qmap[:,i], pmap[:,i], 'k^', label = 'GP', markersize = 0.5) +plt.xlabel(r"$\theta$", fontsize = 20) +plt.ylabel(r"I", fontsize = 20) +plt.tight_layout() +plt.show(block = True) \ No newline at end of file diff --git a/python/05_tokamak/SplittedSympGPR/Makefile b/python/05_tokamak/SplittedSympGPR/Makefile new file mode 100644 index 0000000..4700534 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/Makefile @@ -0,0 +1,3 @@ +include *.mk $(bar) +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll \ No newline at end of file diff --git a/python/05_tokamak/SplittedSympGPR/calc_fieldlines.py b/python/05_tokamak/SplittedSympGPR/calc_fieldlines.py new file mode 100644 index 0000000..9b84ee1 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/calc_fieldlines.py @@ -0,0 +1,128 @@ +from numpy import zeros, array, arange, append, hstack, pi, cos, sin, mod, linspace, concatenate,vstack +import numpy as np +from common import f, qe, c +import common +from fieldlines import fieldlines +import ghalton +import random +# +# Attention: Variables are [pth, th, ph], so momentum in first component and +# "time" in third. Change to usual Z = z[:,1::-1] before using! +# +N = 70 # training data N = 70 for eps = 0.001 +nm = 500 +nturn = 2 # Number of full turns +nph = 32 # Number of steps per turn + +nphmap = 4 +eps = 0.001 +seq = ghalton.Halton(3) +X0 = seq.get(N)*array([0.38, 2*pi, 0])+array([0.1, 0, 0])# +yint = zeros([nph*nturn + 1, 3, N]) + +for ipart in range(0, N): + + r0 = X0[ipart, 0] + th0 = X0[ipart, 1] + ph0 = X0[ipart, 2] + fieldlines.init(nph=nph, am=-3, an=2, aeps=eps, aphase=0.0, arlast=r0) + pth0 = qe/c*fieldlines.ath(r0, th0, ph0) + + #%% + + z = zeros([nph*nturn + 1, 3]) + z[0,:] = [pth0, th0, 0.0] + + from time import time + tic = time() + + for kph in arange(nph*nturn): + z[kph+1, :] = z[kph, :] + fieldlines.timestep(z[kph+1, :]) + yint[:,:,ipart] = z + +#%% + +print('finish') +ind = int(nph/nphmap) + +q1 = yint[0,1] +p1 = yint[0,0]*1e2 +Q1 = yint[ind,1] +P1 = yint[ind,0]*1e2 + +q2 = Q1 +p2 = P1 +Q2 = yint[int(2*ind),1] +P2 = yint[int(2*ind),0]*1e2 + +q3 = Q2 +p3 = P2 +Q3 = yint[int(3*ind),1] +P3 = yint[int(3*ind),0]*1e2 + +q4 = Q3 +p4 = P3 +Q4 = yint[int(4*ind),1] +P4 = yint[int(4*ind),0]*1e2 + +#%% +zqtrain1 = Q1 - q1 +zptrain1 = p1 - P1 +xtrain1 = q1.flatten() +ytrain1 = P1.flatten() +xtrain1 = hstack((q1, P1)).T +ztrain1 = concatenate((zptrain1.flatten(), zqtrain1.flatten())) + +zqtrain2 = Q2 - q2 +zptrain2 = p2 - P2 +xtrain2 = q2.flatten() +ytrain2 = P2.flatten() +xtrain2 = hstack((q2, P2)).T +ztrain2 = concatenate((zptrain2.flatten(), zqtrain2.flatten())) + +zqtrain3 = Q3 - q3 +zptrain3 = p3 - P3 +xtrain3 = q3.flatten() +ytrain3 = P3.flatten() +xtrain3 = hstack((q3, P3)).T +ztrain3 = concatenate((zptrain3.flatten(), zqtrain3.flatten())) + +zqtrain4 = Q4 - q4 +zptrain4 = p4 - P4 +xtrain4 = q4.flatten() +ytrain4 = P4.flatten() +xtrain4 = hstack((q4, P4)).T +ztrain4 = concatenate((zptrain4.flatten(), zqtrain4.flatten())) + +Ntest = 30 +nturntest = int(nm/4) +qminmap = 0.16 +qmaxmap = 0.31 +pminmap = 0 +pmaxmap = 2*np.pi + +random.seed(1) +q0map = linspace(qminmap,qmaxmap,Ntest) +p0map = linspace(pminmap,pmaxmap,Ntest) +q0map = random.sample(list(q0map), Ntest) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map) +P0map = np.array(p0map) +X0test = np.stack((Q0map, P0map, np.zeros(Ntest))).T + +yinttest = zeros([nph*nturntest + 1, 3, Ntest]) +for ipart in range(0, Ntest): + fieldlines.init(nph=nph, am=-3, an=2, aeps=eps, aphase=0.0, arlast=X0test[ipart,0]) + X0test[ipart, 0] = qe/c*fieldlines.ath(X0test[ipart,0], X0test[ipart,1], X0test[ipart,2]) + + temp = zeros([nph*nturntest + 1, 3]) + temp[0,:] = [X0test[ipart,0], X0test[ipart,1], 0.0] + + + for kph in arange(nph*nturntest): + temp[kph+1, :] = temp[kph, :] + fieldlines.timestep(temp[kph+1, :]) + yinttest[:,:,ipart] = temp +yinttest[:,0] = yinttest[:, 0]*1e2 +yinttest[:, 1] = yinttest[:, 1] \ No newline at end of file diff --git a/python/05_tokamak/SplittedSympGPR/common.py b/python/05_tokamak/SplittedSympGPR/common.py new file mode 100644 index 0000000..b9947a3 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/common.py @@ -0,0 +1,153 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" + +from numpy import empty, nan, zeros, ones, sqrt, mod, array +from field_test import field +#%% + +taub = 7800 # estimated bounce time + +def timesteps(steps_per_bounce, nbounce): + """ compute step size and number of timesteps """ + return taub/steps_per_bounce, nbounce*steps_per_bounce + +f = field() + +qe = 1.0; m = 1.0; c = 1.0 # particle charge, mass and speed of light +mu = 1e-5 # magnetic moment + +# Initial conditions +r0 = 0.1 +th0 = 1.5 +ph0 = 0.0 +vpar0 = 0.0 + +# Compute toroidal momentum from initial conditions +f.evaluate(r0, th0, ph0) +pph0 = m*vpar0*f.hph + qe/c*f.Aph + +# Buffer to avoid double evaluation within black-box root finding routines +bufsize = 16 +kbuf = 0 +zbuf = empty((4,bufsize)); zbuf[:] = nan +neval = 0 +buffer = empty(bufsize,dtype=object) +fbuf = empty(bufsize,dtype=object) + +dHdvpar = lambda vpar: m*vpar +pth = lambda vpar,hth,Ath: m*vpar*hth + qe*Ath/c +pph = lambda vpar,hph,Aph: m*vpar*hph + qe*Aph/c + +def get_val(z): + """ evaluates H, pth, vpar at z """ + f.evaluate(z[0], z[1], z[2]) + vpar = 1.0/f.hph*(z[3]-qe/c*f.Aph) + H = m*vpar**2/2 + mu*f.B + qe*f.Phie + pth = m*f.hth*vpar + qe/c*f.Ath + ret = [H, pth, vpar] + return ret + +def get_der(z): + """ evaluates H, pth, vpar and first derivatives at z """ + global neval, kbuf + + # re-evaluation at same point + if(bufsize>0): + for kb in range(bufsize): + if all(z==zbuf[:,kb]): + f.__dict__ = fbuf[kb].__dict__.copy() + return buffer[kb] + + neval = neval+1 + + [H,pth,vpar] = get_val(z) + + dvpardx = -(qe/(m*c)*f.dAph + vpar*f.dhph)/f.hph + dvpardpph = 1.0/(m*f.hph) + + dHdx = m*vpar*dvpardx + mu*f.dB + qe*f.dPhie + dHdpph = m*vpar/f.hph + + dpthdx = m*dvpardx*f.hth + m*vpar*f.dhth + qe/c*f.dAth + dpthdpph = f.hth/f.hph + + ret = [H, pth, vpar, dHdx, dHdpph, dpthdx, dpthdpph, dvpardx, dvpardpph] + + # fill buffer + if(bufsize>0): + zbuf[:,kbuf] = z[:] + buffer[kbuf] = ret + fbuf[kbuf] = field() + fbuf[kbuf].__dict__ = f.__dict__.copy() + kbuf = mod(kbuf+1,bufsize) + + return ret + +def get_der2(z): + """ evaluates H, pth, vpar with first and second derivatives at z """ + [H, pth, vpar, + dHdx, dHdpph, dpthdx, dpthdpph, dvpardx, dvpardpph] = get_der(z) + + d2pthdx2 = zeros(6) # d2dr2, d2dth2, d2dph2, d2drdth, d2drdph, d2dthdph + d2pthdpphdz = zeros(4) # d2dpphdr, d2dpphdth, d2dpphdph, d2dpph2 + d2Hdx2 = zeros(6) + d2Hdpphdz = zeros(4) + d2vpardx2 = zeros(6) + d2vpardpphdz = zeros(4) + + d2vpardx2[:3] = -(qe/(m*c)*f.d2Aph[:3] + f.d2hph[:3]*vpar + + 2*f.dhph*dvpardx)/f.hph + d2vpardx2[3] = -(qe/(m*c)*f.d2Aph[3] + f.d2hph[3]*vpar + + f.dhph[0]*dvpardx[1] + f.dhph[1]*dvpardx[0])/f.hph + d2vpardpphdz[:3] = -1.0/(m*f.hph**2)*f.d2hph[:3] + + d2pthdx2[:3] = m*(d2vpardx2[:3]*f.hth + 2*dvpardx*f.dhth + vpar*f.d2hth[:3] + + qe/(m*c)*f.d2Ath[:3]) + + d2pthdx2[3] = m*(d2vpardx2[3]*f.hth + dvpardx[0]*f.dhth[1] + + dvpardx[1]*f.dhth[0] + vpar*f.d2hth[3] + qe/(m*c)*f.d2Ath[3]) + d2pthdpphdz[:3] = f.dhth/f.hph - f.hth/f.hph**2*f.dhph + + d2Hdx2[:3] = m*(dvpardx[:3]**2 + vpar*d2vpardx2[:3]) \ + + mu*f.d2B[:3] + qe*f.d2Phie[:3] + d2Hdx2[3] = m*(dvpardx[0]*dvpardx[1] + vpar*d2vpardx2[3]) \ + + mu*f.d2B[3] + qe*f.d2Phie[3] + d2Hdpphdz[:3] = m*(1.0/f.hph*dvpardx - vpar/f.hph**2*f.dhph) + + return [H, pth, vpar, + dHdx, dHdpph, dpthdx, dpthdpph, dvpardx, dvpardpph, + d2pthdx2, d2pthdpphdz, d2Hdx2, d2Hdpphdz, d2vpardx2, d2vpardpphdz] + +class NewtonResult(): + x = array([]) + xold = array([]) + +def newton1(f, x0, rtol, atol, args): + ret = NewtonResult() + x = array([x0]) + xold = array([x0*(1e0+1e30*rtol)]) + fval = atol*array([1e30,1e30]) + while(abs(fval[0]) > atol and abs(x-xold)/(abs(x)*(1e0+1e-30)) > rtol): + fval = f(x, *args) + xold = x + x = x - fval[0]/fval[1] + ret.xold = xold + ret.x = x + return ret + +def newton(f, x0, rtol, atol, args): + ret = NewtonResult() + x = x0 + xold = x0*(1e0+1e30*rtol) + fval = atol*array([1e30*ones(x0),1e30*ones(x0)]) + while(abs(fval[0]) > atol and + sqrt((x-xold).dot(x-xold))/(sqrt(x.dot(x))*(1e0+1e-30)) > rtol): + fval = f(x, *args) + xold = x + x = x - fval[0]/fval[1] + ret.xold = xold + ret.x = x + return ret diff --git a/python/05_tokamak/SplittedSympGPR/field_test.py b/python/05_tokamak/SplittedSympGPR/field_test.py new file mode 100644 index 0000000..2e17fa5 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/field_test.py @@ -0,0 +1,47 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import sin, cos, zeros_like, array + +B0 = 1.0 # magnetic field modulus normalization +iota0 = 1.0 # constant part of rotational transform +a = 0.5 # (equivalent) minor radius +R0 = 1.0 # (equivalent) major radius + +class field: + """ Model tokamak field with B ~ B0*(1-r/R0*cos(th)) """ + def evaluate(self, r, th, ph): + cth = cos(th) + sth = sin(th) + zer = zeros_like(r) + + self.Ath = B0*(r**2/2.0 - r**3/(3.0*R0)*cth) + self.dAth = array((B0*(r - r**2/R0*cth), B0*r**3*sth/(3.0*R0), zer)) + self.d2Ath = array((B0*(1.0 - 2.0*r/R0*cth), + B0*r**3*cth/(3.0*R0), + zer, + B0*r**2/R0*sth, + zer, + zer)) + + self.Aph = -B0*iota0*(r**2/2.0-r**4/(4.0*a**2)) + self.dAph = array((-B0*iota0*(r-r**3/a**2), zer, zer)) + self.d2Aph = array((-B0*iota0*(1.0-3.0*r**2/a**2), zer, zer, zer, zer, zer)) + + self.hth = iota0*(1.0-r**2/a**2)*r**2/R0 + self.dhth = array(((2.0*iota0*r*(a**2-2.0*r**2))/(a**2*R0), zer, zer)) + self.d2hth = array(((2.0*iota0*(a**2-6.0*r**2))/(a**2*R0), zer, zer, zer, zer, zer)) + + self.hph = R0 + r*cth + self.dhph = array((cth, -r*sth, zer)) + self.d2hph = array((zer, -r*cth, zer, -sth, zer) ) + + self.B = B0*(1.0 - r/R0*cth) + self.dB = array((-B0/R0*cth, B0*r/R0*sth, zer)) + self.d2B = array((zer, B0*r/R0*cth, zer, B0/R0*sth, zer, zer)) + + self.Phie = zer + self.dPhie = array((zer, zer, zer)) + self.d2Phie = array((zer, zer, zer, zer, zer, zer)) diff --git a/python/05_tokamak/SplittedSympGPR/fieldlines.f90 b/python/05_tokamak/SplittedSympGPR/fieldlines.f90 new file mode 100644 index 0000000..7f6e388 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/fieldlines.f90 @@ -0,0 +1,174 @@ +module fieldlines +implicit none +save + +real(8), parameter :: pi = 4d0*atan(1d0) + +real(8), parameter :: B0 = 1d0 +real(8), parameter :: iota0 = 1d0 ! constant part of rotational transform +real(8), parameter :: a = 0.5d0 ! (equivalent) minor radius +real(8), parameter :: R0 = 1d0 ! (equivalent) major radius + +real(8) :: dph ! Timestep +real(8) :: eps ! Absolute perturbation +real(8) :: phase ! Perturbation phase +integer :: m, n ! Perturbation harmonics + +real(8) :: rlast = 0d0 + +contains + +subroutine init(nph, am, an, aeps, aphase, arlast) + integer :: nph, am, an + real(8) :: aeps, aphase, arlast + + dph = 2*pi/nph + m = am + n = an + eps = aeps + phase = aphase + rlast = arlast +end subroutine init + + +function Ath(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: Ath + + Ath = B0*(r**2/2d0 - r**3/(3d0*R0)*cos(th)) +end function Ath + + +function dAthdr(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAthdr + + dAthdr = B0*(r - r**2/R0*cos(th)) +end function dAthdr + + +function dAthdth(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAthdth + + dAthdth = B0*r**3*sin(th)/(3d0*R0) +end function dAthdth + + +function Aph(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: Aph + + Aph = -B0*iota0*(r**2/2d0-r**4/(4d0*a**2))*(1d0+eps*cos(m*th+n*ph+phase)) +end function Aph + + +function dAphdr(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAphdr + + dAphdr = -B0*iota0*(r-r**3/a**2)*(1d0+eps*cos(m*th+n*ph+phase)) +end function dAphdr + + +function dAphdth(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAphdth + + dAphdth = B0*iota0*(r**2/2d0-r**4/(4d0*a**2))*m*eps*sin(m*th + n*ph + phase) +end function dAphdth + + +subroutine f_r(x, y, dy, args) + ! Implicit function to solve for r + real(8), intent(in) :: x ! r + real(8), intent(in) :: args(3) ! pth, th, ph + real(8), intent(out) :: y ! Target function + real(8), intent(out) :: dy ! Jacobian + ! Compute r implicitly for given th, p_th and ph + y = args(1) - Ath(x, args(2), args(3)) ! pth - pth(r, th, ph) + dy = -dAthdr(x, args(2), args(3)) +end subroutine f_r + + +function compute_r(z, rstart) result(r) + real(8), intent(in) :: z(3) + real(8), intent(in) :: rstart + real(8) :: r + + integer :: k + real(8) :: y, dy + + r = rstart + do k = 1, 20 ! Newton iterations + call f_r(r, y, dy, z) + r = r - y/dy + end do +end function compute_r + + +subroutine F_tstep(znew, y, dy, zold) + real(8), intent(in) :: znew(2) + real(8), intent(in) :: zold(3) + real(8), intent(out) :: y(2) ! Target function + real(8), intent(out) :: dy(2,2) ! Jacobian + + real(8) :: z(3), r + real(8) :: dApdr, dApdt, dAtdr, dAtdt + + z(1:2) = 0.5d0*(zold(1:2) + znew) + z(3) = zold(3) + 0.5d0*dph + r = compute_r(z, rlast) + rlast = r + + dApdr = dAphdr(r, z(2), z(3)) + dApdt = dAphdth(r, z(2), z(3)) + dAtdr = dAthdr(r, z(2), z(3)) + dAtdt = dAthdth(r, z(2), z(3)) + + y(1) = zold(1) - znew(1) + dph*(dApdt - dApdr*dAtdt/dAtdr) + y(2) = zold(2) - znew(2) - dph*dApdr/dAtdr + + ! print *, y + + ! TODO: look in SIMPLE + dy(1,1) = 0d0 + dy(2,1) = 0d0 + dy(1,2) = 0d0 + dy(2,2) = 0d0 + +end subroutine F_tstep + + +subroutine timestep(z) + real(8), intent(inout) :: z(3) + + integer :: info + real(8) :: zold(3) + real(8) :: z12new(2) + real(8) :: fvec(3) + + zold = z + z12new = z(1:2) + + call hybrd1(f_tstep_wrap, 2, z12new, fvec, 1d-13, info) +! print *, info + + z(1:2) = z12new + z(3) = zold(3) + dph + + contains + + subroutine f_tstep_wrap(n, x, fvec, iflag) + integer, intent(in) :: n + double precision, intent(in) :: x(n) + double precision, intent(out) :: fvec(n) + integer, intent(in) :: iflag + + real(8) :: dummy(2,2) ! for later Jacobian + + call F_tstep(x, fvec, dummy, zold) + end subroutine f_tstep_wrap +end subroutine timestep + +end module fieldlines diff --git a/python/05_tokamak/SplittedSympGPR/fieldlines_fast.py b/python/05_tokamak/SplittedSympGPR/fieldlines_fast.py new file mode 100644 index 0000000..cf93a19 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/fieldlines_fast.py @@ -0,0 +1,54 @@ +#%% +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import zeros, array, arange, append, hstack, pi, cos, sin, mod +from scipy.optimize import fsolve, root +import common +from plotting import plot_orbit +from matplotlib.pyplot import * +from fieldlines import fieldlines + +# +# Attention: Variables are [pth, th, ph], so momentum in first component and +# "time" in third. Change to usual Z = z[:,1::-1] before using! +# + +nturn = 1000 # Number of full turns +nph = 32 # Number of steps per turn + +r0 = 0.3 +th0 = 0.0 +ph0 = 0.0 + +fieldlines.init(nph=nph, am=-3, an=2, aeps=.001, aphase=0.0, arlast=r0) + +pth0 = fieldlines.ath(r0, th0, ph0) + +#%% + +z = zeros([nph*nturn + 1, 3]) +z[0,:] = [pth0, th0, 0.0] + +from time import time +tic = time() + +for kph in arange(nph*nturn): + z[kph+1, :] = z[kph, :] + fieldlines.timestep(z[kph+1, :]) + + +print(f'Time taken: {time()-tic}') +print(f'Safety factor: {(z[nph, 2] - z[0, 2])/(z[nph, 1] - z[0, 1])}') + +#%% +figure() +plot(z[::nph,0], mod(z[::nph,1], 2*pi), ',') + +#%% +figure() +th = z[::nph,1] +r = array([fieldlines.compute_r(zk, 0.3) for zk in z[::nph,:]]).flatten() +plot(r*cos(th), r*sin(th), ',') diff --git a/python/05_tokamak/SplittedSympGPR/fieldlines_mid.py b/python/05_tokamak/SplittedSympGPR/fieldlines_mid.py new file mode 100644 index 0000000..49d171f --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/fieldlines_mid.py @@ -0,0 +1,67 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import zeros, array, arange, append, hstack, pi, cos, sin +from scipy.optimize import fsolve, root +from common import f, qe, c +import common +from plotting import plot_orbit +from matplotlib.pyplot import * + +nturn = 50 # Number of full turns +nph = 10 # Number of steps per turn +dph = 2*pi/nph # Integrator step in phi +epspert = .005 # Absolute perturbation +m = -3 # Poloidal perturbation harmonic +n = 2 # Toroidal perturbation harmonic + +r0 = 0.3 +th0 = 0.0 +ph0 = 0.0 +f.evaluate(r0, th0, ph0) +pth0 = qe/c*f.Ath + +z = zeros([3, nph*nturn + 1]) +z[:,0] = [pth0, th0, 0.0] + +def compute_r(pth, th, ph): + # Compute r implicitly for given th, p_th and ph + def rootfun(x): + f.evaluate(x, th, ph) + return pth - qe/c*f.Ath # pth - pth(r, th, ph) + + sol = fsolve(rootfun, r0) + return sol + +def F_tstep(znew, zold): + z = zeros(3) + z[0:2] = 0.5*(zold[0:2] + znew) + z[2] = zold[2] + 0.5*dph + r = compute_r(z[0], z[1], z[2]) + f.evaluate(r, z[1], z[2]) + f.dAph = f.dAph - epspert*sin(m*z[1] + n*z[2]) # Apply perturbation on Aph + ret = zeros(2) + ret[0] = zold[0] - znew[0] + dph*(f.dAph[1] - f.dAph[0]*f.dAth[1]/f.dAth[0]) # dAph/dth + ret[1] = zold[1] - znew[1] - dph*f.dAph[0]/f.dAth[0] # dAph/dpth + return ret + +from time import time +tic = time() + +for kph in arange(nph*nturn): + zold = z[:,kph] + sol = root(F_tstep, zold[0:2], method='hybr', tol=1e-12, args=(zold)) + z[0:2, kph+1] = sol.x + z[2, kph+1] = z[2, kph] + dph + +print(f'Time taken: {time()-tic}') +print(f'Safety factor: {(z[2, nph] - z[2, 0])/(z[1, nph] - z[1, 0])}') + +#%% +#plot_orbit(z) +th = z[1,:] #::nph] +r = array([compute_r(zk[0], zk[1], 0.0) for zk in z.T]).flatten() +plot(r*cos(th), r*sin(th), '.') +# %% diff --git a/python/05_tokamak/SplittedSympGPR/fieldlines_rk.py b/python/05_tokamak/SplittedSympGPR/fieldlines_rk.py new file mode 100644 index 0000000..0b4d0de --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/fieldlines_rk.py @@ -0,0 +1,77 @@ +#%% +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import zeros, array, arange, append, hstack, pi, cos, sin, mod, pi +from scipy.integrate import odeint +from scipy.optimize import fsolve +from common import f, qe, c +import common +from plotting import plot_orbit +from matplotlib.pyplot import * + +nturn = 50 +nph = 10 +dph = 2*pi/nph +eps = .0001 # Absolute perturbation +m = -3 # Poloidal perturbation harmonic +n = 2 # Toroidal perturbation harmonic +p = array([0.0, pi/5.0]) # Perturbation phase + +r0 = 0.3 +th0 = 0.0 +ph0 = 0.0 +f.evaluate(r0, th0, ph0) +pth0 = qe/c*f.Ath + +z = zeros([3, nturn*nph+1]) +z[:,0] = [pth0, th0, 0.0] +rprev = r0 +#%% +def compute_r(pth, th, ph): + # Compute r implicitly for given th, p_th and ph + global rprev + def rootfun(x): + f.evaluate(x, th, ph) + return pth - qe/c*f.Ath # pth - pth(r, th, ph) + + sol = fsolve(rootfun, rprev) + reprev = sol + return sol + +def zdot(z, ph): + r = compute_r(z[0], z[1], ph) + f.evaluate(r, z[1], ph) + f.dAph[1] = f.dAph[1] - eps*sin(m*z[1] + n*ph + p) # Apply perturbation on Aph + ret = zeros(2) + ret[0] = f.dAph[1] - f.dAph[0]*f.dAth[1]/f.dAth[0] # pthdot = dAph/dth + ret[1] = -f.dAph[0]/f.dAth[0] # thdot = dAph/dpth + return ret + +from time import time +tic = time() + +for kturn in arange(nph*nturn): + + znew = odeint(zdot, z[0:2, kturn], [z[2, kturn], z[2, kturn]+dph]) + + z[0:2, kturn+1] = znew[-1, :] + z[2, kturn+1] = z[2, kturn] + dph + +print('Time taken: {}'.format(time()-tic)) +print(f'Safety factor: {(z[2, nph] - z[2, 0])/(z[1, nph] - z[1, 0])}') +#%% +figure() +plot(mod(z[1,::nph], 2*pi), z[0,::nph], ',') +#%% +figure() +plot(mod(z[1,:], 2*pi), z[0,:], ',') +#%% +#plot_orbit(z) +th = z[1,::nph] +r = array([compute_r(zk[0], zk[1], 0.0) for zk in z[:,::nph].T]).flatten() +figure() +plot(r*cos(th), r*sin(th), '.') +# %% diff --git a/python/05_tokamak/SplittedSympGPR/func.py b/python/05_tokamak/SplittedSympGPR/func.py new file mode 100644 index 0000000..67948c4 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/func.py @@ -0,0 +1,315 @@ +import numpy as np +from scipy.optimize import newton +from scipy.linalg import solve_triangular +import scipy +from sklearn.metrics import mean_squared_error +from fieldlines import fieldlines +from scipy.integrate import solve_ivp + +from kernels import * + +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def buildKreg(xin, x0in, hyp, K): + # set up "usual" covariance matrix for GP on regular grid (q,p) + # print(hyp) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0] + N0 = K.shape[1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = f_kern( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def build_dK(xin, x0in, hyp): + # set up covariance matrix + N = len(xin)//2 + N0 = len(x0in)//2 + l = hyp[:-1] + sig = hyp[-1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + k11 = np.empty((N0, N)) + k12 = np.empty((N0, N)) + k21 = np.empty((N0, N)) + k22 = np.empty((N0, N)) + + dK = [] + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N): + for lk in range(N0): + k11[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + k21[k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + k12[k,lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + k22[k,lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + #dK[:,:] = sig*dK + return dK + +def gpsolve(Ky, ft): + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_triangular( + L.T, solve_triangular(L, ft, lower=True, check_finite=False), + lower=False, check_finite=False) + + return L, alpha + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +def nll_chol_reg(hyp, x, y, N): + K = np.empty((N, N)) + buildKreg(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +# negative log-posterior +def nll_chol(hyp, x, y, N, buildK = build_K): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +def nll_grad(hyp, x, y, N): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Kyinv = np.linalg.inv(Ky) # invert GP matrix + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + nlp_val = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + dK = build_dK(x, x, hyp[:-1]) + alpha = Kyinv.dot(y) + # Rasmussen (5.9) + nlp_grad = np.array([ + -0.5*alpha.T.dot(dK[0].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[0])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[1])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[2])) + ]) + return nlp_val, nlp_grad + + +def guessP(x, y, hypp, xtrainp, ztrainp, Kyinvp, N): + Ntest = 1 + Kstar = np.empty((Ntest, int(len(xtrainp)/2))) + buildKreg(np.hstack((x,y)), xtrainp, hypp, Kstar) + Ef = Kstar.dot(Kyinvp.dot(ztrainp)) + return Ef + +def calcQ(x,y, xtrain, l, Kyinv, ztrain): + # get \Delta q from GP on mixed grid. + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + dq = qGP[1] + return dq + +def Pnewton(P, x, y, l, xtrain, Kyinv, ztrain): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x, P)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = pGP[0] - y + P + # print(pGP[0]) + return f + +def calcP(x,y, l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest): + # as P is given in an implicit relation, use newton to solve for P + # use the GP on regular grid (q,p) for a first guess for P + pgss = guessP([x], [y], hypp, xtrainp, ztrainp, Kyinvp, Ntest) + res, r = newton(Pnewton, pgss, full_output=True, maxiter=500, disp=True, + args = (np.array([x]), np.array ([y]), l, xtrain, Kyinv, ztrain)) + return res + + + +def applymap_tok(nm, Ntest, Q0map, P0map, + xtrainp1, ztrainp1, Kyinvp1, hypp1, xtrain1, ztrain1, Kyinv1, hyp1, + xtrainp2, ztrainp2, Kyinvp2, hypp2, xtrain2, ztrain2, Kyinv2, hyp2, + xtrainp3, ztrainp3, Kyinvp3, hypp3, xtrain3, ztrain3, Kyinv3, hyp3, + xtrainp4, ztrainp4, Kyinvp4, hypp4, xtrain4, ztrain4, Kyinv4, hyp4): + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + i = 0 + r_gss = 0.3 + r_cut = 0.5 + # loop through all test points and all time steps + while i < nm-4: + for k in range(0, Ntest): + if np.isnan(pmap[i, k]): + pmap[i+1,k] = np.nan + else: + # set new P including Newton for implicit Eq + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], hyp1, hypp1, xtrainp1, ztrainp1, Kyinvp1, xtrain1, ztrain1, Kyinv1, Ntest) + + ph = 2*np.pi/4 + zk = np.array([pmap[i+1, k]*1e-2, qmap[i,k], ph]) + temp = fieldlines.compute_r(zk, r_gss) + if temp > r_cut or pmap[i+1, k] < 0.0: + pmap[i+1, k] = np.nan + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain1, hyp1, Kyinv1, ztrain1) + qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2*np.pi) + # + i = i + 1 + for k in range(0, Ntest): + if np.isnan(pmap[i, k]): + pmap[i+1,k] = np.nan + else: + # set new P including Newton for implicit Eq. (42) + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], hyp2, hypp2, xtrainp2, ztrainp2, Kyinvp2, xtrain2, ztrain2, Kyinv2, Ntest) + ph = 2*np.pi/2 + zk = np.array([pmap[i+1, k]*1e-2, qmap[i,k], ph]) + temp = fieldlines.compute_r(zk, r_gss) + if temp > r_cut or pmap[i+1, k] < 0.0: + pmap[i+1, k] = np.nan + + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain2, hyp2, Kyinv2, ztrain2) + qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2*np.pi) + + i = i + 1 + for k in range(0, Ntest): + if np.isnan(pmap[i, k]): + pmap[i+1,k] = np.nan + else: + # set new P including Newton for implicit Eq + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], hyp3, hypp3, xtrainp3, ztrainp3, Kyinvp3, xtrain3, ztrain3, Kyinv3, Ntest) + ph = 2*3*np.pi/4 + zk = np.array([pmap[i+1, k]*1e-2, qmap[i,k], ph]) + temp = fieldlines.compute_r(zk, r_gss) + if temp > r_cut or pmap[i+1, k] < 0.0: + pmap[i+1, k] = np.nan + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain3, hyp3, Kyinv3, ztrain3) + qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2*np.pi) + + i = i + 1 + for k in range(0, Ntest): + if np.isnan(pmap[i, k]): + pmap[i+1,k] = np.nan + else: + # set new P including Newton for implicit Eq. + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], hyp4, hypp4, xtrainp4, ztrainp4, Kyinvp4, xtrain4, ztrain4, Kyinv4, Ntest) + ph = 2*np.pi + zk = np.array([pmap[i+1, k]*1e-2, qmap[i,k], ph]) + temp = fieldlines.compute_r(zk, r_gss) + if temp > r_cut or pmap[i+1, k] < 0.0: + pmap[i+1, k] = np.nan + + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain4, hyp4, Kyinv4, ztrain4) + qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2*np.pi) + i = i + 1 + return qmap, pmap diff --git a/python/05_tokamak/SplittedSympGPR/init_func.py b/python/05_tokamak/SplittedSympGPR/init_func.py new file mode 100644 index 0000000..15ce043 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/init_func.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +kern_sqexp_sin = exp(-sin(0.5*x)**2/(2.0*l**2)) + +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern_x = kern_sqexp_sin.subs(x, xa-xb).subs(l, lx) +kern = (kern_x*kern_y).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/05_tokamak/SplittedSympGPR/kernels.f90 b/python/05_tokamak/SplittedSympGPR/kernels.f90 new file mode 100644 index 0000000..a6a970b --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/kernels.f90 @@ -0,0 +1,231 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0 & + *x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = 0.25d0*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - sin(0.5d0*x_a & + - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp(0.5d0*(-lx**2* & + (y_a - y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly** & + 2))/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly & + **4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = -0.5d0*(y_a - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly & + **2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - & + 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx**2*ly**2) +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0.25d0*(y_a - y_b)*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - & + sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)** & + 2)/(lx**2*ly**2))/(lx**4*ly**2) +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))/ly**6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0.5d0*(1.0d0*ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2 & + *(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly & + **2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx** & + 2*ly**4) +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a & + - 0.5d0*x_b)**2/lx**2)*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = 1.0d0*(y_a - y_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-0.5d0*lx**4*cos(1.0d0*x_a - 1.0d0*x_b) + lx**2*( & + 0.75d0*cos(1.0d0*x_a - 1.0d0*x_b) + 0.5d0)*sin(0.5d0*x_a - 0.5d0* & + x_b)**2 - 0.25d0*sin(0.5d0*x_a - 0.5d0*x_b)**4*cos(0.5d0*x_a - & + 0.5d0*x_b)**2)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2*(y_a & + - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))* & + sin(0.5d0*x_a - 0.5d0*x_b)**2/(lx**3*ly**4) +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = (lx**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2)*(y_a - & + y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - & + 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/(lx**5*ly**2) +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0.25d0*(y_a - y_b)**2*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b & + ) - sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)* & + exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0* & + x_b)**2)/(lx**2*ly**2))/(lx**4*ly**3) +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + 5.0d0*ly**2*(y_a - y_b)**2 - 1.0d0*( & + y_a - y_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = (ly**2 - 0.5d0*(y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b) & + /(lx**2*ly**5) +end function diff --git a/python/05_tokamak/SplittedSympGPR/main.py b/python/05_tokamak/SplittedSympGPR/main.py new file mode 100644 index 0000000..e80e3c4 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/main.py @@ -0,0 +1,135 @@ +import numpy as np +import random +import ghalton +from scipy.optimize import minimize +from func import (build_K, buildKreg, applymap_tok, nll_chol, nll_chol_reg, nll_grad) +import tkinter +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from sklearn.metrics import mean_squared_error +import scipy +from scipy.integrate import solve_ivp +from mpl_toolkits.mplot3d import Axes3D +from calc_fieldlines import (q1, Q1, p1, P1, zqtrain1, zptrain1, xtrain1, ztrain1, X0test, yinttest, Ntest, nphmap, N, nm, + q2, Q2, p2, P2, zqtrain2, zptrain2, xtrain2, ztrain2, + q3, Q3, p3, P3, zqtrain3, zptrain3, xtrain3, ztrain3, + q4, Q4, p4, P4, zqtrain4, zptrain4, xtrain4, ztrain4) +#%% init parameters +sig2_n = 1e-8 #noise**2 in observations +Q0map = X0test[:,1] +P0map = X0test[:,0]*1e2 +def regGP(q, p, P, N): + # Usual GP regression of P over (q,p) + xtrainp = np.hstack((q, p)) + ztrainp = P-p + + def nll_transform2(log10hyp, sig2n, x, y, N): + hyp = 10**log10hyp + return nll_chol_reg(np.hstack((hyp, [sig2n])), x, y, N) + res = minimize(nll_transform2, np.array((-1,-1, 1)), args = (sig2_n, xtrainp, ztrainp.T.flatten(), N), method='L-BFGS-B') + print(res.success) + + lp = 10**res.x[0:2] + sigp = 10**res.x[2] + hypp = np.hstack((lp, sigp)) + print('Optimized lengthscales for regular GP: lq =', "{:.2f}".format(lp[0]), 'lp = ', "{:.2f}".format(lp[1]), 'sig = ', "{:.2f}".format(sigp)) + # build K and its inverse + Kp = np.zeros((N, N)) + buildKreg(xtrainp, xtrainp, hypp, Kp) + Kyinvp = scipy.linalg.inv(Kp + sig2_n*np.eye(Kp.shape[0])) + return Kyinvp, hypp, xtrainp, ztrainp + +def nll_transform_grad(log10hyp, sig2n, x, y, N): + hyp = log10hyp + out = nll_grad(np.hstack((hyp, [sig2n])), x, y, N) + return out[0] + +def GP(xtrain, ztrain, N): + # symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) + res = minimize(nll_transform_grad, np.array((0.5, 0.5, 1)), args = (sig2_n, xtrain, ztrain.T.flatten(), 2*N), method='L-BFGS-B') + sol1 = res.x[0:2] + sig = res.x[2] + print(res.success) + l = [np.abs(sol1[0]), np.abs(sol1[1])] + print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1]), 'sig = ', "{:.2f}".format(sig)) + + #build K(x,x') and regularized inverse with sig2_n + # K(x,x') corresponds to L(q,P,q',P') + hyp = np.hstack((l, sig)) + K = np.empty((2*N, 2*N)) + build_K(xtrain, xtrain, hyp, K) + Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + + # caluate training error + Eftrain = K.dot(Kyinv.dot(ztrain)) + outtrain = mean_squared_error(ztrain, Eftrain) + print('training error', "{:.1e}".format(outtrain)) + + return Kyinv, hyp +#%% set up GP +# as indicated in Algorithm 1: Semi-implicit symplectic GP map +#hyperparameter optimization of length scales (lq, lp) + +# Step 1: Usual GP regression of P over (q,p) +#fit GP + hyperparameter optimization to have a first guess for newton for P +Kyinvp1, hypp1, xtrainp1, ztrainp1 = regGP(q1, p1, P1, N) +Kyinvp2, hypp2, xtrainp2, ztrainp2 = regGP(q2, p2, P2, N) +Kyinvp3, hypp3, xtrainp3, ztrainp3 = regGP(q3, p3, P3, N) +Kyinvp4, hypp4, xtrainp4, ztrainp4 = regGP(q4, p4, P4, N) + +#%% +# Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) +# hyperparameter optimization for lengthscales (lq, lp) and GP fitting + +Kyinv1, hyp1 = GP(xtrain1, ztrain1, N) +Kyinv2, hyp2 = GP(xtrain2, ztrain2, N) +Kyinv3, hyp3 = GP(xtrain3, ztrain3, N) +Kyinv4, hyp4 = GP(xtrain4, ztrain4, N) + +#%% Application of symplectic map + +outq, outp = applymap_tok( + nm, Ntest, Q0map, P0map, + xtrainp1, ztrainp1, Kyinvp1, hypp1, xtrain1, ztrain1, Kyinv1, hyp1, + xtrainp2, ztrainp2, Kyinvp2, hypp2, xtrain2, ztrain2, Kyinv2, hyp2, + xtrainp3, ztrainp3, Kyinvp3, hypp3, xtrain3, ztrain3, Kyinv3, hyp3, + xtrainp4, ztrainp4, Kyinvp4, hypp4, xtrain4, ztrain4, Kyinv4, hyp4) + + +qmap = outq +pmap = outp +fmap = np.stack((qmap, pmap)) + +fintmap = yinttest +fintmap[:,0] = yinttest[:,0] +fintmap[:,1] = yinttest[:,1] +#%% plot results + +plt.rc('xtick',labelsize=20) +plt.rc('ytick',labelsize=20) +plt.figure(figsize = [10,3]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(np.mod(qmap[::4,i], 2*np.pi), 1e-2*pmap[::4,i], 'k^', label = 'GP', markersize = 0.5) +plt.xlabel(r"$\vartheta$", fontsize = 20) +plt.ylabel(r"$p_\vartheta$", fontsize = 20) +plt.ylim([0.007, 0.06]) +plt.tight_layout() +plt.subplot(1,3,2) +plt.plot(np.mod(fintmap[::32,1],2*np.pi), 1e-2*fintmap[::32,0], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.xlabel(r"$\vartheta$", fontsize = 20) +plt.ylabel(r"$p_\vartheta$", fontsize = 20) +plt.ylim([0.007, 0.06]) +plt.tight_layout() + +plt.subplot(1,3,3) +plt.plot(np.mod(fintmap[::32,1],2*np.pi), 1e-2*fintmap[::32,0], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +for i in range(0, Ntest): + plt.plot(np.mod(qmap[::4,i], 2*np.pi), 1e-2*pmap[::4,i], 'k^', label = 'GP', markersize = 0.5) + +plt.xlabel(r"$\vartheta$", fontsize = 20) +plt.ylabel(r"$p_\vartheta$", fontsize = 20) +plt.ylim([0.007, 0.06]) +plt.tight_layout() +plt.show() diff --git a/python/05_tokamak/SplittedSympGPR/make_fieldlines.mk b/python/05_tokamak/SplittedSympGPR/make_fieldlines.mk new file mode 100644 index 0000000..309ef46 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/make_fieldlines.mk @@ -0,0 +1,13 @@ +all: fieldlines.*.so + +fieldlines.*.so: fieldlines.pyf fieldlines.o minpack.o + python3 -m numpy.f2py -m fieldlines -c fieldlines.pyf fieldlines.o minpack.o + +fieldlines.pyf: fieldlines.f90 + python3 -m numpy.f2py fieldlines.f90 -m fieldlines -h fieldlines.pyf --overwrite-signature + +fieldlines.o: fieldlines.f90 + gfortran -fPIC -fbacktrace -c -g fieldlines.f90 + +minpack.o: minpack.f90 + gfortran -fPIC -c -march=native -O3 minpack.f90 diff --git a/python/05_tokamak/SplittedSympGPR/make_kernels.mk b/python/05_tokamak/SplittedSympGPR/make_kernels.mk new file mode 100644 index 0000000..c63c736 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/make_kernels.mk @@ -0,0 +1,10 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py \ No newline at end of file diff --git a/python/05_tokamak/SplittedSympGPR/minpack.f90 b/python/05_tokamak/SplittedSympGPR/minpack.f90 new file mode 100644 index 0000000..c0b393b --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/minpack.f90 @@ -0,0 +1,6036 @@ +! Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +! Redistribution and use in source and binary forms, with or +! without modification, are permitted provided that the +! following conditions are met: + +! 1. Redistributions of source code must retain the above +! copyright notice, this list of conditions and the following +! disclaimer. + +! 2. Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials +! provided with the distribution. + +! 3. The end-user documentation included with the +! redistribution, if any, must include the following +! acknowledgment: + +! "This product includes software developed by the +! University of Chicago, as Operator of Argonne National +! Laboratory. + +! Alternately, this acknowledgment may appear in the software +! itself, if and wherever such third-party acknowledgments +! normally appear. + +! 4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +! WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE +! UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +! THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +! OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +! OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +! OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +! USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +! THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +! DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +! UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +! BE CORRECTED. + +! 5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +! HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +! ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +! INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +! ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +! PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +! SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +! (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +! EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +! POSSIBILITY OF SUCH LOSS OR DAMAGES. + + +subroutine chkder ( m, n, x, fvec, fjac, ldfjac, xp, fvecp, mode, err ) + +!*****************************************************************************80 +! +!! CHKDER checks the gradients of M functions of N variables. +! +! Discussion: +! +! CHKDER checks the gradients of M nonlinear functions in N variables, +! evaluated at a point X, for consistency with the functions themselves. +! +! The user calls CHKDER twice, first with MODE = 1 and then with MODE = 2. +! +! MODE = 1. +! On input, +! X contains the point of evaluation. +! On output, +! XP is set to a neighboring point. +! +! Now the user must evaluate the function and gradients at X, and the +! function at XP. Then the function is called again: +! +! MODE = 2. +! On input, +! FVEC contains the function values at X, +! FJAC contains the function gradients at X. +! FVECP contains the functions evaluated at XP. +! On output, +! ERR contains measures of correctness of the respective gradients. +! +! The function does not perform reliably if cancellation or +! rounding errors cause a severe loss of significance in the +! evaluation of a function. Therefore, none of the components +! of X should be unusually small (in particular, zero) or any +! other value which may cause loss of significance. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, is the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! +! Input, real ( kind = 8 ) X(N), the point at which the jacobian is to be +! evaluated. +! +! Input, real ( kind = 8 ) FVEC(M), is used only when MODE = 2. +! In that case, it should contain the function values at X. +! +! Input, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. When MODE = 2, +! FJAC(I,J) should contain the value of dF(I)/dX(J). +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least M. +! +! Output, real ( kind = 8 ) XP(N), on output with MODE = 1, is a neighboring +! point of X, at which the function is to be evaluated. +! +! Input, real ( kind = 8 ) FVECP(M), on input with MODE = 2, is the function +! value at XP. +! +! Input, integer ( kind = 4 ) MODE, should be set to 1 on the first call, and +! 2 on the second. +! +! Output, real ( kind = 8 ) ERR(M). On output when MODE = 2, ERR contains +! measures of correctness of the respective gradients. If there is no +! severe loss of significance, then if ERR(I): +! = 1.0D+00, the I-th gradient is correct, +! = 0.0D+00, the I-th gradient is incorrect. +! > 0.5D+00, the I-th gradient is probably correct. +! < 0.5D+00, the I-th gradient is probably incorrect. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) eps + real ( kind = 8 ) epsf + real ( kind = 8 ) epslog + real ( kind = 8 ) epsmch + real ( kind = 8 ) err(m) + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) fvecp(m) + integer ( kind = 4 ) i + integer ( kind = 4 ) j + integer ( kind = 4 ) mode + real ( kind = 8 ) temp + real ( kind = 8 ) x(n) + real ( kind = 8 ) xp(n) + + epsmch = epsilon ( epsmch ) + eps = sqrt ( epsmch ) +! +! MODE = 1. +! + if ( mode == 1 ) then + + do j = 1, n + temp = eps * abs ( x(j) ) + if ( temp == 0.0D+00 ) then + temp = eps + end if + xp(j) = x(j) + temp + end do +! +! MODE = 2. +! + else if ( mode == 2 ) then + + epsf = 100.0D+00 * epsmch + epslog = log10 ( eps ) + + err = 0.0D+00 + + do j = 1, n + temp = abs ( x(j) ) + if ( temp == 0.0D+00 ) then + temp = 1.0D+00 + end if + err(1:m) = err(1:m) + temp * fjac(1:m,j) + end do + + do i = 1, m + + temp = 1.0D+00 + + if ( fvec(i) /= 0.0D+00 .and. fvecp(i) /= 0.0D+00 .and. & + abs ( fvecp(i)-fvec(i)) >= epsf * abs ( fvec(i) ) ) then + temp = eps * abs ( (fvecp(i)-fvec(i)) / eps - err(i) ) & + / ( abs ( fvec(i) ) + abs ( fvecp(i) ) ) + end if + + err(i) = 1.0D+00 + + if ( epsmch < temp .and. temp < eps ) then + err(i) = ( log10 ( temp ) - epslog ) / epslog + end if + + if ( eps <= temp ) then + err(i) = 0.0D+00 + end if + + end do + + end if + + return +end +subroutine dogleg ( n, r, lr, diag, qtb, delta, x ) + +!*****************************************************************************80 +! +!! DOGLEG finds the minimizing combination of Gauss-Newton and gradient steps. +! +! Discussion: +! +! Given an M by N matrix A, an N by N nonsingular diagonal +! matrix D, an M-vector B, and a positive number DELTA, the +! problem is to determine the convex combination X of the +! Gauss-Newton and scaled gradient directions that minimizes +! (A*X - B) in the least squares sense, subject to the +! restriction that the euclidean norm of D*X be at most DELTA. +! +! This function completes the solution of the problem +! if it is provided with the necessary information from the +! QR factorization of A. That is, if A = Q*R, where Q has +! orthogonal columns and R is an upper triangular matrix, +! then DOGLEG expects the full upper triangle of R and +! the first N components of Q'*B. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of the matrix R. +! +! Input, real ( kind = 8 ) R(LR), the upper triangular matrix R stored +! by rows. +! +! Input, integer ( kind = 4 ) LR, the size of the R array, which must be +! no less than (N*(N+1))/2. +! +! Input, real ( kind = 8 ) DIAG(N), the diagonal elements of the matrix D. +! +! Input, real ( kind = 8 ) QTB(N), the first N elements of the vector Q'* B. +! +! Input, real ( kind = 8 ) DELTA, is a positive upper bound on the +! euclidean norm of D*X(1:N). +! +! Output, real ( kind = 8 ) X(N), the desired convex combination of the +! Gauss-Newton direction and the scaled gradient direction. +! + implicit none + + integer ( kind = 4 ) lr + integer ( kind = 4 ) n + + real ( kind = 8 ) alpha + real ( kind = 8 ) bnorm + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) gnorm + integer ( kind = 4 ) i + integer ( kind = 4 ) j + integer ( kind = 4 ) jj + integer ( kind = 4 ) k + integer ( kind = 4 ) l + real ( kind = 8 ) qnorm + real ( kind = 8 ) qtb(n) + real ( kind = 8 ) r(lr) + real ( kind = 8 ) sgnorm + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) x(n) + + epsmch = epsilon ( epsmch ) +! +! Calculate the Gauss-Newton direction. +! + jj = ( n * ( n + 1 ) ) / 2 + 1 + + do k = 1, n + + j = n - k + 1 + jj = jj - k + l = jj + 1 + sum2 = 0.0D+00 + + do i = j + 1, n + sum2 = sum2 + r(l) * x(i) + l = l + 1 + end do + + temp = r(jj) + + if ( temp == 0.0D+00 ) then + + l = j + do i = 1, j + temp = max ( temp, abs ( r(l)) ) + l = l + n - i + end do + + if ( temp == 0.0D+00 ) then + temp = epsmch + else + temp = epsmch * temp + end if + + end if + + x(j) = ( qtb(j) - sum2 ) / temp + + end do +! +! Test whether the Gauss-Newton direction is acceptable. +! + wa1(1:n) = 0.0D+00 + wa2(1:n) = diag(1:n) * x(1:n) + qnorm = enorm ( n, wa2 ) + + if ( qnorm <= delta ) then + return + end if +! +! The Gauss-Newton direction is not acceptable. +! Calculate the scaled gradient direction. +! + l = 1 + do j = 1, n + temp = qtb(j) + do i = j, n + wa1(i) = wa1(i) + r(l) * temp + l = l + 1 + end do + wa1(j) = wa1(j) / diag(j) + end do +! +! Calculate the norm of the scaled gradient. +! Test for the special case in which the scaled gradient is zero. +! + gnorm = enorm ( n, wa1 ) + sgnorm = 0.0D+00 + alpha = delta / qnorm + + if ( gnorm /= 0.0D+00 ) then +! +! Calculate the point along the scaled gradient which minimizes the quadratic. +! + wa1(1:n) = ( wa1(1:n) / gnorm ) / diag(1:n) + + l = 1 + do j = 1, n + sum2 = 0.0D+00 + do i = j, n + sum2 = sum2 + r(l) * wa1(i) + l = l + 1 + end do + wa2(j) = sum2 + end do + + temp = enorm ( n, wa2 ) + sgnorm = ( gnorm / temp ) / temp +! +! Test whether the scaled gradient direction is acceptable. +! + alpha = 0.0D+00 +! +! The scaled gradient direction is not acceptable. +! Calculate the point along the dogleg at which the quadratic is minimized. +! + if ( sgnorm < delta ) then + + bnorm = enorm ( n, qtb ) + temp = ( bnorm / gnorm ) * ( bnorm / qnorm ) * ( sgnorm / delta ) + temp = temp - ( delta / qnorm ) * ( sgnorm / delta) ** 2 & + + sqrt ( ( temp - ( delta / qnorm ) ) ** 2 & + + ( 1.0D+00 - ( delta / qnorm ) ** 2 ) & + * ( 1.0D+00 - ( sgnorm / delta ) ** 2 ) ) + + alpha = ( ( delta / qnorm ) * ( 1.0D+00 - ( sgnorm / delta ) ** 2 ) ) & + / temp + + end if + + end if +! +! Form appropriate convex combination of the Gauss-Newton +! direction and the scaled gradient direction. +! + temp = ( 1.0D+00 - alpha ) * min ( sgnorm, delta ) + + x(1:n) = temp * wa1(1:n) + alpha * x(1:n) + + return +end +function enorm ( n, x ) + +!*****************************************************************************80 +! +!! ENORM computes the Euclidean norm of a vector. +! +! Discussion: +! +! This is an extremely simplified version of the original ENORM +! routine, which has been renamed to "ENORM2". +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, is the length of the vector. +! +! Input, real ( kind = 8 ) X(N), the vector whose norm is desired. +! +! Output, real ( kind = 8 ) ENORM, the Euclidean norm of the vector. +! + implicit none + + integer ( kind = 4 ) n + real ( kind = 8 ) x(n) + real ( kind = 8 ) enorm + + enorm = sqrt ( sum ( x(1:n) ** 2 )) + + return +end +function enorm2 ( n, x ) + +!*****************************************************************************80 +! +!! ENORM2 computes the Euclidean norm of a vector. +! +! Discussion: +! +! This routine was named ENORM. It has been renamed "ENORM2", +! and a simplified routine has been substituted. +! +! The Euclidean norm is computed by accumulating the sum of +! squares in three different sums. The sums of squares for the +! small and large components are scaled so that no overflows +! occur. Non-destructive underflows are permitted. Underflows +! and overflows do not occur in the computation of the unscaled +! sum of squares for the intermediate components. +! +! The definitions of small, intermediate and large components +! depend on two constants, RDWARF and RGIANT. The main +! restrictions on these constants are that RDWARF^2 not +! underflow and RGIANT^2 not overflow. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1 +! Argonne National Laboratory, +! Argonne, Illinois. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, is the length of the vector. +! +! Input, real ( kind = 8 ) X(N), the vector whose norm is desired. +! +! Output, real ( kind = 8 ) ENORM2, the Euclidean norm of the vector. +! + implicit none + + integer ( kind = 4 ) n + + real ( kind = 8 ) agiant + real ( kind = 8 ) enorm2 + integer ( kind = 4 ) i + real ( kind = 8 ) rdwarf + real ( kind = 8 ) rgiant + real ( kind = 8 ) s1 + real ( kind = 8 ) s2 + real ( kind = 8 ) s3 + real ( kind = 8 ) x(n) + real ( kind = 8 ) xabs + real ( kind = 8 ) x1max + real ( kind = 8 ) x3max + + rdwarf = sqrt ( tiny ( rdwarf ) ) + rgiant = sqrt ( huge ( rgiant ) ) + + s1 = 0.0D+00 + s2 = 0.0D+00 + s3 = 0.0D+00 + x1max = 0.0D+00 + x3max = 0.0D+00 + agiant = rgiant / real ( n, kind = 8 ) + + do i = 1, n + + xabs = abs ( x(i) ) + + if ( xabs <= rdwarf ) then + + if ( x3max < xabs ) then + s3 = 1.0D+00 + s3 * ( x3max / xabs ) ** 2 + x3max = xabs + else if ( xabs /= 0.0D+00 ) then + s3 = s3 + ( xabs / x3max ) ** 2 + end if + + else if ( agiant <= xabs ) then + + if ( x1max < xabs ) then + s1 = 1.0D+00 + s1 * ( x1max / xabs ) ** 2 + x1max = xabs + else + s1 = s1 + ( xabs / x1max ) ** 2 + end if + + else + + s2 = s2 + xabs ** 2 + + end if + + end do +! +! Calculation of norm. +! + if ( s1 /= 0.0D+00 ) then + + enorm2 = x1max * sqrt ( s1 + ( s2 / x1max ) / x1max ) + + else if ( s2 /= 0.0D+00 ) then + + if ( x3max <= s2 ) then + enorm2 = sqrt ( s2 * ( 1.0D+00 + ( x3max / s2 ) * ( x3max * s3 ) ) ) + else + enorm2 = sqrt ( x3max * ( ( s2 / x3max ) + ( x3max * s3 ) ) ) + end if + + else + + enorm2 = x3max * sqrt ( s3 ) + + end if + + return +end +subroutine fdjac1 ( fcn, n, x, fvec, fjac, ldfjac, iflag, ml, mu, epsfcn ) + +!*****************************************************************************80 +! +!! FDJAC1 estimates an N by N jacobian matrix using forward differences. +! +! Discussion: +! +! This function computes a forward-difference approximation +! to the N by N jacobian matrix associated with a specified +! problem of N functions in N variables. If the jacobian has +! a banded form, then function evaluations are saved by only +! approximating the nonzero terms. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input, real ( kind = 8 ) X(N), the point where the jacobian is evaluated. +! +! Input, real ( kind = 8 ) FVEC(N), the functions evaluated at X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), the N by N approximate +! jacobian matrix. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC, which +! must not be less than N. +! +! Output, integer ( kind = 4 ) IFLAG, is an error flag returned by FCN. +! If FCN returns a nonzero value of IFLAG, then this routine returns +! immediately to the calling program, with the value of IFLAG. +! +! Input, integer ( kind = 4 ) ML, MU, specify the number of subdiagonals and +! superdiagonals within the band of the jacobian matrix. If the +! jacobian is not banded, set ML and MU to N-1. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable step +! length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) n + + real ( kind = 8 ) eps + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(n) + real ( kind = 8 ) h + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) ml + integer ( kind = 4 ) msum + integer ( kind = 4 ) mu + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) x(n) + + epsmch = epsilon ( epsmch ) + + eps = sqrt ( max ( epsfcn, epsmch ) ) + msum = ml + mu + 1 +! +! Computation of dense approximate jacobian. +! + if ( n <= msum ) then + + do j = 1, n + + temp = x(j) + h = eps * abs ( temp ) + if ( h == 0.0D+00 ) then + h = eps + end if + + iflag = 1 + x(j) = temp + h + call fcn ( n, x, wa1, iflag ) + + if ( iflag < 0 ) then + exit + end if + + x(j) = temp + fjac(1:n,j) = ( wa1(1:n) - fvec(1:n) ) / h + + end do + + else +! +! Computation of banded approximate jacobian. +! + do k = 1, msum + + do j = k, n, msum + wa2(j) = x(j) + h = eps * abs ( wa2(j) ) + if ( h == 0.0D+00 ) then + h = eps + end if + x(j) = wa2(j) + h + end do + + iflag = 1 + call fcn ( n, x, wa1, iflag ) + + if ( iflag < 0 ) then + exit + end if + + do j = k, n, msum + + x(j) = wa2(j) + + h = eps * abs ( wa2(j) ) + if ( h == 0.0D+00 ) then + h = eps + end if + + fjac(1:n,j) = 0.0D+00 + + do i = 1, n + if ( j - mu <= i .and. i <= j + ml ) then + fjac(i,j) = ( wa1(i) - fvec(i) ) / h + end if + end do + + end do + + end do + + end if + + return +end +subroutine fdjac2 ( fcn, m, n, x, fvec, fjac, ldfjac, iflag, epsfcn ) + +!*****************************************************************************80 +! +!! FDJAC2 estimates an M by N jacobian matrix using forward differences. +! +! Discussion: +! +! This function computes a forward-difference approximation +! to the M by N jacobian matrix associated with a specified +! problem of M functions in N variables. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( m, n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, is the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! N must not exceed M. +! +! Input, real ( kind = 8 ) X(N), the point where the jacobian is evaluated. +! +! Input, real ( kind = 8 ) FVEC(M), the functions evaluated at X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), the M by N approximate +! jacobian matrix. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC, +! which must not be less than M. +! +! Output, integer ( kind = 4 ) IFLAG, is an error flag returned by FCN. +! If FCN returns a nonzero value of IFLAG, then this routine returns +! immediately to the calling program, with the value of IFLAG. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable +! step length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) eps + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) h + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) j + real ( kind = 8 ) temp + real ( kind = 8 ) wa(m) + real ( kind = 8 ) x(n) + + epsmch = epsilon ( epsmch ) + + eps = sqrt ( max ( epsfcn, epsmch ) ) + + do j = 1, n + + temp = x(j) + h = eps * abs ( temp ) + if ( h == 0.0D+00 ) then + h = eps + end if + + iflag = 1 + x(j) = temp + h + call fcn ( m, n, x, wa, iflag ) + + if ( iflag < 0 ) then + exit + end if + + x(j) = temp + fjac(1:m,j) = ( wa(1:m) - fvec(1:m) ) / h + + end do + + return +end +subroutine hybrd ( fcn, n, x, fvec, xtol, maxfev, ml, mu, epsfcn, diag, mode, & + factor, nprint, info, nfev, fjac, ldfjac, r, lr, qtf ) + +!*****************************************************************************80 +! +!! HYBRD seeks a zero of N nonlinear equations in N variables. +! +! Discussion: +! +! HYBRD finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. The user must provide a +! subroutine which calculates the functions. +! +! The jacobian is then calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN is at least MAXFEV by the end of an iteration. +! +! Input, integer ( kind = 4 ) ML, MU, specify the number of subdiagonals and +! superdiagonals within the band of the jacobian matrix. If the jacobian +! is not banded, set ML and MU to at least n - 1. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable step +! length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of +! iterates if it is positive. In this case, FCN is called with IFLAG = 0 at +! the beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. +! See the description of FCN. +! Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, relative error between two consecutive iterates is at most XTOL. +! 2, number of calls to FCN has reached or exceeded MAXFEV. +! 3, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 4, iteration is not making good progress, as measured by the improvement +! from the last five jacobian evaluations. +! 5, iteration is not making good progress, as measured by the improvement +! from the last ten iterations. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array which contains +! the orthogonal matrix Q produced by the QR factorization of the final +! approximate jacobian. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Output, real ( kind = 8 ) R(LR), the upper triangular matrix produced by +! the QR factorization of the final approximate jacobian, stored rowwise. +! +! Input, integer ( kind = 4 ) LR, the size of the R array, which must be no +! less than (N*(N+1))/2. +! +! Output, real ( kind = 8 ) QTF(N), contains the vector Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) lr + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) enorm + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) iter + integer ( kind = 4 ) iwa(1) + integer ( kind = 4 ) j + logical jeval + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) ml + integer ( kind = 4 ) mode + integer ( kind = 4 ) msum + integer ( kind = 4 ) mu + integer ( kind = 4 ) ncfail + integer ( kind = 4 ) nslow1 + integer ( kind = 4 ) nslow2 + integer ( kind = 4 ) ncsuc + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r(lr) + real ( kind = 8 ) ratio + logical sing + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(n) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + return + else if ( xtol < 0.0D+00 ) then + return + else if ( maxfev <= 0 ) then + return + else if ( ml < 0 ) then + return + else if ( mu < 0 ) then + return + else if ( factor <= 0.0D+00 ) then + return + else if ( ldfjac < n ) then + return + else if ( lr < ( n * ( n + 1 ) ) / 2 ) then + return + end if + + if ( mode == 2 ) then + + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 300 + end if + end do + + end if +! +! Evaluate the function at the starting point +! and calculate its norm. +! + iflag = 1 + call fcn ( n, x, fvec, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm = enorm ( n, fvec ) +! +! Determine the number of calls to FCN needed to compute the jacobian matrix. +! + msum = min ( ml + mu + 1, n ) +! +! Initialize iteration counter and monitors. +! + iter = 1 + ncsuc = 0 + ncfail = 0 + nslow1 = 0 + nslow2 = 0 +! +! Beginning of the outer loop. +! +30 continue + + jeval = .true. +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fdjac1 ( fcn, n, x, fvec, fjac, ldfjac, iflag, ml, mu, epsfcn ) + + nfev = nfev + msum + + if ( iflag < 0 ) then + go to 300 + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .false. + call qrfac ( n, n, fjac, ldfjac, pivot, iwa, 1, wa1, wa2 ) +! +! On the first iteration, if MODE is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Form Q' * FVEC and store in QTF. +! + qtf(1:n) = fvec(1:n) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + temp = - dot_product ( qtf(j:n), fjac(j:n,j) ) / fjac(j,j) + qtf(j:n) = qtf(j:n) + fjac(j:n,j) * temp + end if + + end do +! +! Copy the triangular factor of the QR factorization into R. +! + sing = .false. + + do j = 1, n + l = j + do i = 1, j - 1 + r(l) = fjac(i,j) + l = l + n - i + end do + r(l) = wa1(j) + if ( wa1(j) == 0.0D+00 ) then + sing = .true. + end if + end do +! +! Accumulate the orthogonal factor in FJAC. +! + call qform ( n, n, fjac, ldfjac ) +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +180 continue +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( n, x, fvec, iflag ) + end if + if ( iflag < 0 ) then + go to 300 + end if + end if +! +! Determine the direction P. +! + call dogleg ( n, r, lr, diag, qtf, delta, wa1 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = - wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( n, wa2, wa4, iflag ) + nfev = nfev + 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm1 = enorm ( n, wa4 ) +! +! Compute the scaled actual reduction. +! + actred = -1.0D+00 + if ( fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + endif +! +! Compute the scaled predicted reduction. +! + l = 1 + do i = 1, n + sum2 = 0.0D+00 + do j = i, n + sum2 = sum2 + r(l) * wa1(j) + l = l + 1 + end do + wa3(i) = qtf(i) + sum2 + end do + + temp = enorm ( n, wa3 ) + prered = 0.0D+00 + if ( temp < fnorm ) then + prered = 1.0D+00 - ( temp / fnorm ) ** 2 + end if +! +! Compute the ratio of the actual to the predicted reduction. +! + ratio = 0.0D+00 + if ( 0.0D+00 < prered ) then + ratio = actred / prered + end if +! +! Update the step bound. +! + if ( ratio < 0.1D+00 ) then + + ncsuc = 0 + ncfail = ncfail + 1 + delta = 0.5D+00 * delta + + else + + ncfail = 0 + ncsuc = ncsuc + 1 + + if ( 0.5D+00 <= ratio .or. 1 < ncsuc ) then + delta = max ( delta, pnorm / 0.5D+00 ) + end if + + if ( abs ( ratio - 1.0D+00 ) <= 0.1D+00 ) then + delta = pnorm / 0.5D+00 + end if + + end if +! +! Test for successful iteration. +! +! Successful iteration. +! Update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:n) = wa4(1:n) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Determine the progress of the iteration. +! + nslow1 = nslow1 + 1 + if ( 0.001D+00 <= actred ) then + nslow1 = 0 + end if + + if ( jeval ) then + nslow2 = nslow2 + 1 + end if + + if ( 0.1D+00 <= actred ) then + nslow2 = 0 + end if +! +! Test for convergence. +! + if ( delta <= xtol * xnorm .or. fnorm == 0.0D+00 ) then + info = 1 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! Tests for termination and stringent tolerances. +! + if ( maxfev <= nfev ) then + info = 2 + end if + + if ( 0.1D+00 * max ( 0.1D+00 * delta, pnorm ) <= epsmch * xnorm ) then + info = 3 + end if + + if ( nslow2 == 5 ) then + info = 4 + end if + + if ( nslow1 == 10 ) then + info = 5 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! Criterion for recalculating jacobian approximation +! by forward differences. +! + if ( ncfail == 2 ) then + go to 290 + end if +! +! Calculate the rank one modification to the jacobian +! and update QTF if necessary. +! + do j = 1, n + sum2 = dot_product ( wa4(1:n), fjac(1:n,j) ) + wa2(j) = ( sum2 - wa3(j) ) / pnorm + wa1(j) = diag(j) * ( ( diag(j) * wa1(j) ) / pnorm ) + if ( 0.0001D+00 <= ratio ) then + qtf(j) = sum2 + end if + end do +! +! Compute the QR factorization of the updated jacobian. +! + call r1updt ( n, n, r, lr, wa1, wa2, wa3, sing ) + call r1mpyq ( n, n, fjac, ldfjac, wa2, wa3 ) + call r1mpyq ( 1, n, qtf, 1, wa2, wa3 ) +! +! End of the inner loop. +! + jeval = .false. + go to 180 + + 290 continue +! +! End of the outer loop. +! + go to 30 + + 300 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( n, x, fvec, iflag ) + end if + + return +end +subroutine hybrd1 ( fcn, n, x, fvec, tol, info ) + +!*****************************************************************************80 +! +!! HYBRD1 seeks a zero of N nonlinear equations in N variables. +! +! Discussion: +! +! HYBRD1 finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. This is done by using the +! more general nonlinear equation solver HYBRD. The user must provide a +! subroutine which calculates the functions. The jacobian is then +! calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 19 August 2016 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates that the relative error between X and the solution is at +! most TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See the +! description of FCN. +! Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 2, number of calls to FCN has reached or exceeded 200*(N+1). +! 3, TOL is too small. No further improvement in the approximate +! solution X is possible. +! 4, the iteration is not making good progress. +! + implicit none + + integer ( kind = 4 ) lwa + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) epsfcn + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(n,n) + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) info + integer ( kind = 4 ) j + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) lr + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) ml + integer ( kind = 4 ) mode + integer ( kind = 4 ) mu + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r((n*(n+1))/2) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + if ( n <= 0 ) then + info = 0 + return + end if + + if ( tol < 0.0D+00 ) then + info = 0 + return + end if + + xtol = tol + maxfev = 200 * ( n + 1 ) + ml = n - 1 + mu = n - 1 + epsfcn = 0.0D+00 + diag(1:n) = 1.0D+00 + mode = 2 + factor = 100.0D+00 + nprint = 0 + info = 0 + nfev = 0 + fjac(1:n,1:n) = 0.0D+00 + ldfjac = n + r(1:(n*(n+1))/2) = 0.0D+00 + lr = ( n * ( n + 1 ) ) / 2 + qtf(1:n) = 0.0D+00 + + call hybrd ( fcn, n, x, fvec, xtol, maxfev, ml, mu, epsfcn, diag, mode, & + factor, nprint, info, nfev, fjac, ldfjac, r, lr, qtf ) + + if ( info == 5 ) then + info = 4 + end if + + return +end +subroutine hybrj ( fcn, n, x, fvec, fjac, ldfjac, xtol, maxfev, diag, mode, & + factor, nprint, info, nfev, njev, r, lr, qtf ) + +!*****************************************************************************80 +! +!! HYBRJ seeks a zero of N nonlinear equations in N variables. +! +! Discussion: +! +! HYBRJ finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. The user must provide a +! subroutine which calculates the functions and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! +! subroutine fcn ( n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N matrix, containing +! the orthogonal matrix Q produced by the QR factorization +! of the final approximate jacobian. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of the +! array FJAC. LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN is at least MAXFEV by the end of an iteration. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. +! See the description of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, relative error between two consecutive iterates is at most XTOL. +! 2, number of calls to FCN with IFLAG = 1 has reached MAXFEV. +! 3, XTOL is too small. No further improvement in +! the approximate solution X is possible. +! 4, iteration is not making good progress, as measured by the +! improvement from the last five jacobian evaluations. +! 5, iteration is not making good progress, as measured by the +! improvement from the last ten iterations. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN +! with IFLAG = 1. +! +! Output, integer ( kind = 4 ) NJEV, the number of calls to FCN +! with IFLAG = 2. +! +! Output, real ( kind = 8 ) R(LR), the upper triangular matrix produced +! by the QR factorization of the final approximate jacobian, stored rowwise. +! +! Input, integer ( kind = 4 ) LR, the size of the R array, which must +! be no less than (N*(N+1))/2. +! +! Output, real ( kind = 8 ) QTF(N), contains the vector Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) lr + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) iter + integer ( kind = 4 ) iwa(1) + integer ( kind = 4 ) j + logical jeval + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) ncfail + integer ( kind = 4 ) nslow1 + integer ( kind = 4 ) nslow2 + integer ( kind = 4 ) ncsuc + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r(lr) + real ( kind = 8 ) ratio + logical sing + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(n) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + njev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + if ( ldfjac < n .or. & + xtol < 0.0D+00 .or. & + maxfev <= 0 .or. & + factor <= 0.0D+00 .or. & + lr < ( n * ( n + 1 ) ) / 2 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + fnorm = enorm ( n, fvec ) +! +! Initialize iteration counter and monitors. +! + iter = 1 + ncsuc = 0 + ncfail = 0 + nslow1 = 0 + nslow2 = 0 +! +! Beginning of the outer loop. +! + do + + jeval = .true. +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + njev = njev + 1 + + if ( iflag < 0 ) then + info = iflag + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .false. + call qrfac ( n, n, fjac, ldfjac, pivot, iwa, 1, wa1, wa2 ) +! +! On the first iteration, if MODE is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Form Q'*FVEC and store in QTF. +! + qtf(1:n) = fvec(1:n) + + do j = 1, n + if ( fjac(j,j) /= 0.0D+00 ) then + sum2 = 0.0D+00 + do i = j, n + sum2 = sum2 + fjac(i,j) * qtf(i) + end do + temp = - sum2 / fjac(j,j) + do i = j, n + qtf(i) = qtf(i) + fjac(i,j) * temp + end do + end if + end do +! +! Copy the triangular factor of the QR factorization into R. +! + sing = .false. + + do j = 1, n + l = j + do i = 1, j - 1 + r(l) = fjac(i,j) + l = l + n - i + end do + r(l) = wa1(j) + if ( wa1(j) == 0.0D+00 ) then + sing = .true. + end if + end do +! +! Accumulate the orthogonal factor in FJAC. +! + call qform ( n, n, fjac, ldfjac ) +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! + do +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + + if ( iflag < 0 ) then + info = iflag + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + end if +! +! Determine the direction P. +! + call dogleg ( n, r, lr, diag, qtf, delta, wa1 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = - wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( n, wa2, wa4, fjac, ldfjac, iflag ) + nfev = nfev + 1 + + if ( iflag < 0 ) then + info = iflag + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + fnorm1 = enorm ( n, wa4 ) +! +! Compute the scaled actual reduction. +! + actred = -1.0D+00 + if ( fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + end if +! +! Compute the scaled predicted reduction. +! + l = 1 + do i = 1, n + sum2 = 0.0D+00 + do j = i, n + sum2 = sum2 + r(l) * wa1(j) + l = l + 1 + end do + wa3(i) = qtf(i) + sum2 + end do + + temp = enorm ( n, wa3 ) + prered = 0.0D+00 + if ( temp < fnorm ) then + prered = 1.0D+00 - ( temp / fnorm ) ** 2 + end if +! +! Compute the ratio of the actual to the predicted reduction. +! + if ( 0.0D+00 < prered ) then + ratio = actred / prered + else + ratio = 0.0D+00 + end if +! +! Update the step bound. +! + if ( ratio < 0.1D+00 ) then + + ncsuc = 0 + ncfail = ncfail + 1 + delta = 0.5D+00 * delta + + else + + ncfail = 0 + ncsuc = ncsuc + 1 + + if ( 0.5D+00 <= ratio .or. 1 < ncsuc ) then + delta = max ( delta, pnorm / 0.5D+00 ) + end if + + if ( abs ( ratio - 1.0D+00 ) <= 0.1D+00 ) then + delta = pnorm / 0.5D+00 + end if + + end if +! +! Test for successful iteration. +! + +! +! Successful iteration. +! Update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:n) = wa4(1:n) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Determine the progress of the iteration. +! + nslow1 = nslow1 + 1 + if ( 0.001D+00 <= actred ) then + nslow1 = 0 + end if + + if ( jeval ) then + nslow2 = nslow2 + 1 + end if + + if ( 0.1D+00 <= actred ) then + nslow2 = 0 + end if +! +! Test for convergence. +! + if ( delta <= xtol * xnorm .or. fnorm == 0.0D+00 ) then + info = 1 + end if + + if ( info /= 0 ) then + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if +! +! Tests for termination and stringent tolerances. +! + if ( maxfev <= nfev ) then + info = 2 + end if + + if ( 0.1D+00 * max ( 0.1D+00 * delta, pnorm ) <= epsmch * xnorm ) then + info = 3 + end if + + if ( nslow2 == 5 ) then + info = 4 + end if + + if ( nslow1 == 10 ) then + info = 5 + end if + + if ( info /= 0 ) then + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if +! +! Criterion for recalculating jacobian. +! + if ( ncfail == 2 ) then + exit + end if +! +! Calculate the rank one modification to the jacobian +! and update QTF if necessary. +! + do j = 1, n + sum2 = dot_product ( wa4(1:n), fjac(1:n,j) ) + wa2(j) = ( sum2 - wa3(j) ) / pnorm + wa1(j) = diag(j) * ( ( diag(j) * wa1(j) ) / pnorm ) + if ( 0.0001D+00 <= ratio ) then + qtf(j) = sum2 + end if + end do +! +! Compute the QR factorization of the updated jacobian. +! + call r1updt ( n, n, r, lr, wa1, wa2, wa3, sing ) + call r1mpyq ( n, n, fjac, ldfjac, wa2, wa3 ) + call r1mpyq ( 1, n, qtf, 1, wa2, wa3 ) +! +! End of the inner loop. +! + jeval = .false. + + end do +! +! End of the outer loop. +! + end do + +end +subroutine hybrj1 ( fcn, n, x, fvec, fjac, ldfjac, tol, info ) + +!*****************************************************************************80 +! +!! HYBRJ1 seeks a zero of N equations in N variables by Powell's method. +! +! Discussion: +! +! HYBRJ1 finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. This is done by using the +! more general nonlinear equation solver HYBRJ. The user +! must provide a subroutine which calculates the functions +! and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! subroutine fcn ( n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array which contains +! the orthogonal matrix Q produced by the QR factorization of the final +! approximate jacobian. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates that the relative error between X and the solution is at most +! TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 2, number of calls to FCN with IFLAG = 1 has reached 100*(N+1). +! 3, TOL is too small. No further improvement in the approximate +! solution X is possible. +! 4, iteration is not making good progress. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) info + integer ( kind = 4 ) j + integer ( kind = 4 ) lr + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r((n*(n+1))/2) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + info = 0 + + if ( n <= 0 ) then + return + else if ( ldfjac < n ) then + return + else if ( tol < 0.0D+00 ) then + return + end if + + maxfev = 100 * ( n + 1 ) + xtol = tol + mode = 2 + diag(1:n) = 1.0D+00 + factor = 100.0D+00 + nprint = 0 + lr = ( n * ( n + 1 ) ) / 2 + + call hybrj ( fcn, n, x, fvec, fjac, ldfjac, xtol, maxfev, diag, mode, & + factor, nprint, info, nfev, njev, r, lr, qtf ) + + if ( info == 5 ) then + info = 4 + end if + + return +end +subroutine lmder ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + +!*****************************************************************************80 +! +!! LMDER minimizes M functions in N variables by the Levenberg-Marquardt method. +! +! Discussion: +! +! LMDER minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! The user must provide a subroutine which calculates the functions +! and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, is the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. The upper +! N by N submatrix of FJAC contains an upper triangular matrix R with +! diagonal elements of nonincreasing magnitude such that +! P' * ( JAC' * JAC ) * P = R' * R, +! where P is a permutation matrix and JAC is the final calculated jacobian. +! Column J of P is column IPVT(J) of the identity matrix. The lower +! trapezoidal part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least M. +! +! Input, real ( kind = 8 ) FTOL. Termination occurs when both the actual +! and predicted relative reductions in the sum of squares are at most FTOL. +! Therefore, FTOL measures the relative error desired in the sum of +! squares. FTOL should be nonnegative. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, real ( kind = 8 ) GTOL. Termination occurs when the cosine of the +! angle between FVEC and any column of the jacobian is at most GTOL in +! absolute value. Therefore, GTOL measures the orthogonality desired +! between the function vector and the columns of the jacobian. GTOL should +! be nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN with IFLAG = 1 is at least MAXFEV by the end of an iteration. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, both actual and predicted relative reductions in the sum of +! squares are at most FTOL. +! 2, relative error between two consecutive iterates is at most XTOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, the cosine of the angle between FVEC and any column of the jacobian +! is at most GTOL in absolute value. +! 5, number of calls to FCN with IFLAG = 1 has reached MAXFEV. +! 6, FTOL is too small. No further reduction in the sum of squares +! is possible. +! 7, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 8, GTOL is too small. FVEC is orthogonal to the columns of the +! jacobian to machine precision. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN with +! IFLAG = 1. +! +! Output, integer ( kind = 4 ) NJEV, the number of calls to FCN with +! IFLAG = 2. +! +! Output, integer ( kind = 4 ) IPVT(N), defines a permutation matrix P +! such that JAC*P = Q*R, where JAC is the final calculated jacobian, Q is +! orthogonal (not stored), and R is upper triangular with diagonal +! elements of nonincreasing magnitude. Column J of P is column +! IPVT(J) of the identity matrix. +! +! Output, real ( kind = 8 ) QTF(N), contains the first N elements of Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dirder + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gnorm + real ( kind = 8 ) gtol + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) iter + integer ( kind = 4 ) j + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) par + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) ratio + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) temp1 + real ( kind = 8 ) temp2 + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(m) + real ( kind = 8 ) xnorm + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + njev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + go to 300 + end if + + if ( m < n ) then + go to 300 + end if + + if ( ldfjac < m & + .or. ftol < 0.0D+00 .or. xtol < 0.0D+00 .or. gtol < 0.0D+00 & + .or. maxfev <= 0 .or. factor <= 0.0D+00 ) then + go to 300 + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 300 + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + nfev = 1 + if ( iflag < 0 ) then + go to 300 + end if + + fnorm = enorm ( m, fvec ) +! +! Initialize Levenberg-Marquardt parameter and iteration counter. +! + par = 0.0D+00 + iter = 1 +! +! Beginning of the outer loop. +! +30 continue +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + + njev = njev + 1 + + if ( iflag < 0 ) then + go to 300 + end if +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + end if + if ( iflag < 0 ) then + go to 300 + end if + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .true. + call qrfac ( m, n, fjac, ldfjac, pivot, ipvt, n, wa1, wa2 ) +! +! On the first iteration and if mode is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Form Q'*FVEC and store the first N components in QTF. +! + wa4(1:m) = fvec(1:m) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + sum2 = dot_product ( wa4(j:m), fjac(j:m,j) ) + temp = - sum2 / fjac(j,j) + wa4(j:m) = wa4(j:m) + fjac(j:m,j) * temp + end if + + fjac(j,j) = wa1(j) + qtf(j) = wa4(j) + + end do +! +! Compute the norm of the scaled gradient. +! + gnorm = 0.0D+00 + + if ( fnorm /= 0.0D+00 ) then + + do j = 1, n + l = ipvt(j) + if ( wa2(l) /= 0.0D+00 ) then + sum2 = dot_product ( qtf(1:j), fjac(1:j,j) ) / fnorm + gnorm = max ( gnorm, abs ( sum2 / wa2(l) ) ) + end if + end do + + end if +! +! Test for convergence of the gradient norm. +! + if ( gnorm <= gtol ) then + info = 4 + go to 300 + end if +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +200 continue +! +! Determine the Levenberg-Marquardt parameter. +! + call lmpar ( n, fjac, ldfjac, ipvt, diag, qtf, delta, par, wa1, wa2 ) +! +! Store the direction p and x + p. calculate the norm of p. +! + wa1(1:n) = - wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at x + p and calculate its norm. +! + iflag = 1 + call fcn ( m, n, wa2, wa4, fjac, ldfjac, iflag ) + + nfev = nfev + 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm1 = enorm ( m, wa4 ) +! +! Compute the scaled actual reduction. +! + actred = -1.0D+00 + if ( 0.1D+00 * fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + end if +! +! Compute the scaled predicted reduction and +! the scaled directional derivative. +! + do j = 1, n + wa3(j) = 0.0D+00 + l = ipvt(j) + temp = wa1(l) + wa3(1:j) = wa3(1:j) + fjac(1:j,j) * temp + end do + + temp1 = enorm ( n, wa3 ) / fnorm + temp2 = ( sqrt ( par ) * pnorm ) / fnorm + prered = temp1 ** 2 + temp2 ** 2 / 0.5D+00 + dirder = - ( temp1 ** 2 + temp2 ** 2 ) +! +! Compute the ratio of the actual to the predicted reduction. +! + if ( prered /= 0.0D+00 ) then + ratio = actred / prered + else + ratio = 0.0D+00 + end if +! +! Update the step bound. +! + if ( ratio <= 0.25D+00 ) then + + if ( 0.0D+00 <= actred ) then + temp = 0.5D+00 + end if + + if ( actred < 0.0D+00 ) then + temp = 0.5D+00 * dirder / ( dirder + 0.5D+00 * actred ) + end if + + if ( 0.1D+00 * fnorm1 >= fnorm .or. temp < 0.1D+00 ) then + temp = 0.1D+00 + end if + + delta = temp * min ( delta, pnorm / 0.1D+00 ) + par = par / temp + + else + + if ( par == 0.0D+00 .or. ratio >= 0.75D+00 ) then + delta = 2.0D+00 * pnorm + par = 0.5D+00 * par + end if + + end if +! +! Successful iteration. +! +! Update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:m) = wa4(1:m) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Tests for convergence. +! + if ( abs ( actred) <= ftol .and. & + prered <= ftol .and. & + 0.5D+00 * ratio <= 1.0D+00 ) then + info = 1 + end if + + if ( delta <= xtol * xnorm ) then + info = 2 + end if + + if ( abs ( actred) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 .and. info == 2 ) then + info = 3 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! Tests for termination and stringent tolerances. +! + if ( nfev >= maxfev ) then + info = 5 + end if + + if ( abs ( actred ) <= epsmch .and. prered <= epsmch & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 6 + end if + + if ( delta <= epsmch * xnorm ) then + info = 7 + end if + + if ( gnorm <= epsmch ) then + info = 8 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! End of the inner loop. repeat if iteration unsuccessful. +! + if ( ratio < 0.0001D+00 ) then + go to 200 + end if +! +! End of the outer loop. +! + go to 30 + + 300 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + end if + + return +end +subroutine lmder1 ( fcn, m, n, x, fvec, fjac, ldfjac, tol, info ) + +!*****************************************************************************80 +! +!! LMDER1 minimizes M functions in N variables by Levenberg-Marquardt method. +! +! Discussion: +! +! LMDER1 minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! This is done by using the more general least-squares solver LMDER. +! The user must provide a subroutine which calculates the functions +! and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. The upper +! N by N submatrix contains an upper triangular matrix R with +! diagonal elements of nonincreasing magnitude such that +! P' * ( JAC' * JAC ) * P = R' * R, +! where P is a permutation matrix and JAC is the final calculated +! jacobian. Column J of P is column IPVT(J) of the identity matrix. +! The lower trapezoidal part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, is the leading dimension of FJAC, +! which must be no less than M. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates either that the relative error in the sum of squares is at +! most TOL or that the relative error between X and the solution is at +! most TOL. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error in the sum of squares +! is at most TOL. +! 2, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, FVEC is orthogonal to the columns of the jacobian to machine precision. +! 5, number of calls to FCN with IFLAG = 1 has reached 100*(N+1). +! 6, TOL is too small. No further reduction in the sum of squares is +! possible. +! 7, TOL is too small. No further improvement in the approximate +! solution X is possible. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gtol + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + info = 0 + + if ( n <= 0 ) then + return + else if ( m < n ) then + return + else if ( ldfjac < m ) then + return + else if ( tol < 0.0D+00 ) then + return + end if + + factor = 100.0D+00 + maxfev = 100 * ( n + 1 ) + ftol = tol + xtol = tol + gtol = 0.0D+00 + mode = 1 + nprint = 0 + + call lmder ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + + if ( info == 8 ) then + info = 4 + end if + + return +end +subroutine lmdif ( fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev, epsfcn, & + diag, mode, factor, nprint, info, nfev, fjac, ldfjac, ipvt, qtf ) + +!*****************************************************************************80 +! +!! LMDIF minimizes M functions in N variables by the Levenberg-Marquardt method. +! +! Discussion: +! +! LMDIF minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! The user must provide a subroutine which calculates the functions. +! The jacobian is then calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( m, n, x, fvec, iflag ) +! integer ( kind = 4 ) m +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) FTOL. Termination occurs when both the actual +! and predicted relative reductions in the sum of squares are at most FTOL. +! Therefore, FTOL measures the relative error desired in the sum of +! squares. FTOL should be nonnegative. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. Therefore, XTOL +! measures the relative error desired in the approximate solution. XTOL +! should be nonnegative. +! +! Input, real ( kind = 8 ) GTOL. termination occurs when the cosine of the +! angle between FVEC and any column of the jacobian is at most GTOL in +! absolute value. Therefore, GTOL measures the orthogonality desired +! between the function vector and the columns of the jacobian. GTOL should +! be nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN is at least MAXFEV by the end of an iteration. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable step +! length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. +! This bound is set to the product of FACTOR and the euclidean norm of +! DIAG*X if nonzero, or else to FACTOR itself. In most cases, FACTOR should +! lie in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, both actual and predicted relative reductions in the sum of squares +! are at most FTOL. +! 2, relative error between two consecutive iterates is at most XTOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, the cosine of the angle between FVEC and any column of the jacobian +! is at most GTOL in absolute value. +! 5, number of calls to FCN has reached or exceeded MAXFEV. +! 6, FTOL is too small. No further reduction in the sum of squares +! is possible. +! 7, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 8, GTOL is too small. FVEC is orthogonal to the columns of the +! jacobian to machine precision. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. The upper +! N by N submatrix of FJAC contains an upper triangular matrix R with +! diagonal elements of nonincreasing magnitude such that +! +! P' * ( JAC' * JAC ) * P = R' * R, +! +! where P is a permutation matrix and JAC is the final calculated jacobian. +! Column J of P is column IPVT(J) of the identity matrix. The lower +! trapezoidal part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least M. +! +! Output, integer ( kind = 4 ) IPVT(N), defines a permutation matrix P such +! that JAC * P = Q * R, where JAC is the final calculated jacobian, Q is +! orthogonal (not stored), and R is upper triangular with diagonal +! elements of nonincreasing magnitude. Column J of P is column IPVT(J) +! of the identity matrix. +! +! Output, real ( kind = 8 ) QTF(N), the first N elements of Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dirder + real ( kind = 8 ) enorm + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gnorm + real ( kind = 8 ) gtol + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) iter + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) j + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + real ( kind = 8 ) par + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) ratio + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) temp1 + real ( kind = 8 ) temp2 + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(m) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + + if ( n <= 0 ) then + go to 300 + else if ( m < n ) then + go to 300 + else if ( ldfjac < m ) then + go to 300 + else if ( ftol < 0.0D+00 ) then + go to 300 + else if ( xtol < 0.0D+00 ) then + go to 300 + else if ( gtol < 0.0D+00 ) then + go to 300 + else if ( maxfev <= 0 ) then + go to 300 + else if ( factor <= 0.0D+00 ) then + go to 300 + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 300 + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( m, n, x, fvec, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm = enorm ( m, fvec ) +! +! Initialize Levenberg-Marquardt parameter and iteration counter. +! + par = 0.0D+00 + iter = 1 +! +! Beginning of the outer loop. +! +30 continue +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fdjac2 ( fcn, m, n, x, fvec, fjac, ldfjac, iflag, epsfcn ) + nfev = nfev + n + + if ( iflag < 0 ) then + go to 300 + end if +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( m, n, x, fvec, iflag ) + end if + if ( iflag < 0 ) then + go to 300 + end if + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .true. + call qrfac ( m, n, fjac, ldfjac, pivot, ipvt, n, wa1, wa2 ) +! +! On the first iteration and if MODE is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + end if +! +! Form Q' * FVEC and store the first N components in QTF. +! + wa4(1:m) = fvec(1:m) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + sum2 = dot_product ( wa4(j:m), fjac(j:m,j) ) + temp = - sum2 / fjac(j,j) + wa4(j:m) = wa4(j:m) + fjac(j:m,j) * temp + end if + + fjac(j,j) = wa1(j) + qtf(j) = wa4(j) + + end do +! +! Compute the norm of the scaled gradient. +! + gnorm = 0.0D+00 + + if ( fnorm /= 0.0D+00 ) then + + do j = 1, n + + l = ipvt(j) + + if ( wa2(l) /= 0.0D+00 ) then + sum2 = 0.0D+00 + do i = 1, j + sum2 = sum2 + fjac(i,j) * ( qtf(i) / fnorm ) + end do + gnorm = max ( gnorm, abs ( sum2 / wa2(l) ) ) + end if + + end do + + end if +! +! Test for convergence of the gradient norm. +! + if ( gnorm <= gtol ) then + info = 4 + go to 300 + end if +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +200 continue +! +! Determine the Levenberg-Marquardt parameter. +! + call lmpar ( n, fjac, ldfjac, ipvt, diag, qtf, delta, par, wa1, wa2 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = -wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( m, n, wa2, wa4, iflag ) + nfev = nfev + 1 + if ( iflag < 0 ) then + go to 300 + end if + fnorm1 = enorm ( m, wa4 ) +! +! Compute the scaled actual reduction. +! + if ( 0.1D+00 * fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + else + actred = -1.0D+00 + end if +! +! Compute the scaled predicted reduction and the scaled directional derivative. +! + do j = 1, n + wa3(j) = 0.0D+00 + l = ipvt(j) + temp = wa1(l) + wa3(1:j) = wa3(1:j) + fjac(1:j,j) * temp + end do + + temp1 = enorm ( n, wa3 ) / fnorm + temp2 = ( sqrt ( par ) * pnorm ) / fnorm + prered = temp1 ** 2 + temp2 ** 2 / 0.5D+00 + dirder = - ( temp1 ** 2 + temp2 ** 2 ) +! +! Compute the ratio of the actual to the predicted reduction. +! + ratio = 0.0D+00 + if ( prered /= 0.0D+00 ) then + ratio = actred / prered + end if +! +! Update the step bound. +! + if ( ratio <= 0.25D+00 ) then + + if ( actred >= 0.0D+00 ) then + temp = 0.5D+00 + endif + + if ( actred < 0.0D+00 ) then + temp = 0.5D+00 * dirder / ( dirder + 0.5D+00 * actred ) + end if + + if ( 0.1D+00 * fnorm1 >= fnorm .or. temp < 0.1D+00 ) then + temp = 0.1D+00 + end if + + delta = temp * min ( delta, pnorm / 0.1D+00 ) + par = par / temp + + else + + if ( par == 0.0D+00 .or. ratio >= 0.75D+00 ) then + delta = 2.0D+00 * pnorm + par = 0.5D+00 * par + end if + + end if +! +! Test for successful iteration. +! + +! +! Successful iteration. update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:m) = wa4(1:m) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Tests for convergence. +! + if ( abs ( actred) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 1 + end if + + if ( delta <= xtol * xnorm ) then + info = 2 + end if + + if ( abs ( actred) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 .and. info == 2 ) info = 3 + + if ( info /= 0 ) then + go to 300 + end if +! +! Tests for termination and stringent tolerances. +! + if ( maxfev <= nfev ) then + info = 5 + end if + + if ( abs ( actred) <= epsmch .and. prered <= epsmch & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 6 + end if + + if ( delta <= epsmch * xnorm ) then + info = 7 + end if + + if ( gnorm <= epsmch ) then + info = 8 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! End of the inner loop. Repeat if iteration unsuccessful. +! + if ( ratio < 0.0001D+00 ) then + go to 200 + end if +! +! End of the outer loop. +! + go to 30 + +300 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( m, n, x, fvec, iflag ) + end if + + return +end +subroutine lmdif1 ( fcn, m, n, x, fvec, tol, info ) + +!*****************************************************************************80 +! +!! LMDIF1 minimizes M functions in N variables using Levenberg-Marquardt method. +! +! Discussion: +! +! LMDIF1 minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! This is done by using the more general least-squares solver LMDIF. +! The user must provide a subroutine which calculates the functions. +! The jacobian is then calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( m, n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates either that the relative error in the sum of squares is at +! most TOL or that the relative error between X and the solution is at +! most TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error in the sum of squares +! is at most TOL. +! 2, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, FVEC is orthogonal to the columns of the jacobian to machine precision. +! 5, number of calls to FCN has reached or exceeded 200*(N+1). +! 6, TOL is too small. No further reduction in the sum of squares +! is possible. +! 7, TOL is too small. No further improvement in the approximate +! solution X is possible. +! + implicit none + + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) epsfcn + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(m,n) + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gtol + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + info = 0 + + if ( n <= 0 ) then + return + else if ( m < n ) then + return + else if ( tol < 0.0D+00 ) then + return + end if + + factor = 100.0D+00 + maxfev = 200 * ( n + 1 ) + ftol = tol + xtol = tol + gtol = 0.0D+00 + epsfcn = 0.0D+00 + mode = 1 + nprint = 0 + ldfjac = m + + call lmdif ( fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev, epsfcn, & + diag, mode, factor, nprint, info, nfev, fjac, ldfjac, ipvt, qtf ) + + if ( info == 8 ) then + info = 4 + end if + + return +end +subroutine lmpar ( n, r, ldr, ipvt, diag, qtb, delta, par, x, sdiag ) + +!*****************************************************************************80 +! +!! LMPAR computes a parameter for the Levenberg-Marquardt method. +! +! Discussion: +! +! Given an M by N matrix A, an N by N nonsingular diagonal +! matrix D, an M-vector B, and a positive number DELTA, +! the problem is to determine a value for the parameter +! PAR such that if X solves the system +! +! A*X = B, +! sqrt ( PAR ) * D * X = 0, +! +! in the least squares sense, and DXNORM is the euclidean +! norm of D*X, then either PAR is zero and +! +! ( DXNORM - DELTA ) <= 0.1 * DELTA, +! +! or PAR is positive and +! +! abs ( DXNORM - DELTA) <= 0.1 * DELTA. +! +! This function completes the solution of the problem +! if it is provided with the necessary information from the +! QR factorization, with column pivoting, of A. That is, if +! A*P = Q*R, where P is a permutation matrix, Q has orthogonal +! columns, and R is an upper triangular matrix with diagonal +! elements of nonincreasing magnitude, then LMPAR expects +! the full upper triangle of R, the permutation matrix P, +! and the first N components of Q'*B. On output +! LMPAR also provides an upper triangular matrix S such that +! +! P' * ( A' * A + PAR * D * D ) * P = S'* S. +! +! S is employed within LMPAR and may be of separate interest. +! +! Only a few iterations are generally needed for convergence +! of the algorithm. +! +! If, however, the limit of 10 iterations is reached, then the output +! PAR will contain the best value obtained so far. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 24 January 2014 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of R. +! +! Input/output, real ( kind = 8 ) R(LDR,N),the N by N matrix. The full +! upper triangle must contain the full upper triangle of the matrix R. +! On output the full upper triangle is unaltered, and the strict lower +! triangle contains the strict upper triangle (transposed) of the upper +! triangular matrix S. +! +! Input, integer ( kind = 4 ) LDR, the leading dimension of R. LDR must be +! no less than N. +! +! Input, integer ( kind = 4 ) IPVT(N), defines the permutation matrix P +! such that A*P = Q*R. Column J of P is column IPVT(J) of the +! identity matrix. +! +! Input, real ( kind = 8 ) DIAG(N), the diagonal elements of the matrix D. +! +! Input, real ( kind = 8 ) QTB(N), the first N elements of the vector Q'*B. +! +! Input, real ( kind = 8 ) DELTA, an upper bound on the euclidean norm +! of D*X. DELTA should be positive. +! +! Input/output, real ( kind = 8 ) PAR. On input an initial estimate of the +! Levenberg-Marquardt parameter. On output the final estimate. +! PAR should be nonnegative. +! +! Output, real ( kind = 8 ) X(N), the least squares solution of the system +! A*X = B, sqrt(PAR)*D*X = 0, for the output value of PAR. +! +! Output, real ( kind = 8 ) SDIAG(N), the diagonal elements of the upper +! triangular matrix S. +! + implicit none + + integer ( kind = 4 ) ldr + integer ( kind = 4 ) n + + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dwarf + real ( kind = 8 ) dxnorm + real ( kind = 8 ) enorm + real ( kind = 8 ) gnorm + real ( kind = 8 ) fp + integer ( kind = 4 ) i + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) iter + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) l + integer ( kind = 4 ) nsing + real ( kind = 8 ) par + real ( kind = 8 ) parc + real ( kind = 8 ) parl + real ( kind = 8 ) paru + real ( kind = 8 ) qnorm + real ( kind = 8 ) qtb(n) + real ( kind = 8 ) r(ldr,n) + real ( kind = 8 ) sdiag(n) + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) x(n) +! +! DWARF is the smallest positive magnitude. +! + dwarf = tiny ( dwarf ) +! +! Compute and store in X the Gauss-Newton direction. +! +! If the jacobian is rank-deficient, obtain a least squares solution. +! + nsing = n + + do j = 1, n + wa1(j) = qtb(j) + if ( r(j,j) == 0.0D+00 .and. nsing == n ) then + nsing = j - 1 + end if + if ( nsing < n ) then + wa1(j) = 0.0D+00 + end if + end do + + do k = 1, nsing + j = nsing - k + 1 + wa1(j) = wa1(j) / r(j,j) + temp = wa1(j) + wa1(1:j-1) = wa1(1:j-1) - r(1:j-1,j) * temp + end do + + do j = 1, n + l = ipvt(j) + x(l) = wa1(j) + end do +! +! Initialize the iteration counter. +! Evaluate the function at the origin, and test +! for acceptance of the Gauss-Newton direction. +! + iter = 0 + wa2(1:n) = diag(1:n) * x(1:n) + dxnorm = enorm ( n, wa2 ) + fp = dxnorm - delta + + if ( fp <= 0.1D+00 * delta ) then + if ( iter == 0 ) then + par = 0.0D+00 + end if + return + end if +! +! If the jacobian is not rank deficient, the Newton +! step provides a lower bound, PARL, for the zero of +! the function. +! +! Otherwise set this bound to zero. +! + parl = 0.0D+00 + + if ( n <= nsing ) then + + do j = 1, n + l = ipvt(j) + wa1(j) = diag(l) * ( wa2(l) / dxnorm ) + end do + + do j = 1, n + sum2 = dot_product ( wa1(1:j-1), r(1:j-1,j) ) + wa1(j) = ( wa1(j) - sum2 ) / r(j,j) + end do + + temp = enorm ( n, wa1 ) + parl = ( ( fp / delta ) / temp ) / temp + + end if +! +! Calculate an upper bound, PARU, for the zero of the function. +! + do j = 1, n + sum2 = dot_product ( qtb(1:j), r(1:j,j) ) + l = ipvt(j) + wa1(j) = sum2 / diag(l) + end do + + gnorm = enorm ( n, wa1 ) + paru = gnorm / delta + + if ( paru == 0.0D+00 ) then + paru = dwarf / min ( delta, 0.1D+00 ) + end if +! +! If the input PAR lies outside of the interval (PARL, PARU), +! set PAR to the closer endpoint. +! + par = max ( par, parl ) + par = min ( par, paru ) + if ( par == 0.0D+00 ) then + par = gnorm / dxnorm + end if +! +! Beginning of an iteration. +! + do + + iter = iter + 1 +! +! Evaluate the function at the current value of PAR. +! + if ( par == 0.0D+00 ) then + par = max ( dwarf, 0.001D+00 * paru ) + end if + + wa1(1:n) = sqrt ( par ) * diag(1:n) + + call qrsolv ( n, r, ldr, ipvt, wa1, qtb, x, sdiag ) + + wa2(1:n) = diag(1:n) * x(1:n) + dxnorm = enorm ( n, wa2 ) + temp = fp + fp = dxnorm - delta +! +! If the function is small enough, accept the current value of PAR. +! + if ( abs ( fp ) <= 0.1D+00 * delta ) then + exit + end if +! +! Test for the exceptional cases where PARL +! is zero or the number of iterations has reached 10. +! + if ( parl == 0.0D+00 .and. fp <= temp .and. temp < 0.0D+00 ) then + exit + else if ( iter == 10 ) then + exit + end if +! +! Compute the Newton correction. +! + do j = 1, n + l = ipvt(j) + wa1(j) = diag(l) * ( wa2(l) / dxnorm ) + end do + + do j = 1, n + wa1(j) = wa1(j) / sdiag(j) + temp = wa1(j) + wa1(j+1:n) = wa1(j+1:n) - r(j+1:n,j) * temp + end do + + temp = enorm ( n, wa1 ) + parc = ( ( fp / delta ) / temp ) / temp +! +! Depending on the sign of the function, update PARL or PARU. +! + if ( 0.0D+00 < fp ) then + parl = max ( parl, par ) + else if ( fp < 0.0D+00 ) then + paru = min ( paru, par ) + end if +! +! Compute an improved estimate for PAR. +! + par = max ( parl, par + parc ) +! +! End of an iteration. +! + end do +! +! Termination. +! + if ( iter == 0 ) then + par = 0.0D+00 + end if + + return +end +subroutine lmstr ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + +!*****************************************************************************80 +! +!! LMSTR minimizes M functions in N variables using Levenberg-Marquardt method. +! +! Discussion: +! +! LMSTR minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm +! which uses minimal storage. +! +! The user must provide a subroutine which calculates the functions and +! the rows of the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the rows of the jacobian. +! FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjrow, iflag ) +! integer ( kind = 4 ) m +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjrow(n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If the input value of IFLAG is I > 1, calculate the (I-1)-st row of +! the jacobian at X, and return this vector in FJROW. +! To terminate the algorithm, set the output value of IFLAG negative. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array. The upper +! triangle of FJAC contains an upper triangular matrix R such that +! +! P' * ( JAC' * JAC ) * P = R' * R, +! +! where P is a permutation matrix and JAC is the final calculated jacobian. +! Column J of P is column IPVT(J) of the identity matrix. The lower +! triangular part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) FTOL. Termination occurs when both the actual and +! predicted relative reductions in the sum of squares are at most FTOL. +! Therefore, FTOL measures the relative error desired in the sum of +! squares. FTOL should be nonnegative. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, real ( kind = 8 ) GTOL. termination occurs when the cosine of the +! angle between FVEC and any column of the jacobian is at most GTOL in +! absolute value. Therefore, GTOL measures the orthogonality desired +! between the function vector and the columns of the jacobian. GTOL should +! be nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number +! of calls to FCN with IFLAG = 1 is at least MAXFEV by the end of +! an iteration. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See the +! description of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, both actual and predicted relative reductions in the sum of squares +! are at most FTOL. +! 2, relative error between two consecutive iterates is at most XTOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, the cosine of the angle between FVEC and any column of the jacobian +! is at most GTOL in absolute value. +! 5, number of calls to FCN with IFLAG = 1 has reached MAXFEV. +! 6, FTOL is too small. No further reduction in the sum of squares is +! possible. +! 7, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 8, GTOL is too small. FVEC is orthogonal to the columns of the +! jacobian to machine precision. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN +! with IFLAG = 1. +! +! Output, integer ( kind = 4 ) NJEV, the number of calls to FCN +! with IFLAG = 2. +! +! Output, integer ( kind = 4 ) IPVT(N), defines a permutation matrix P such +! that JAC * P = Q * R, where JAC is the final calculated jacobian, Q is +! orthogonal (not stored), and R is upper triangular. +! Column J of P is column IPVT(J) of the identity matrix. +! +! Output, real ( kind = 8 ) QTF(N), contains the first N elements of Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dirder + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gnorm + real ( kind = 8 ) gtol + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) iter + integer ( kind = 4 ) j + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) par + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) ratio + logical sing + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) temp1 + real ( kind = 8 ) temp2 + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(m) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + njev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + go to 340 + else if ( m < n ) then + go to 340 + else if ( ldfjac < n ) then + go to 340 + else if ( ftol < 0.0D+00 ) then + go to 340 + else if ( xtol < 0.0D+00 ) then + go to 340 + else if ( gtol < 0.0D+00 ) then + go to 340 + else if ( maxfev <= 0 ) then + go to 340 + else if ( factor <= 0.0D+00 ) then + go to 340 + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 340 + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( m, n, x, fvec, wa3, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + go to 340 + end if + + fnorm = enorm ( m, fvec ) +! +! Initialize Levenberg-Marquardt parameter and iteration counter. +! + par = 0.0D+00 + iter = 1 +! +! Beginning of the outer loop. +! + 30 continue +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter-1, nprint ) == 0 ) then + call fcn ( m, n, x, fvec, wa3, iflag ) + end if + if ( iflag < 0 ) then + go to 340 + end if + end if +! +! Compute the QR factorization of the jacobian matrix calculated one row +! at a time, while simultaneously forming Q'* FVEC and storing +! the first N components in QTF. +! + qtf(1:n) = 0.0D+00 + fjac(1:n,1:n) = 0.0D+00 + iflag = 2 + + do i = 1, m + call fcn ( m, n, x, fvec, wa3, iflag ) + if ( iflag < 0 ) then + go to 340 + end if + temp = fvec(i) + call rwupdt ( n, fjac, ldfjac, wa3, qtf, temp, wa1, wa2 ) + iflag = iflag + 1 + end do + + njev = njev + 1 +! +! If the jacobian is rank deficient, call QRFAC to +! reorder its columns and update the components of QTF. +! + sing = .false. + do j = 1, n + if ( fjac(j,j) == 0.0D+00 ) then + sing = .true. + end if + ipvt(j) = j + wa2(j) = enorm ( j, fjac(1,j) ) + end do + + if ( sing ) then + + pivot = .true. + call qrfac ( n, n, fjac, ldfjac, pivot, ipvt, n, wa1, wa2 ) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + + sum2 = dot_product ( qtf(j:n), fjac(j:n,j) ) + temp = - sum2 / fjac(j,j) + qtf(j:n) = qtf(j:n) + fjac(j:n,j) * temp + + end if + + fjac(j,j) = wa1(j) + + end do + + end if +! +! On the first iteration +! if mode is 1, +! scale according to the norms of the columns of the initial jacobian. +! calculate the norm of the scaled X, +! initialize the step bound delta. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + + end if + + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Compute the norm of the scaled gradient. +! + gnorm = 0.0D+00 + + if ( fnorm /= 0.0D+00 ) then + + do j = 1, n + l = ipvt(j) + if ( wa2(l) /= 0.0D+00 ) then + sum2 = dot_product ( qtf(1:j), fjac(1:j,j) ) / fnorm + gnorm = max ( gnorm, abs ( sum2 / wa2(l) ) ) + end if + end do + + end if +! +! Test for convergence of the gradient norm. +! + if ( gnorm <= gtol ) then + info = 4 + go to 340 + end if +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +240 continue +! +! Determine the Levenberg-Marquardt parameter. +! + call lmpar ( n, fjac, ldfjac, ipvt, diag, qtf, delta, par, wa1, wa2 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = -wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( m, n, wa2, wa4, wa3, iflag ) + nfev = nfev + 1 + if ( iflag < 0 ) then + go to 340 + end if + fnorm1 = enorm ( m, wa4 ) +! +! Compute the scaled actual reduction. +! + if ( 0.1D+00 * fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + else + actred = -1.0D+00 + end if +! +! Compute the scaled predicted reduction and +! the scaled directional derivative. +! + do j = 1, n + wa3(j) = 0.0D+00 + l = ipvt(j) + temp = wa1(l) + wa3(1:j) = wa3(1:j) + fjac(1:j,j) * temp + end do + + temp1 = enorm ( n, wa3 ) / fnorm + temp2 = ( sqrt(par) * pnorm ) / fnorm + prered = temp1 ** 2 + temp2 ** 2 / 0.5D+00 + dirder = - ( temp1 ** 2 + temp2 ** 2 ) +! +! Compute the ratio of the actual to the predicted reduction. +! + if ( prered /= 0.0D+00 ) then + ratio = actred / prered + else + ratio = 0.0D+00 + end if +! +! Update the step bound. +! + if ( ratio <= 0.25D+00 ) then + + if ( actred >= 0.0D+00 ) then + temp = 0.5D+00 + else + temp = 0.5D+00 * dirder / ( dirder + 0.5D+00 * actred ) + end if + + if ( 0.1D+00 * fnorm1 >= fnorm .or. temp < 0.1D+00 ) then + temp = 0.1D+00 + end if + + delta = temp * min ( delta, pnorm / 0.1D+00 ) + par = par / temp + + else + + if ( par == 0.0D+00 .or. ratio >= 0.75D+00 ) then + delta = pnorm / 0.5D+00 + par = 0.5D+00 * par + end if + + end if +! +! Test for successful iteration. +! + if ( ratio >= 0.0001D+00 ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:m) = wa4(1:m) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Tests for convergence, termination and stringent tolerances. +! + if ( abs ( actred ) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 1 + end if + + if ( delta <= xtol * xnorm ) then + info = 2 + end if + + if ( abs ( actred ) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 .and. info == 2 ) then + info = 3 + end if + + if ( info /= 0 ) then + go to 340 + end if + + if ( nfev >= maxfev) then + info = 5 + end if + + if ( abs ( actred ) <= epsmch .and. prered <= epsmch & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 6 + end if + + if ( delta <= epsmch * xnorm ) then + info = 7 + end if + + if ( gnorm <= epsmch ) then + info = 8 + end if + + if ( info /= 0 ) then + go to 340 + end if +! +! End of the inner loop. Repeat if iteration unsuccessful. +! + if ( ratio < 0.0001D+00 ) then + go to 240 + end if +! +! End of the outer loop. +! + go to 30 + + 340 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( m, n, x, fvec, wa3, iflag ) + end if + + return +end +subroutine lmstr1 ( fcn, m, n, x, fvec, fjac, ldfjac, tol, info ) + +!*****************************************************************************80 +! +!! LMSTR1 minimizes M functions in N variables using Levenberg-Marquardt method. +! +! Discussion: +! +! LMSTR1 minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm +! which uses minimal storage. +! +! This is done by using the more general least-squares solver +! LMSTR. The user must provide a subroutine which calculates +! the functions and the rows of the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 19 August 2016 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the rows of the jacobian. +! FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjrow, iflag ) +! integer ( kind = 4 ) m +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjrow(n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If the input value of IFLAG is I > 1, calculate the (I-1)-st row of +! the jacobian at X, and return this vector in FJROW. +! To terminate the algorithm, set the output value of IFLAG negative. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array. The upper +! triangle contains an upper triangular matrix R such that +! +! P' * ( JAC' * JAC ) * P = R' * R, +! +! where P is a permutation matrix and JAC is the final calculated +! jacobian. Column J of P is column IPVT(J) of the identity matrix. +! The lower triangular part of FJAC contains information generated +! during the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates either that the relative error in the sum of squares is at +! most TOL or that the relative error between X and the solution is at +! most TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error in the sum of squares +! is at most TOL. +! 2, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, FVEC is orthogonal to the columns of the jacobian to machine precision. +! 5, number of calls to FCN with IFLAG = 1 has reached 100*(N+1). +! 6, TOL is too small. No further reduction in the sum of squares +! is possible. +! 7, TOL is too small. No further improvement in the approximate +! solution X is possible. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gtol + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + if ( n <= 0 ) then + info = 0 + return + end if + + if ( m < n ) then + info = 0 + return + end if + + if ( ldfjac < n ) then + info = 0 + return + end if + + if ( tol < 0.0D+00 ) then + info = 0 + return + end if + + fvec(1:n) = 0.0D+00 + fjac(1:ldfjac,1:n) = 0.0D+00 + ftol = tol + xtol = tol + gtol = 0.0D+00 + maxfev = 100 * ( n + 1 ) + diag(1:n) = 0.0D+00 + mode = 1 + factor = 100.0D+00 + nprint = 0 + info = 0 + nfev = 0 + njev = 0 + ipvt(1:n) = 0 + qtf(1:n) = 0.0D+00 + + call lmstr ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + + if ( info == 8 ) then + info = 4 + end if + + return +end +subroutine qform ( m, n, q, ldq ) + +!*****************************************************************************80 +! +!! QFORM produces the explicit QR factorization of a matrix. +! +! Discussion: +! +! The QR factorization of a matrix is usually accumulated in implicit +! form, that is, as a series of orthogonal transformations of the +! original matrix. This routine carries out those transformations, +! to explicitly exhibit the factorization constructed by QRFAC. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, is a positive integer input variable set +! to the number of rows of A and the order of Q. +! +! Input, integer ( kind = 4 ) N, is a positive integer input variable set +! to the number of columns of A. +! +! Input/output, real ( kind = 8 ) Q(LDQ,M). Q is an M by M array. +! On input the full lower trapezoid in the first min(M,N) columns of Q +! contains the factored form. +! On output, Q has been accumulated into a square matrix. +! +! Input, integer ( kind = 4 ) LDQ, is a positive integer input variable +! not less than M which specifies the leading dimension of the array Q. +! + implicit none + + integer ( kind = 4 ) ldq + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) l + integer ( kind = 4 ) minmn + real ( kind = 8 ) q(ldq,m) + real ( kind = 8 ) temp + real ( kind = 8 ) wa(m) + + minmn = min ( m, n ) + + do j = 2, minmn + q(1:j-1,j) = 0.0D+00 + end do +! +! Initialize remaining columns to those of the identity matrix. +! + q(1:m,n+1:m) = 0.0D+00 + + do j = n + 1, m + q(j,j) = 1.0D+00 + end do +! +! Accumulate Q from its factored form. +! + do l = 1, minmn + + k = minmn - l + 1 + + wa(k:m) = q(k:m,k) + + q(k:m,k) = 0.0D+00 + q(k,k) = 1.0D+00 + + if ( wa(k) /= 0.0D+00 ) then + + do j = k, m + temp = dot_product ( wa(k:m), q(k:m,j) ) / wa(k) + q(k:m,j) = q(k:m,j) - temp * wa(k:m) + end do + + end if + + end do + + return +end +subroutine qrfac ( m, n, a, lda, pivot, ipvt, lipvt, rdiag, acnorm ) + +!*****************************************************************************80 +! +!! QRFAC computes a QR factorization using Householder transformations. +! +! Discussion: +! +! This function uses Householder transformations with optional column +! pivoting to compute a QR factorization of the +! M by N matrix A. That is, QRFAC determines an orthogonal +! matrix Q, a permutation matrix P, and an upper trapezoidal +! matrix R with diagonal elements of nonincreasing magnitude, +! such that A*P = Q*R. +! +! The Householder transformation for column K, K = 1,2,...,min(M,N), +! is of the form +! +! I - ( 1 / U(K) ) * U * U' +! +! where U has zeros in the first K-1 positions. +! +! The form of this transformation and the method of pivoting first +! appeared in the corresponding LINPACK routine. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows of A. +! +! Input, integer ( kind = 4 ) N, the number of columns of A. +! +! Input/output, real ( kind = 8 ) A(LDA,N), the M by N array. +! On input, A contains the matrix for which the QR factorization is to +! be computed. On output, the strict upper trapezoidal part of A contains +! the strict upper trapezoidal part of R, and the lower trapezoidal +! part of A contains a factored form of Q, the non-trivial elements of +! the U vectors described above. +! +! Input, integer ( kind = 4 ) LDA, the leading dimension of A, which must +! be no less than M. +! +! Input, logical PIVOT, is TRUE if column pivoting is to be carried out. +! +! Output, integer ( kind = 4 ) IPVT(LIPVT), defines the permutation matrix P +! such that A*P = Q*R. Column J of P is column IPVT(J) of the identity +! matrix. If PIVOT is false, IPVT is not referenced. +! +! Input, integer ( kind = 4 ) LIPVT, the dimension of IPVT, which should +! be N if pivoting is used. +! +! Output, real ( kind = 8 ) RDIAG(N), contains the diagonal elements of R. +! +! Output, real ( kind = 8 ) ACNORM(N), the norms of the corresponding +! columns of the input matrix A. If this information is not needed, +! then ACNORM can coincide with RDIAG. +! + implicit none + + integer ( kind = 4 ) lda + integer ( kind = 4 ) lipvt + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(lda,n) + real ( kind = 8 ) acnorm(n) + real ( kind = 8 ) ajnorm + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + integer ( kind = 4 ) i + integer ( kind = 4 ) i4_temp + integer ( kind = 4 ) ipvt(lipvt) + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) kmax + integer ( kind = 4 ) minmn + logical pivot + real ( kind = 8 ) r8_temp(m) + real ( kind = 8 ) rdiag(n) + real ( kind = 8 ) temp + real ( kind = 8 ) wa(n) + + epsmch = epsilon ( epsmch ) +! +! Compute the initial column norms and initialize several arrays. +! + do j = 1, n + acnorm(j) = enorm ( m, a(1:m,j) ) + end do + + rdiag(1:n) = acnorm(1:n) + wa(1:n) = acnorm(1:n) + + if ( pivot ) then + do j = 1, n + ipvt(j) = j + end do + end if +! +! Reduce A to R with Householder transformations. +! + minmn = min ( m, n ) + + do j = 1, minmn +! +! Bring the column of largest norm into the pivot position. +! + if ( pivot ) then + + kmax = j + + do k = j, n + if ( rdiag(kmax) < rdiag(k) ) then + kmax = k + end if + end do + + if ( kmax /= j ) then + + r8_temp(1:m) = a(1:m,j) + a(1:m,j) = a(1:m,kmax) + a(1:m,kmax) = r8_temp(1:m) + + rdiag(kmax) = rdiag(j) + wa(kmax) = wa(j) + + i4_temp = ipvt(j) + ipvt(j) = ipvt(kmax) + ipvt(kmax) = i4_temp + + end if + + end if +! +! Compute the Householder transformation to reduce the +! J-th column of A to a multiple of the J-th unit vector. +! + ajnorm = enorm ( m-j+1, a(j,j) ) + + if ( ajnorm /= 0.0D+00 ) then + + if ( a(j,j) < 0.0D+00 ) then + ajnorm = -ajnorm + end if + + a(j:m,j) = a(j:m,j) / ajnorm + a(j,j) = a(j,j) + 1.0D+00 +! +! Apply the transformation to the remaining columns and update the norms. +! + do k = j + 1, n + + temp = dot_product ( a(j:m,j), a(j:m,k) ) / a(j,j) + + a(j:m,k) = a(j:m,k) - temp * a(j:m,j) + + if ( pivot .and. rdiag(k) /= 0.0D+00 ) then + + temp = a(j,k) / rdiag(k) + rdiag(k) = rdiag(k) * sqrt ( max ( 0.0D+00, 1.0D+00-temp ** 2 ) ) + + if ( 0.05D+00 * ( rdiag(k) / wa(k) ) ** 2 <= epsmch ) then + rdiag(k) = enorm ( m-j, a(j+1,k) ) + wa(k) = rdiag(k) + end if + + end if + + end do + + end if + + rdiag(j) = - ajnorm + + end do + + return +end +subroutine qrsolv ( n, r, ldr, ipvt, diag, qtb, x, sdiag ) + +!*****************************************************************************80 +! +!! QRSOLV solves a rectangular linear system A*x=b in the least squares sense. +! +! Discussion: +! +! Given an M by N matrix A, an N by N diagonal matrix D, +! and an M-vector B, the problem is to determine an X which +! solves the system +! +! A*X = B +! D*X = 0 +! +! in the least squares sense. +! +! This function completes the solution of the problem +! if it is provided with the necessary information from the +! QR factorization, with column pivoting, of A. That is, if +! Q*P = Q*R, where P is a permutation matrix, Q has orthogonal +! columns, and R is an upper triangular matrix with diagonal +! elements of nonincreasing magnitude, then QRSOLV expects +! the full upper triangle of R, the permutation matrix p, +! and the first N components of Q'*B. +! +! The system is then equivalent to +! +! R*Z = Q'*B +! P'*D*P*Z = 0 +! +! where X = P*Z. If this system does not have full rank, +! then a least squares solution is obtained. On output QRSOLV +! also provides an upper triangular matrix S such that +! +! P'*(A'*A + D*D)*P = S'*S. +! +! S is computed within QRSOLV and may be of separate interest. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of R. +! +! Input/output, real ( kind = 8 ) R(LDR,N), the N by N matrix. +! On input the full upper triangle must contain the full upper triangle +! of the matrix R. On output the full upper triangle is unaltered, and +! the strict lower triangle contains the strict upper triangle +! (transposed) of the upper triangular matrix S. +! +! Input, integer ( kind = 4 ) LDR, the leading dimension of R, which must be +! at least N. +! +! Input, integer ( kind = 4 ) IPVT(N), defines the permutation matrix P such +! that A*P = Q*R. Column J of P is column IPVT(J) of the identity matrix. +! +! Input, real ( kind = 8 ) DIAG(N), the diagonal elements of the matrix D. +! +! Input, real ( kind = 8 ) QTB(N), the first N elements of the vector Q'*B. +! +! Output, real ( kind = 8 ) X(N), the least squares solution. +! +! Output, real ( kind = 8 ) SDIAG(N), the diagonal elements of the upper +! triangular matrix S. +! + implicit none + + integer ( kind = 4 ) ldr + integer ( kind = 4 ) n + + real ( kind = 8 ) c + real ( kind = 8 ) cotan + real ( kind = 8 ) diag(n) + integer ( kind = 4 ) i + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) l + integer ( kind = 4 ) nsing + real ( kind = 8 ) qtb(n) + real ( kind = 8 ) qtbpj + real ( kind = 8 ) r(ldr,n) + real ( kind = 8 ) s + real ( kind = 8 ) sdiag(n) + real ( kind = 8 ) sum2 + real ( kind = 8 ) t + real ( kind = 8 ) temp + real ( kind = 8 ) wa(n) + real ( kind = 8 ) x(n) +! +! Copy R and Q'*B to preserve input and initialize S. +! +! In particular, save the diagonal elements of R in X. +! + do j = 1, n + r(j:n,j) = r(j,j:n) + x(j) = r(j,j) + end do + + wa(1:n) = qtb(1:n) +! +! Eliminate the diagonal matrix D using a Givens rotation. +! + do j = 1, n +! +! Prepare the row of D to be eliminated, locating the +! diagonal element using P from the QR factorization. +! + l = ipvt(j) + + if ( diag(l) /= 0.0D+00 ) then + + sdiag(j:n) = 0.0D+00 + sdiag(j) = diag(l) +! +! The transformations to eliminate the row of D +! modify only a single element of Q'*B +! beyond the first N, which is initially zero. +! + qtbpj = 0.0D+00 + + do k = j, n +! +! Determine a Givens rotation which eliminates the +! appropriate element in the current row of D. +! + if ( sdiag(k) /= 0.0D+00 ) then + + if ( abs ( r(k,k) ) < abs ( sdiag(k) ) ) then + cotan = r(k,k) / sdiag(k) + s = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + c = s * cotan + else + t = sdiag(k) / r(k,k) + c = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * t ** 2 ) + s = c * t + end if +! +! Compute the modified diagonal element of R and +! the modified element of (Q'*B,0). +! + r(k,k) = c * r(k,k) + s * sdiag(k) + temp = c * wa(k) + s * qtbpj + qtbpj = - s * wa(k) + c * qtbpj + wa(k) = temp +! +! Accumulate the tranformation in the row of S. +! + do i = k+1, n + temp = c * r(i,k) + s * sdiag(i) + sdiag(i) = - s * r(i,k) + c * sdiag(i) + r(i,k) = temp + end do + + end if + + end do + + end if +! +! Store the diagonal element of S and restore +! the corresponding diagonal element of R. +! + sdiag(j) = r(j,j) + r(j,j) = x(j) + + end do +! +! Solve the triangular system for Z. If the system is +! singular, then obtain a least squares solution. +! + nsing = n + + do j = 1, n + + if ( sdiag(j) == 0.0D+00 .and. nsing == n ) then + nsing = j - 1 + end if + + if ( nsing < n ) then + wa(j) = 0.0D+00 + end if + + end do + + do j = nsing, 1, -1 + sum2 = dot_product ( wa(j+1:nsing), r(j+1:nsing,j) ) + wa(j) = ( wa(j) - sum2 ) / sdiag(j) + end do +! +! Permute the components of Z back to components of X. +! + do j = 1, n + l = ipvt(j) + x(l) = wa(j) + end do + + return +end +subroutine r1mpyq ( m, n, a, lda, v, w ) + +!*****************************************************************************80 +! +!! R1MPYQ computes A*Q, where Q is the product of Householder transformations. +! +! Discussion: +! +! Given an M by N matrix A, this function computes A*Q where +! Q is the product of 2*(N - 1) transformations +! +! GV(N-1)*...*GV(1)*GW(1)*...*GW(N-1) +! +! and GV(I), GW(I) are Givens rotations in the (I,N) plane which +! eliminate elements in the I-th and N-th planes, respectively. +! Q itself is not given, rather the information to recover the +! GV, GW rotations is supplied. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows of A. +! +! Input, integer ( kind = 4 ) N, the number of columns of A. +! +! Input/output, real ( kind = 8 ) A(LDA,N), the M by N array. +! On input, the matrix A to be postmultiplied by the orthogonal matrix Q. +! On output, the value of A*Q. +! +! Input, integer ( kind = 4 ) LDA, the leading dimension of A, which must not +! be less than M. +! +! Input, real ( kind = 8 ) V(N), W(N), contain the information necessary +! to recover the Givens rotations GV and GW. +! + implicit none + + integer ( kind = 4 ) lda + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(lda,n) + real ( kind = 8 ) c + integer ( kind = 4 ) i + integer ( kind = 4 ) j + real ( kind = 8 ) s + real ( kind = 8 ) temp + real ( kind = 8 ) v(n) + real ( kind = 8 ) w(n) +! +! Apply the first set of Givens rotations to A. +! + do j = n - 1, 1, -1 + + if ( 1.0D+00 < abs ( v(j) ) ) then + c = 1.0D+00 / v(j) + s = sqrt ( 1.0D+00 - c ** 2 ) + else + s = v(j) + c = sqrt ( 1.0D+00 - s ** 2 ) + end if + + do i = 1, m + temp = c * a(i,j) - s * a(i,n) + a(i,n) = s * a(i,j) + c * a(i,n) + a(i,j) = temp + end do + + end do +! +! Apply the second set of Givens rotations to A. +! + do j = 1, n - 1 + + if ( abs ( w(j) ) > 1.0D+00 ) then + c = 1.0D+00 / w(j) + s = sqrt ( 1.0D+00 - c ** 2 ) + else + s = w(j) + c = sqrt ( 1.0D+00 - s ** 2 ) + end if + + do i = 1, m + temp = c * a(i,j) + s * a(i,n) + a(i,n) = - s * a(i,j) + c * a(i,n) + a(i,j) = temp + end do + + end do + + return +end +subroutine r1updt ( m, n, s, ls, u, v, w, sing ) + +!*****************************************************************************80 +! +!! R1UPDT re-triangularizes a matrix after a rank one update. +! +! Discussion: +! +! Given an M by N lower trapezoidal matrix S, an M-vector U, and an +! N-vector V, the problem is to determine an orthogonal matrix Q such that +! +! (S + U * V' ) * Q +! +! is again lower trapezoidal. +! +! This function determines Q as the product of 2 * (N - 1) +! transformations +! +! GV(N-1)*...*GV(1)*GW(1)*...*GW(N-1) +! +! where GV(I), GW(I) are Givens rotations in the (I,N) plane +! which eliminate elements in the I-th and N-th planes, +! respectively. Q itself is not accumulated, rather the +! information to recover the GV and GW rotations is returned. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows of S. +! +! Input, integer ( kind = 4 ) N, the number of columns of S. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) S(LS). On input, the lower trapezoidal +! matrix S stored by columns. On output S contains the lower trapezoidal +! matrix produced as described above. +! +! Input, integer ( kind = 4 ) LS, the length of the S array. LS must be at +! least (N*(2*M-N+1))/2. +! +! Input, real ( kind = 8 ) U(M), the U vector. +! +! Input/output, real ( kind = 8 ) V(N). On input, V must contain the +! vector V. On output V contains the information necessary to recover the +! Givens rotations GV described above. +! +! Output, real ( kind = 8 ) W(M), contains information necessary to +! recover the Givens rotations GW described above. +! +! Output, logical SING, is set to TRUE if any of the diagonal elements +! of the output S are zero. Otherwise SING is set FALSE. +! + implicit none + + integer ( kind = 4 ) ls + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) cos + real ( kind = 8 ) cotan + real ( kind = 8 ) giant + integer ( kind = 4 ) i + integer ( kind = 4 ) j + integer ( kind = 4 ) jj + integer ( kind = 4 ) l + real ( kind = 8 ) s(ls) + real ( kind = 8 ) sin + logical sing + real ( kind = 8 ) tan + real ( kind = 8 ) tau + real ( kind = 8 ) temp + real ( kind = 8 ) u(m) + real ( kind = 8 ) v(n) + real ( kind = 8 ) w(m) +! +! GIANT is the largest magnitude. +! + giant = huge ( giant ) +! +! Initialize the diagonal element pointer. +! + jj = ( n * ( 2 * m - n + 1 ) ) / 2 - ( m - n ) +! +! Move the nontrivial part of the last column of S into W. +! + l = jj + do i = n, m + w(i) = s(l) + l = l + 1 + end do +! +! Rotate the vector V into a multiple of the N-th unit vector +! in such a way that a spike is introduced into W. +! + do j = n - 1, 1, -1 + + jj = jj - ( m - j + 1 ) + w(j) = 0.0D+00 + + if ( v(j) /= 0.0D+00 ) then +! +! Determine a Givens rotation which eliminates the +! J-th element of V. +! + if ( abs ( v(n) ) < abs ( v(j) ) ) then + cotan = v(n) / v(j) + sin = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + cos = sin * cotan + tau = 1.0D+00 + if ( abs ( cos ) * giant > 1.0D+00 ) then + tau = 1.0D+00 / cos + end if + else + tan = v(j) / v(n) + cos = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * tan ** 2 ) + sin = cos * tan + tau = sin + end if +! +! Apply the transformation to V and store the information +! necessary to recover the Givens rotation. +! + v(n) = sin * v(j) + cos * v(n) + v(j) = tau +! +! Apply the transformation to S and extend the spike in W. +! + l = jj + do i = j, m + temp = cos * s(l) - sin * w(i) + w(i) = sin * s(l) + cos * w(i) + s(l) = temp + l = l + 1 + end do + + end if + + end do +! +! Add the spike from the rank 1 update to W. +! + w(1:m) = w(1:m) + v(n) * u(1:m) +! +! Eliminate the spike. +! + sing = .false. + + do j = 1, n-1 + + if ( w(j) /= 0.0D+00 ) then +! +! Determine a Givens rotation which eliminates the +! J-th element of the spike. +! + if ( abs ( s(jj) ) < abs ( w(j) ) ) then + + cotan = s(jj) / w(j) + sin = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + cos = sin * cotan + + if ( 1.0D+00 < abs ( cos ) * giant ) then + tau = 1.0D+00 / cos + else + tau = 1.0D+00 + end if + + else + + tan = w(j) / s(jj) + cos = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * tan ** 2 ) + sin = cos * tan + tau = sin + + end if +! +! Apply the transformation to S and reduce the spike in W. +! + l = jj + do i = j, m + temp = cos * s(l) + sin * w(i) + w(i) = - sin * s(l) + cos * w(i) + s(l) = temp + l = l + 1 + end do +! +! Store the information necessary to recover the Givens rotation. +! + w(j) = tau + + end if +! +! Test for zero diagonal elements in the output S. +! + if ( s(jj) == 0.0D+00 ) then + sing = .true. + end if + + jj = jj + ( m - j + 1 ) + + end do +! +! Move W back into the last column of the output S. +! + l = jj + do i = n, m + s(l) = w(i) + l = l + 1 + end do + + if ( s(jj) == 0.0D+00 ) then + sing = .true. + end if + + return +end +function r8_uniform_01 ( seed ) + +!*****************************************************************************80 +! +!! R8_UNIFORM_01 returns a unit pseudorandom R8. +! +! Discussion: +! +! An R8 is a real ( kind = 8 ) value. +! +! For now, the input quantity SEED is an integer variable. +! +! This routine implements the recursion +! +! seed = 16807 * seed mod ( 2^31 - 1 ) +! r8_uniform_01 = seed / ( 2^31 - 1 ) +! +! The integer arithmetic never requires more than 32 bits, +! including a sign bit. +! +! If the initial seed is 12345, then the first three computations are +! +! Input Output R8_UNIFORM_01 +! SEED SEED +! +! 12345 207482415 0.096616 +! 207482415 1790989824 0.833995 +! 1790989824 2035175616 0.947702 +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 05 July 2006 +! +! Author: +! +! John Burkardt +! +! Reference: +! +! Paul Bratley, Bennett Fox, Linus Schrage, +! A Guide to Simulation, +! Springer Verlag, pages 201-202, 1983. +! +! Pierre L'Ecuyer, +! Random Number Generation, +! in Handbook of Simulation, +! edited by Jerry Banks, +! Wiley Interscience, page 95, 1998. +! +! Bennett Fox, +! Algorithm 647: +! Implementation and Relative Efficiency of Quasirandom +! Sequence Generators, +! ACM Transactions on Mathematical Software, +! Volume 12, Number 4, pages 362-376, 1986. +! +! Peter Lewis, Allen Goodman, James Miller +! A Pseudo-Random Number Generator for the System/360, +! IBM Systems Journal, +! Volume 8, pages 136-143, 1969. +! +! Parameters: +! +! Input/output, integer ( kind = 4 ) SEED, the "seed" value, which should +! NOT be 0. On output, SEED has been updated. +! +! Output, real ( kind = 8 ) R8_UNIFORM_01, a new pseudorandom variate, +! strictly between 0 and 1. +! + implicit none + + integer ( kind = 4 ), parameter :: i4_huge = 2147483647 + integer ( kind = 4 ) k + real ( kind = 8 ) r8_uniform_01 + integer ( kind = 4 ) seed + + if ( seed == 0 ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'R8_UNIFORM_01 - Fatal error!' + write ( *, '(a)' ) ' Input value of SEED = 0.' + stop 1 + end if + + k = seed / 127773 + + seed = 16807 * ( seed - k * 127773 ) - k * 2836 + + if ( seed < 0 ) then + seed = seed + i4_huge + end if + + r8_uniform_01 = real ( seed, kind = 8 ) * 4.656612875D-10 + + return +end +subroutine r8mat_print ( m, n, a, title ) + +!*****************************************************************************80 +! +!! R8MAT_PRINT prints an R8MAT. +! +! Discussion: +! +! An R8MAT is an MxN array of R8's, stored by (I,J) -> [I+J*M]. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 12 September 2004 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows in A. +! +! Input, integer ( kind = 4 ) N, the number of columns in A. +! +! Input, real ( kind = 8 ) A(M,N), the matrix. +! +! Input, character ( len = * ) TITLE, a title. +! + implicit none + + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(m,n) + character ( len = * ) title + + call r8mat_print_some ( m, n, a, 1, 1, m, n, title ) + + return +end +subroutine r8mat_print_some ( m, n, a, ilo, jlo, ihi, jhi, title ) + +!*****************************************************************************80 +! +!! R8MAT_PRINT_SOME prints some of an R8MAT. +! +! Discussion: +! +! An R8MAT is an MxN array of R8's, stored by (I,J) -> [I+J*M]. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 10 September 2009 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, N, the number of rows and columns. +! +! Input, real ( kind = 8 ) A(M,N), an M by N matrix to be printed. +! +! Input, integer ( kind = 4 ) ILO, JLO, the first row and column to print. +! +! Input, integer ( kind = 4 ) IHI, JHI, the last row and column to print. +! +! Input, character ( len = * ) TITLE, a title. +! + implicit none + + integer ( kind = 4 ), parameter :: incx = 5 + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(m,n) + character ( len = 14 ) ctemp(incx) + integer ( kind = 4 ) i + integer ( kind = 4 ) i2hi + integer ( kind = 4 ) i2lo + integer ( kind = 4 ) ihi + integer ( kind = 4 ) ilo + integer ( kind = 4 ) inc + integer ( kind = 4 ) j + integer ( kind = 4 ) j2 + integer ( kind = 4 ) j2hi + integer ( kind = 4 ) j2lo + integer ( kind = 4 ) jhi + integer ( kind = 4 ) jlo + character ( len = * ) title + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) trim ( title ) + + if ( m <= 0 .or. n <= 0 ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) ' (None)' + return + end if + + do j2lo = max ( jlo, 1 ), min ( jhi, n ), incx + + j2hi = j2lo + incx - 1 + j2hi = min ( j2hi, n ) + j2hi = min ( j2hi, jhi ) + + inc = j2hi + 1 - j2lo + + write ( *, '(a)' ) ' ' + + do j = j2lo, j2hi + j2 = j + 1 - j2lo + write ( ctemp(j2), '(i8,6x)' ) j + end do + + write ( *, '('' Col '',5a14)' ) ctemp(1:inc) + write ( *, '(a)' ) ' Row' + write ( *, '(a)' ) ' ' + + i2lo = max ( ilo, 1 ) + i2hi = min ( ihi, m ) + + do i = i2lo, i2hi + + do j2 = 1, inc + + j = j2lo - 1 + j2 + + if ( a(i,j) == real ( int ( a(i,j) ), kind = 8 ) ) then + write ( ctemp(j2), '(f8.0,6x)' ) a(i,j) + else + write ( ctemp(j2), '(g14.6)' ) a(i,j) + end if + + end do + + write ( *, '(i5,a,5a14)' ) i, ':', ( ctemp(j), j = 1, inc ) + + end do + + end do + + return +end +subroutine r8vec_print ( n, a, title ) + +!*****************************************************************************80 +! +!! R8VEC_PRINT prints an R8VEC. +! +! Discussion: +! +! An R8VEC is a vector of R8's. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 22 August 2000 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the number of components of the vector. +! +! Input, real ( kind = 8 ) A(N), the vector to be printed. +! +! Input, character ( len = * ) TITLE, a title. +! + implicit none + + integer ( kind = 4 ) n + + real ( kind = 8 ) a(n) + integer ( kind = 4 ) i + character ( len = * ) title + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) trim ( title ) + write ( *, '(a)' ) ' ' + do i = 1, n + write ( *, '(2x,i8,2x,g16.8)' ) i, a(i) + end do + + return +end +subroutine rwupdt ( n, r, ldr, w, b, alpha, c, s ) + +!*****************************************************************************80 +! +!! RWUPDT computes the decomposition of triangular matrix augmented by one row. +! +! Discussion: +! +! Given an N by N upper triangular matrix R, this function +! computes the QR decomposition of the matrix formed when a row +! is added to R. If the row is specified by the vector W, then +! RWUPDT determines an orthogonal matrix Q such that when the +! N+1 by N matrix composed of R augmented by W is premultiplied +! by Q', the resulting matrix is upper trapezoidal. +! The matrix Q' is the product of N transformations +! +! G(N)*G(N-1)* ... *G(1) +! +! where G(I) is a Givens rotation in the (I,N+1) plane which eliminates +! elements in the (N+1)-st plane. RWUPDT also computes the product +! Q'*C where C is the (N+1)-vector (B,ALPHA). +! +! Q itself is not accumulated, rather the information to recover the G +! rotations is supplied. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of R. +! +! Input/output, real ( kind = 8 ) R(LDR,N). On input the upper triangular +! part of R must contain the matrix to be updated. On output R contains the +! updated triangular matrix. +! +! Input, integer ( kind = 4 ) LDR, the leading dimension of the array R. +! LDR must not be less than N. +! +! Input, real ( kind = 8 ) W(N), the row vector to be added to R. +! +! Input/output, real ( kind = 8 ) B(N). On input, the first N elements +! of the vector C. On output the first N elements of the vector Q'*C. +! +! Input/output, real ( kind = 8 ) ALPHA. On input, the (N+1)-st element +! of the vector C. On output the (N+1)-st element of the vector Q'*C. +! +! Output, real ( kind = 8 ) C(N), S(N), the cosines and sines of the +! transforming Givens rotations. +! + implicit none + + integer ( kind = 4 ) ldr + integer ( kind = 4 ) n + + real ( kind = 8 ) alpha + real ( kind = 8 ) b(n) + real ( kind = 8 ) c(n) + real ( kind = 8 ) cotan + integer ( kind = 4 ) i + integer ( kind = 4 ) j + real ( kind = 8 ) r(ldr,n) + real ( kind = 8 ) rowj + real ( kind = 8 ) s(n) + real ( kind = 8 ) tan + real ( kind = 8 ) temp + real ( kind = 8 ) w(n) + + do j = 1, n + + rowj = w(j) +! +! Apply the previous transformations to R(I,J), I=1,2,...,J-1, and to W(J). +! + do i = 1, j - 1 + temp = c(i) * r(i,j) + s(i) * rowj + rowj = - s(i) * r(i,j) + c(i) * rowj + r(i,j) = temp + end do +! +! Determine a Givens rotation which eliminates W(J). +! + c(j) = 1.0D+00 + s(j) = 0.0D+00 + + if ( rowj /= 0.0D+00 ) then + + if ( abs ( r(j,j) ) < abs ( rowj ) ) then + cotan = r(j,j) / rowj + s(j) = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + c(j) = s(j) * cotan + else + tan = rowj / r(j,j) + c(j) = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * tan ** 2 ) + s(j) = c(j) * tan + end if +! +! Apply the current transformation to R(J,J), B(J), and ALPHA. +! + r(j,j) = c(j) * r(j,j) + s(j) * rowj + temp = c(j) * b(j) + s(j) * alpha + alpha = - s(j) * b(j) + c(j) * alpha + b(j) = temp + + end if + + end do + + return +end +subroutine timestamp ( ) + +!*****************************************************************************80 +! +!! TIMESTAMP prints the current YMDHMS date as a time stamp. +! +! Example: +! +! 31 May 2001 9:45:54.872 AM +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 18 May 2013 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! None +! + implicit none + + character ( len = 8 ) ampm + integer ( kind = 4 ) d + integer ( kind = 4 ) h + integer ( kind = 4 ) m + integer ( kind = 4 ) mm + character ( len = 9 ), parameter, dimension(12) :: month = (/ & + 'January ', 'February ', 'March ', 'April ', & + 'May ', 'June ', 'July ', 'August ', & + 'September', 'October ', 'November ', 'December ' /) + integer ( kind = 4 ) n + integer ( kind = 4 ) s + integer ( kind = 4 ) values(8) + integer ( kind = 4 ) y + + call date_and_time ( values = values ) + + y = values(1) + m = values(2) + d = values(3) + h = values(5) + n = values(6) + s = values(7) + mm = values(8) + + if ( h < 12 ) then + ampm = 'AM' + else if ( h == 12 ) then + if ( n == 0 .and. s == 0 ) then + ampm = 'Noon' + else + ampm = 'PM' + end if + else + h = h - 12 + if ( h < 12 ) then + ampm = 'PM' + else if ( h == 12 ) then + if ( n == 0 .and. s == 0 ) then + ampm = 'Midnight' + else + ampm = 'AM' + end if + end if + end if + + write ( *, '(i2.2,1x,a,1x,i4,2x,i2,a1,i2.2,a1,i2.2,a1,i3.3,1x,a)' ) & + d, trim ( month(m) ), y, h, ':', n, ':', s, '.', mm, trim ( ampm ) + + return +end \ No newline at end of file diff --git a/python/05_tokamak/SplittedSympGPR/plotting.py b/python/05_tokamak/SplittedSympGPR/plotting.py new file mode 100644 index 0000000..9839439 --- /dev/null +++ b/python/05_tokamak/SplittedSympGPR/plotting.py @@ -0,0 +1,63 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import cos, sin, empty_like, linspace, roll +from matplotlib.pyplot import figure, plot, xlabel, ylabel, subplot, grid, legend + +def plot_orbit(z): + figure() + plot(z[0,:]*cos(z[1,:]), z[0,:]*sin(z[1,:]),',') + xlabel(r'$R-R_0$') + ylabel(r'$Z$') + + +def plot_cost_function(F, zlast, zold, pthold): + """Plot the cost function""" + r = linspace(-0.2,0.2,100) + fres = empty_like(r) + for kr in range(len(r)): + fres[kr] = F([r[kr]], zold[1:], pthold) + + figure() + subplot(2,1,1) + plot(r,fres) + out_conv = F([zlast[0]], zold[1:], pthold) + plot(zlast[0], out_conv, 'rx') + xlabel('r') + ylabel('F') + grid() + +def plot_cost_function_jac(F, zlast, zold, pthold): + """Plot the cost function with Jacobian""" + + r = linspace(-0.2,0.2,100) + fres = empty_like(r) + jac = empty_like(r) + for kr in range(len(r)): + out = F([r[kr]], zold[1:], pthold) + fres[kr] = out[0] + jac[kr] = out[1][0] + + jacnum = (roll(fres,-1)-roll(fres,1))/(2.*(r[1]-r[0])) + jacnum[0] = jacnum[1] + jacnum[-1] = jacnum[-2] + + figure() + subplot(2,1,1) + plot(r,fres) + out_conv = F([zlast[0]],zold[1:],pthold) + plot(zlast[0],out_conv[0],'rx') + xlabel('r') + ylabel('F') + grid() + + subplot(2,1,2) + plot(r,jac) + plot(r,jacnum,'r--') + plot(zlast[0],out_conv[1],'rx') + xlabel('r') + ylabel('dF/dr') + legend(['analytical', 'numerical']) + grid() diff --git a/python/05_tokamak/SympGPR/Makefile b/python/05_tokamak/SympGPR/Makefile new file mode 100644 index 0000000..4700534 --- /dev/null +++ b/python/05_tokamak/SympGPR/Makefile @@ -0,0 +1,3 @@ +include *.mk $(bar) +clean: + rm -f *.x *.so *.o *.mod *.pyf *.pyd *.dylib *.dll \ No newline at end of file diff --git a/python/05_tokamak/SympGPR/calc_fieldlines.py b/python/05_tokamak/SympGPR/calc_fieldlines.py new file mode 100644 index 0000000..2df875a --- /dev/null +++ b/python/05_tokamak/SympGPR/calc_fieldlines.py @@ -0,0 +1,128 @@ +from numpy import zeros, array, arange, append, hstack, pi, cos, sin, mod, linspace, concatenate,vstack +import numpy as np +from scipy.optimize import fsolve, root +from common import f, qe, c +import common +import tkinter +import matplotlib +# matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from fieldlines import fieldlines +import ghalton +from profit.util.halton import halton +from random import randint +import random +# +# Attention: Variables are [pth, th, ph], so momentum in first component and +# "time" in third. Change to usual Z = z[:,1::-1] before using! +# +N = 78 +nm = 2000 +nturn = 2 # Number of full turns +nph = 32 # Number of steps per turn + +nphmap = 1 +eps = 0.001 +seq = ghalton.Halton(3) +X0 = seq.get(N)*array([0.26, 2*pi, 0])+array([0.1, 0, 0])# +yint = zeros([nph*nturn + 1, 3, N]) +q = zeros([N]) +p = zeros([N]) +Q = zeros([N]) +P = zeros([N]) + + +for ipart in range(0, N): + + r0 = X0[ipart, 0] + th0 = X0[ipart, 1] + ph0 = X0[ipart, 2] + fieldlines.init(nph=nph, am=-3, an=2, aeps=eps, aphase=0.0, arlast=r0) + pth0 = qe/c*fieldlines.ath(r0, th0, ph0) + + #%% + + z = zeros([nph*nturn + 1, 3]) + z[0,:] = [pth0, th0, 0.0] + + from time import time + tic = time() + + for kph in arange(nph*nturn): + z[kph+1, :] = z[kph, :] + fieldlines.timestep(z[kph+1, :]) + yint[:,:,ipart] = z + +#%% + th = z[::nph,1] + r = array([fieldlines.compute_r(zk, 0.3) for zk in z[::nph,:]]).flatten() + +#%% + +print('Integration of training data finished') + +q = yint[0,1] +p = yint[0,0]*1e2 +Q = yint[int(nph/nphmap),1] +P = yint[int(nph/nphmap),0]*1e2 +zqtrain = Q - q +zptrain = p - P + +#%% + +xtrain = q.flatten() +ytrain = P.flatten() +xtrain = hstack((q, P)).T +ztrain1 = zptrain.flatten() +ztrain2 = zqtrain.flatten() +ztrain = concatenate((ztrain1, ztrain2)) + +# set test data and calculate reference orbits +Ntest = 30 +nturntest = nm + +qminmap = 0.15 +qmaxmap = 0.25 +pminmap = 0 +pmaxmap = 2*np.pi + +qminplt = 0.15 +qmaxplt = 0.31 + +random.seed(1) +q0map = linspace(qminmap,qmaxmap,Ntest) +p0map = linspace(pminmap,pmaxmap,Ntest) +q0map = random.sample(list(q0map), Ntest) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map) +P0map = np.array(p0map) +X0test = np.stack((Q0map, P0map, np.zeros(Ntest))).T + +random.seed(1) +q0map = linspace(qminplt,qmaxplt,Ntest) +p0map = linspace(pminmap,pmaxmap,Ntest) +q0map = random.sample(list(q0map), Ntest) +p0map = random.sample(list(p0map), Ntest) +Q0map = np.array(q0map) +P0map = np.array(p0map) +X0testplt = np.stack((Q0map, P0map, np.zeros(Ntest))).T + +yinttest = zeros([nph*nturntest + 1, 3, Ntest]) +# plt.figure() +for ipart in range(0, Ntest): + fieldlines.init(nph=nph, am=-3, an=2, aeps=eps, aphase=0.0, arlast=X0test[ipart,0]) + X0test[ipart, 0] = qe/c*fieldlines.ath(X0test[ipart,0], X0test[ipart,1], X0test[ipart,2]) + + fieldlines.init(nph=nph, am=-3, an=2, aeps=eps, aphase=0.0, arlast=X0testplt[ipart,0]) + X0testplt[ipart, 0] = qe/c*fieldlines.ath(X0testplt[ipart,0], X0testplt[ipart,1], X0testplt[ipart,2]) + temp = zeros([nph*nturntest + 1, 3]) + temp[0,:] = [X0testplt[ipart,0], X0testplt[ipart,1], 0.0] + + + for kph in arange(nph*nturntest): + temp[kph+1, :] = temp[kph, :] + fieldlines.timestep(temp[kph+1, :]) + yinttest[:,:,ipart] = temp + # plt.plot( mod(temp[::nph,1], 2*pi), temp[::nph,0]*1e2, ',') + +#plt.show(block = True) \ No newline at end of file diff --git a/python/05_tokamak/SympGPR/common.py b/python/05_tokamak/SympGPR/common.py new file mode 100644 index 0000000..b9947a3 --- /dev/null +++ b/python/05_tokamak/SympGPR/common.py @@ -0,0 +1,153 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" + +from numpy import empty, nan, zeros, ones, sqrt, mod, array +from field_test import field +#%% + +taub = 7800 # estimated bounce time + +def timesteps(steps_per_bounce, nbounce): + """ compute step size and number of timesteps """ + return taub/steps_per_bounce, nbounce*steps_per_bounce + +f = field() + +qe = 1.0; m = 1.0; c = 1.0 # particle charge, mass and speed of light +mu = 1e-5 # magnetic moment + +# Initial conditions +r0 = 0.1 +th0 = 1.5 +ph0 = 0.0 +vpar0 = 0.0 + +# Compute toroidal momentum from initial conditions +f.evaluate(r0, th0, ph0) +pph0 = m*vpar0*f.hph + qe/c*f.Aph + +# Buffer to avoid double evaluation within black-box root finding routines +bufsize = 16 +kbuf = 0 +zbuf = empty((4,bufsize)); zbuf[:] = nan +neval = 0 +buffer = empty(bufsize,dtype=object) +fbuf = empty(bufsize,dtype=object) + +dHdvpar = lambda vpar: m*vpar +pth = lambda vpar,hth,Ath: m*vpar*hth + qe*Ath/c +pph = lambda vpar,hph,Aph: m*vpar*hph + qe*Aph/c + +def get_val(z): + """ evaluates H, pth, vpar at z """ + f.evaluate(z[0], z[1], z[2]) + vpar = 1.0/f.hph*(z[3]-qe/c*f.Aph) + H = m*vpar**2/2 + mu*f.B + qe*f.Phie + pth = m*f.hth*vpar + qe/c*f.Ath + ret = [H, pth, vpar] + return ret + +def get_der(z): + """ evaluates H, pth, vpar and first derivatives at z """ + global neval, kbuf + + # re-evaluation at same point + if(bufsize>0): + for kb in range(bufsize): + if all(z==zbuf[:,kb]): + f.__dict__ = fbuf[kb].__dict__.copy() + return buffer[kb] + + neval = neval+1 + + [H,pth,vpar] = get_val(z) + + dvpardx = -(qe/(m*c)*f.dAph + vpar*f.dhph)/f.hph + dvpardpph = 1.0/(m*f.hph) + + dHdx = m*vpar*dvpardx + mu*f.dB + qe*f.dPhie + dHdpph = m*vpar/f.hph + + dpthdx = m*dvpardx*f.hth + m*vpar*f.dhth + qe/c*f.dAth + dpthdpph = f.hth/f.hph + + ret = [H, pth, vpar, dHdx, dHdpph, dpthdx, dpthdpph, dvpardx, dvpardpph] + + # fill buffer + if(bufsize>0): + zbuf[:,kbuf] = z[:] + buffer[kbuf] = ret + fbuf[kbuf] = field() + fbuf[kbuf].__dict__ = f.__dict__.copy() + kbuf = mod(kbuf+1,bufsize) + + return ret + +def get_der2(z): + """ evaluates H, pth, vpar with first and second derivatives at z """ + [H, pth, vpar, + dHdx, dHdpph, dpthdx, dpthdpph, dvpardx, dvpardpph] = get_der(z) + + d2pthdx2 = zeros(6) # d2dr2, d2dth2, d2dph2, d2drdth, d2drdph, d2dthdph + d2pthdpphdz = zeros(4) # d2dpphdr, d2dpphdth, d2dpphdph, d2dpph2 + d2Hdx2 = zeros(6) + d2Hdpphdz = zeros(4) + d2vpardx2 = zeros(6) + d2vpardpphdz = zeros(4) + + d2vpardx2[:3] = -(qe/(m*c)*f.d2Aph[:3] + f.d2hph[:3]*vpar + + 2*f.dhph*dvpardx)/f.hph + d2vpardx2[3] = -(qe/(m*c)*f.d2Aph[3] + f.d2hph[3]*vpar + + f.dhph[0]*dvpardx[1] + f.dhph[1]*dvpardx[0])/f.hph + d2vpardpphdz[:3] = -1.0/(m*f.hph**2)*f.d2hph[:3] + + d2pthdx2[:3] = m*(d2vpardx2[:3]*f.hth + 2*dvpardx*f.dhth + vpar*f.d2hth[:3] + + qe/(m*c)*f.d2Ath[:3]) + + d2pthdx2[3] = m*(d2vpardx2[3]*f.hth + dvpardx[0]*f.dhth[1] + + dvpardx[1]*f.dhth[0] + vpar*f.d2hth[3] + qe/(m*c)*f.d2Ath[3]) + d2pthdpphdz[:3] = f.dhth/f.hph - f.hth/f.hph**2*f.dhph + + d2Hdx2[:3] = m*(dvpardx[:3]**2 + vpar*d2vpardx2[:3]) \ + + mu*f.d2B[:3] + qe*f.d2Phie[:3] + d2Hdx2[3] = m*(dvpardx[0]*dvpardx[1] + vpar*d2vpardx2[3]) \ + + mu*f.d2B[3] + qe*f.d2Phie[3] + d2Hdpphdz[:3] = m*(1.0/f.hph*dvpardx - vpar/f.hph**2*f.dhph) + + return [H, pth, vpar, + dHdx, dHdpph, dpthdx, dpthdpph, dvpardx, dvpardpph, + d2pthdx2, d2pthdpphdz, d2Hdx2, d2Hdpphdz, d2vpardx2, d2vpardpphdz] + +class NewtonResult(): + x = array([]) + xold = array([]) + +def newton1(f, x0, rtol, atol, args): + ret = NewtonResult() + x = array([x0]) + xold = array([x0*(1e0+1e30*rtol)]) + fval = atol*array([1e30,1e30]) + while(abs(fval[0]) > atol and abs(x-xold)/(abs(x)*(1e0+1e-30)) > rtol): + fval = f(x, *args) + xold = x + x = x - fval[0]/fval[1] + ret.xold = xold + ret.x = x + return ret + +def newton(f, x0, rtol, atol, args): + ret = NewtonResult() + x = x0 + xold = x0*(1e0+1e30*rtol) + fval = atol*array([1e30*ones(x0),1e30*ones(x0)]) + while(abs(fval[0]) > atol and + sqrt((x-xold).dot(x-xold))/(sqrt(x.dot(x))*(1e0+1e-30)) > rtol): + fval = f(x, *args) + xold = x + x = x - fval[0]/fval[1] + ret.xold = xold + ret.x = x + return ret diff --git a/python/05_tokamak/SympGPR/field_test.py b/python/05_tokamak/SympGPR/field_test.py new file mode 100644 index 0000000..2e17fa5 --- /dev/null +++ b/python/05_tokamak/SympGPR/field_test.py @@ -0,0 +1,47 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import sin, cos, zeros_like, array + +B0 = 1.0 # magnetic field modulus normalization +iota0 = 1.0 # constant part of rotational transform +a = 0.5 # (equivalent) minor radius +R0 = 1.0 # (equivalent) major radius + +class field: + """ Model tokamak field with B ~ B0*(1-r/R0*cos(th)) """ + def evaluate(self, r, th, ph): + cth = cos(th) + sth = sin(th) + zer = zeros_like(r) + + self.Ath = B0*(r**2/2.0 - r**3/(3.0*R0)*cth) + self.dAth = array((B0*(r - r**2/R0*cth), B0*r**3*sth/(3.0*R0), zer)) + self.d2Ath = array((B0*(1.0 - 2.0*r/R0*cth), + B0*r**3*cth/(3.0*R0), + zer, + B0*r**2/R0*sth, + zer, + zer)) + + self.Aph = -B0*iota0*(r**2/2.0-r**4/(4.0*a**2)) + self.dAph = array((-B0*iota0*(r-r**3/a**2), zer, zer)) + self.d2Aph = array((-B0*iota0*(1.0-3.0*r**2/a**2), zer, zer, zer, zer, zer)) + + self.hth = iota0*(1.0-r**2/a**2)*r**2/R0 + self.dhth = array(((2.0*iota0*r*(a**2-2.0*r**2))/(a**2*R0), zer, zer)) + self.d2hth = array(((2.0*iota0*(a**2-6.0*r**2))/(a**2*R0), zer, zer, zer, zer, zer)) + + self.hph = R0 + r*cth + self.dhph = array((cth, -r*sth, zer)) + self.d2hph = array((zer, -r*cth, zer, -sth, zer) ) + + self.B = B0*(1.0 - r/R0*cth) + self.dB = array((-B0/R0*cth, B0*r/R0*sth, zer)) + self.d2B = array((zer, B0*r/R0*cth, zer, B0/R0*sth, zer, zer)) + + self.Phie = zer + self.dPhie = array((zer, zer, zer)) + self.d2Phie = array((zer, zer, zer, zer, zer, zer)) diff --git a/python/05_tokamak/SympGPR/fieldlines.f90 b/python/05_tokamak/SympGPR/fieldlines.f90 new file mode 100644 index 0000000..7f6e388 --- /dev/null +++ b/python/05_tokamak/SympGPR/fieldlines.f90 @@ -0,0 +1,174 @@ +module fieldlines +implicit none +save + +real(8), parameter :: pi = 4d0*atan(1d0) + +real(8), parameter :: B0 = 1d0 +real(8), parameter :: iota0 = 1d0 ! constant part of rotational transform +real(8), parameter :: a = 0.5d0 ! (equivalent) minor radius +real(8), parameter :: R0 = 1d0 ! (equivalent) major radius + +real(8) :: dph ! Timestep +real(8) :: eps ! Absolute perturbation +real(8) :: phase ! Perturbation phase +integer :: m, n ! Perturbation harmonics + +real(8) :: rlast = 0d0 + +contains + +subroutine init(nph, am, an, aeps, aphase, arlast) + integer :: nph, am, an + real(8) :: aeps, aphase, arlast + + dph = 2*pi/nph + m = am + n = an + eps = aeps + phase = aphase + rlast = arlast +end subroutine init + + +function Ath(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: Ath + + Ath = B0*(r**2/2d0 - r**3/(3d0*R0)*cos(th)) +end function Ath + + +function dAthdr(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAthdr + + dAthdr = B0*(r - r**2/R0*cos(th)) +end function dAthdr + + +function dAthdth(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAthdth + + dAthdth = B0*r**3*sin(th)/(3d0*R0) +end function dAthdth + + +function Aph(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: Aph + + Aph = -B0*iota0*(r**2/2d0-r**4/(4d0*a**2))*(1d0+eps*cos(m*th+n*ph+phase)) +end function Aph + + +function dAphdr(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAphdr + + dAphdr = -B0*iota0*(r-r**3/a**2)*(1d0+eps*cos(m*th+n*ph+phase)) +end function dAphdr + + +function dAphdth(r, th, ph) + real(8), intent(in) :: r, th, ph + real(8) :: dAphdth + + dAphdth = B0*iota0*(r**2/2d0-r**4/(4d0*a**2))*m*eps*sin(m*th + n*ph + phase) +end function dAphdth + + +subroutine f_r(x, y, dy, args) + ! Implicit function to solve for r + real(8), intent(in) :: x ! r + real(8), intent(in) :: args(3) ! pth, th, ph + real(8), intent(out) :: y ! Target function + real(8), intent(out) :: dy ! Jacobian + ! Compute r implicitly for given th, p_th and ph + y = args(1) - Ath(x, args(2), args(3)) ! pth - pth(r, th, ph) + dy = -dAthdr(x, args(2), args(3)) +end subroutine f_r + + +function compute_r(z, rstart) result(r) + real(8), intent(in) :: z(3) + real(8), intent(in) :: rstart + real(8) :: r + + integer :: k + real(8) :: y, dy + + r = rstart + do k = 1, 20 ! Newton iterations + call f_r(r, y, dy, z) + r = r - y/dy + end do +end function compute_r + + +subroutine F_tstep(znew, y, dy, zold) + real(8), intent(in) :: znew(2) + real(8), intent(in) :: zold(3) + real(8), intent(out) :: y(2) ! Target function + real(8), intent(out) :: dy(2,2) ! Jacobian + + real(8) :: z(3), r + real(8) :: dApdr, dApdt, dAtdr, dAtdt + + z(1:2) = 0.5d0*(zold(1:2) + znew) + z(3) = zold(3) + 0.5d0*dph + r = compute_r(z, rlast) + rlast = r + + dApdr = dAphdr(r, z(2), z(3)) + dApdt = dAphdth(r, z(2), z(3)) + dAtdr = dAthdr(r, z(2), z(3)) + dAtdt = dAthdth(r, z(2), z(3)) + + y(1) = zold(1) - znew(1) + dph*(dApdt - dApdr*dAtdt/dAtdr) + y(2) = zold(2) - znew(2) - dph*dApdr/dAtdr + + ! print *, y + + ! TODO: look in SIMPLE + dy(1,1) = 0d0 + dy(2,1) = 0d0 + dy(1,2) = 0d0 + dy(2,2) = 0d0 + +end subroutine F_tstep + + +subroutine timestep(z) + real(8), intent(inout) :: z(3) + + integer :: info + real(8) :: zold(3) + real(8) :: z12new(2) + real(8) :: fvec(3) + + zold = z + z12new = z(1:2) + + call hybrd1(f_tstep_wrap, 2, z12new, fvec, 1d-13, info) +! print *, info + + z(1:2) = z12new + z(3) = zold(3) + dph + + contains + + subroutine f_tstep_wrap(n, x, fvec, iflag) + integer, intent(in) :: n + double precision, intent(in) :: x(n) + double precision, intent(out) :: fvec(n) + integer, intent(in) :: iflag + + real(8) :: dummy(2,2) ! for later Jacobian + + call F_tstep(x, fvec, dummy, zold) + end subroutine f_tstep_wrap +end subroutine timestep + +end module fieldlines diff --git a/python/05_tokamak/SympGPR/fieldlines_fast.py b/python/05_tokamak/SympGPR/fieldlines_fast.py new file mode 100644 index 0000000..cf93a19 --- /dev/null +++ b/python/05_tokamak/SympGPR/fieldlines_fast.py @@ -0,0 +1,54 @@ +#%% +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import zeros, array, arange, append, hstack, pi, cos, sin, mod +from scipy.optimize import fsolve, root +import common +from plotting import plot_orbit +from matplotlib.pyplot import * +from fieldlines import fieldlines + +# +# Attention: Variables are [pth, th, ph], so momentum in first component and +# "time" in third. Change to usual Z = z[:,1::-1] before using! +# + +nturn = 1000 # Number of full turns +nph = 32 # Number of steps per turn + +r0 = 0.3 +th0 = 0.0 +ph0 = 0.0 + +fieldlines.init(nph=nph, am=-3, an=2, aeps=.001, aphase=0.0, arlast=r0) + +pth0 = fieldlines.ath(r0, th0, ph0) + +#%% + +z = zeros([nph*nturn + 1, 3]) +z[0,:] = [pth0, th0, 0.0] + +from time import time +tic = time() + +for kph in arange(nph*nturn): + z[kph+1, :] = z[kph, :] + fieldlines.timestep(z[kph+1, :]) + + +print(f'Time taken: {time()-tic}') +print(f'Safety factor: {(z[nph, 2] - z[0, 2])/(z[nph, 1] - z[0, 1])}') + +#%% +figure() +plot(z[::nph,0], mod(z[::nph,1], 2*pi), ',') + +#%% +figure() +th = z[::nph,1] +r = array([fieldlines.compute_r(zk, 0.3) for zk in z[::nph,:]]).flatten() +plot(r*cos(th), r*sin(th), ',') diff --git a/python/05_tokamak/SympGPR/fieldlines_mid.py b/python/05_tokamak/SympGPR/fieldlines_mid.py new file mode 100644 index 0000000..49d171f --- /dev/null +++ b/python/05_tokamak/SympGPR/fieldlines_mid.py @@ -0,0 +1,67 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import zeros, array, arange, append, hstack, pi, cos, sin +from scipy.optimize import fsolve, root +from common import f, qe, c +import common +from plotting import plot_orbit +from matplotlib.pyplot import * + +nturn = 50 # Number of full turns +nph = 10 # Number of steps per turn +dph = 2*pi/nph # Integrator step in phi +epspert = .005 # Absolute perturbation +m = -3 # Poloidal perturbation harmonic +n = 2 # Toroidal perturbation harmonic + +r0 = 0.3 +th0 = 0.0 +ph0 = 0.0 +f.evaluate(r0, th0, ph0) +pth0 = qe/c*f.Ath + +z = zeros([3, nph*nturn + 1]) +z[:,0] = [pth0, th0, 0.0] + +def compute_r(pth, th, ph): + # Compute r implicitly for given th, p_th and ph + def rootfun(x): + f.evaluate(x, th, ph) + return pth - qe/c*f.Ath # pth - pth(r, th, ph) + + sol = fsolve(rootfun, r0) + return sol + +def F_tstep(znew, zold): + z = zeros(3) + z[0:2] = 0.5*(zold[0:2] + znew) + z[2] = zold[2] + 0.5*dph + r = compute_r(z[0], z[1], z[2]) + f.evaluate(r, z[1], z[2]) + f.dAph = f.dAph - epspert*sin(m*z[1] + n*z[2]) # Apply perturbation on Aph + ret = zeros(2) + ret[0] = zold[0] - znew[0] + dph*(f.dAph[1] - f.dAph[0]*f.dAth[1]/f.dAth[0]) # dAph/dth + ret[1] = zold[1] - znew[1] - dph*f.dAph[0]/f.dAth[0] # dAph/dpth + return ret + +from time import time +tic = time() + +for kph in arange(nph*nturn): + zold = z[:,kph] + sol = root(F_tstep, zold[0:2], method='hybr', tol=1e-12, args=(zold)) + z[0:2, kph+1] = sol.x + z[2, kph+1] = z[2, kph] + dph + +print(f'Time taken: {time()-tic}') +print(f'Safety factor: {(z[2, nph] - z[2, 0])/(z[1, nph] - z[1, 0])}') + +#%% +#plot_orbit(z) +th = z[1,:] #::nph] +r = array([compute_r(zk[0], zk[1], 0.0) for zk in z.T]).flatten() +plot(r*cos(th), r*sin(th), '.') +# %% diff --git a/python/05_tokamak/SympGPR/fieldlines_rk.py b/python/05_tokamak/SympGPR/fieldlines_rk.py new file mode 100644 index 0000000..0b4d0de --- /dev/null +++ b/python/05_tokamak/SympGPR/fieldlines_rk.py @@ -0,0 +1,77 @@ +#%% +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import zeros, array, arange, append, hstack, pi, cos, sin, mod, pi +from scipy.integrate import odeint +from scipy.optimize import fsolve +from common import f, qe, c +import common +from plotting import plot_orbit +from matplotlib.pyplot import * + +nturn = 50 +nph = 10 +dph = 2*pi/nph +eps = .0001 # Absolute perturbation +m = -3 # Poloidal perturbation harmonic +n = 2 # Toroidal perturbation harmonic +p = array([0.0, pi/5.0]) # Perturbation phase + +r0 = 0.3 +th0 = 0.0 +ph0 = 0.0 +f.evaluate(r0, th0, ph0) +pth0 = qe/c*f.Ath + +z = zeros([3, nturn*nph+1]) +z[:,0] = [pth0, th0, 0.0] +rprev = r0 +#%% +def compute_r(pth, th, ph): + # Compute r implicitly for given th, p_th and ph + global rprev + def rootfun(x): + f.evaluate(x, th, ph) + return pth - qe/c*f.Ath # pth - pth(r, th, ph) + + sol = fsolve(rootfun, rprev) + reprev = sol + return sol + +def zdot(z, ph): + r = compute_r(z[0], z[1], ph) + f.evaluate(r, z[1], ph) + f.dAph[1] = f.dAph[1] - eps*sin(m*z[1] + n*ph + p) # Apply perturbation on Aph + ret = zeros(2) + ret[0] = f.dAph[1] - f.dAph[0]*f.dAth[1]/f.dAth[0] # pthdot = dAph/dth + ret[1] = -f.dAph[0]/f.dAth[0] # thdot = dAph/dpth + return ret + +from time import time +tic = time() + +for kturn in arange(nph*nturn): + + znew = odeint(zdot, z[0:2, kturn], [z[2, kturn], z[2, kturn]+dph]) + + z[0:2, kturn+1] = znew[-1, :] + z[2, kturn+1] = z[2, kturn] + dph + +print('Time taken: {}'.format(time()-tic)) +print(f'Safety factor: {(z[2, nph] - z[2, 0])/(z[1, nph] - z[1, 0])}') +#%% +figure() +plot(mod(z[1,::nph], 2*pi), z[0,::nph], ',') +#%% +figure() +plot(mod(z[1,:], 2*pi), z[0,:], ',') +#%% +#plot_orbit(z) +th = z[1,::nph] +r = array([compute_r(zk[0], zk[1], 0.0) for zk in z[:,::nph].T]).flatten() +figure() +plot(r*cos(th), r*sin(th), '.') +# %% diff --git a/python/05_tokamak/SympGPR/func.py b/python/05_tokamak/SympGPR/func.py new file mode 100644 index 0000000..ddcf198 --- /dev/null +++ b/python/05_tokamak/SympGPR/func.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Apr 21 16:47:00 2020 + +@author: Katharina Rath +""" + +import numpy as np +from scipy.optimize import newton, bisect +from scipy.linalg import solve_triangular +import scipy +from fieldlines import fieldlines + +from kernels import * + +def f_kern(x, y, x0, y0, l): + return kern_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdx0(x, y, x0, y0, l): + return d2kdxdx0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydy0(x, y, x0, y0, l): + return d2kdydy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdxdy0(x, y, x0, y0, l): + return d2kdxdy0_num(x,y,x0,y0,l[0], l[1]) + +def d2kdydx0(x, y, x0, y0, l): + return d2kdxdy0(x, y, x0, y0, l) + +def build_K(xin, x0in, hyp, K): + # set up covariance matrix with derivative observations, Eq. (38) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0]//2 + N0 = K.shape[1]//2 + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + K[k,N0+lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + K[N+k,N0+lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def buildKreg(xin, x0in, hyp, K): + # set up "usual" covariance matrix for GP on regular grid (q,p) + # print(hyp) + l = hyp[:-1] + sig = hyp[-1] + N = K.shape[0] + N0 = K.shape[1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + for k in range(N): + for lk in range(N0): + K[k,lk] = f_kern( + x0[lk], y0[lk], x[k], y[k], l) + K[:,:] = sig*K[:,:] + +def build_dK(xin, x0in, hyp): + # set up covariance matrix + N = len(xin)//2 + N0 = len(x0in)//2 + l = hyp[:-1] + sig = hyp[-1] + x0 = x0in[0:N0] + x = xin[0:N] + y0 = x0in[N0:2*N0] + y = xin[N:2*N] + k11 = np.empty((N0, N)) + k12 = np.empty((N0, N)) + k21 = np.empty((N0, N)) + k22 = np.empty((N0, N)) + + dK = [] + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dlx_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N0): + for lk in range(N): + k11[k,lk] = sig*d3kdxdx0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k21[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k12[k,lk] = sig*d3kdxdy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + k22[k,lk] = sig*d3kdydy0dly_num( + x0[k], y0[k], x[lk], y[lk], l[0], l[1]) + + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + + for k in range(N): + for lk in range(N0): + k11[k,lk] = d2kdxdx0( + x0[lk], y0[lk], x[k], y[k], l) + k21[k,lk] = d2kdxdy0( + x0[lk], y0[lk], x[k], y[k], l) + k12[k,lk] = d2kdydx0( + x0[lk], y0[lk], x[k], y[k], l) + k22[k,lk] = d2kdydy0( + x0[lk], y0[lk], x[k], y[k], l) + dK.append(np.vstack([ + np.hstack([k11, k12]), + np.hstack([k21, k22]) + ])) + #dK[:,:] = sig*dK + return dK + +def gpsolve(Ky, ft): + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_triangular( + L.T, solve_triangular(L, ft, lower=True, check_finite=False), + lower=False, check_finite=False) + + return L, alpha + +# compute log-likelihood according to RW, p.19 +def solve_cholesky(L, b): + return solve_triangular( + L.T, solve_triangular(L, b, lower=True, check_finite=False), + lower=False, check_finite=False) + +def nll_chol_reg(hyp, x, y, N): + K = np.empty((N, N)) + buildKreg(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret +# negative log-posterior +def nll_chol(hyp, x, y, N): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + ret = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + return ret + +def nll_grad(hyp, x, y, N): + K = np.empty((N, N)) + build_K(x, x, hyp[:-1], K) + Ky = K + np.abs(hyp[-1])*np.diag(np.ones(N)) + Kyinv = np.linalg.inv(Ky) # invert GP matrix + L = scipy.linalg.cholesky(Ky, lower = True) + alpha = solve_cholesky(L, y) + nlp_val = 0.5*y.T.dot(alpha) + np.sum(np.log(L.diagonal())) + dK = build_dK(x, x, hyp[:-1]) + alpha = Kyinv.dot(y) + # Rasmussen (5.9) + nlp_grad = np.array([ + -0.5*alpha.T.dot(dK[0].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[0])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[1])), + -0.5*alpha.T.dot(dK[1].dot(alpha)) + 0.5*np.trace(Kyinv.dot(dK[2])) + ]) + return nlp_val, nlp_grad + +def guessP(x, y, hypp, xtrainp, ztrainp, Kyinvp, N): + Ntest = 1 + Kstar = np.empty((Ntest, int(len(xtrainp)/2))) + buildKreg(np.hstack((x,y)), xtrainp, hypp, Kstar) + Ef = Kstar.dot(Kyinvp.dot(ztrainp)) + return Ef + +def calcQ(x,y, xtrain, l, Kyinv, ztrain): + # get \Delta q from GP on mixed grid. + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack(([x], [y])), l, Kstar) + qGP = Kstar.T.dot(Kyinv.dot(ztrain)) + dq = qGP[1] + return dq + +def Pnewton(P, x, y, l, xtrain, Kyinv, ztrain): + Kstar = np.empty((len(xtrain), 2)) + build_K(xtrain, np.hstack((x, P)), l, Kstar) + pGP = Kstar.T.dot(Kyinv.dot(ztrain)) + f = pGP[0] - y + P + # print(pGP[0]) + return f + +def calcP(x,y, l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest): + # as P is given in an implicit relation, use newton to solve for P (Eq. (42)) + # use the GP on regular grid (q,p) for a first guess for P + pgss = guessP([x], [y], hypp, xtrainp, ztrainp, Kyinvp, Ntest) + res, r = newton(Pnewton, pgss, full_output=True, maxiter=50000, disp=True, + args = (np.array([x]), np.array ([y]), l, xtrain, Kyinv, ztrain)) + return res + +def applymap_tok(nm, Ntest, l, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv): + # Application of symplectic map + #init + pmap = np.zeros([nm, Ntest]) + qmap = np.zeros([nm, Ntest]) + #set initial conditions + pmap[0,:] = P0map + qmap[0,:] = Q0map + # loop through all test points and all time steps + for i in range(0,nm-1): + for k in range(0, Ntest): + if np.isnan(pmap[i, k]): + pmap[i+1,k] = np.nan + else: + pmap[i+1, k] = calcP(qmap[i,k], pmap[i, k], l, hypp, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv, Ntest) + + zk = np.array([pmap[i+1, k]*1e-2, qmap[i,k], 0]) + temp = fieldlines.compute_r(zk, 0.3) + if temp > 0.5 or pmap[i+1, k] < 0.0: + pmap[i+1, k] = np.nan + for k in range(0, Ntest): + if np.isnan(pmap[i+1, k]): + qmap[i+1,k] = np.nan + else: + # then: set new Q via calculating \Delta q and adding q + dqmap = calcQ(qmap[i,k], pmap[i+1,k], xtrain, l, Kyinv, ztrain) + qmap[i+1, k] = np.mod(dqmap + qmap[i, k], 2.0*np.pi) + return qmap, pmap + + diff --git a/python/05_tokamak/SympGPR/init_func.py b/python/05_tokamak/SympGPR/init_func.py new file mode 100644 index 0000000..15ce043 --- /dev/null +++ b/python/05_tokamak/SympGPR/init_func.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed May 6 14:50:32 2020 + +@author: ert +""" +#%% +from sympy import * +from sympy.utilities.autowrap import autowrap, ufuncify +from sympy.utilities.lambdify import lambdastr, lambdify, implemented_function +import shutil + +tmp = '.' +#try: +# shutil.rmtree(tmp) +#except: +# pass + +#%% # Kernel functions (kernel, 1st and second derivatives) +x, y, xa, ya, xb, yb, l, lx, ly = symbols(r'x y x_a y_a x_b, y_b, l, lx, ly') + + +kern_sqexp = exp(-x**2/(2.0*l**2)) +kern_sqexp_sin = exp(-sin(0.5*x)**2/(2.0*l**2)) + +kern_y = kern_sqexp.subs(x, ya-yb).subs(l, ly) +kern_x = kern_sqexp_sin.subs(x, xa-xb).subs(l, lx) +kern = (kern_x*kern_y).simplify() + +dkdxa = diff(kern, xa).simplify() +dkdxb = diff(kern, xb).simplify() +dkdya = diff(kern, ya).simplify() +dkdyb = diff(kern, yb).simplify() +dkdxadxb = diff(kern, xa, xb).simplify() +dkdyadyb = diff(kern, ya, yb).simplify() +dkdxadyb = diff(kern, xa, yb).simplify() + +d3kdxdx0dy0 = diff(kern, xa, xb, yb).simplify() +d3kdydy0dy0 = diff(kern, ya, yb, yb).simplify() +d3kdxdy0dy0 = diff(kern, xa, yb, yb).simplify() + +dkdlx = diff(kern, lx).simplify() +dkdly = diff(kern, ly).simplify() + +d3kdxdx0dlx = diff(kern, xa, xb, lx).simplify() +d3kdydy0dlx = diff(kern, ya, yb, lx).simplify() +d3kdxdy0dlx = diff(kern, xa, yb, lx).simplify() + +d3kdxdx0dly = diff(kern, xa, xb, ly).simplify() +d3kdydy0dly = diff(kern, ya, yb, ly).simplify() +d3kdxdy0dly = diff(kern, xa, yb, ly).simplify() + +#%% +from sympy.utilities.codegen import codegen +seq = (xa, ya, xb, yb, lx, ly) +[(name, code), (h_name, header)] = \ +codegen([('kern_num', kern), + ('dkdx_num', dkdxa), + ('dkdy_num', dkdya), + ('dkdx0_num', dkdxb), + ('dkdy0_num', dkdyb), + ('d2kdxdx0_num', dkdxadxb), + ('d2kdydy0_num', dkdyadyb), + ('d2kdxdy0_num', dkdxadyb), + ('d3kdxdx0dy0_num', d3kdxdx0dy0), + ('d3kdydy0dy0_num', d3kdydy0dy0), + ('d3kdxdy0dy0_num', d3kdxdy0dy0), + ('dkdlx_num', dkdlx), + ('dkdly_num', dkdly), + ('d3kdxdx0dlx_num', d3kdxdx0dlx), + ('d3kdydy0dlx_num', d3kdydy0dlx), + ('d3kdxdy0dlx_num', d3kdxdy0dlx), + ('d3kdxdx0dly_num', d3kdxdx0dly), + ('d3kdydy0dly_num', d3kdydy0dly), + ('d3kdxdy0dly_num', d3kdxdy0dly), + ], "F95", "kernels", + argument_sequence=seq, + header=False, empty=False) +with open(name, 'w') as f: + f.write(code) diff --git a/python/05_tokamak/SympGPR/kernels.f90 b/python/05_tokamak/SympGPR/kernels.f90 new file mode 100644 index 0000000..a6a970b --- /dev/null +++ b/python/05_tokamak/SympGPR/kernels.f90 @@ -0,0 +1,231 @@ +REAL*8 function kern_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +kern_num = exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0 & + *x_b)**2/lx**2) +end function +REAL*8 function dkdx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx_num = -0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy_num = 1.0d0*(-y_a + y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function dkdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdx0_num = 0.5d0*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a & + - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/lx**2 +end function +REAL*8 function dkdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdy0_num = 1.0d0*(y_a - y_b)*exp(0.5d0*(-lx**2*(y_a - y_b)**2 - ly**2* & + sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**2 +end function +REAL*8 function d2kdxdx0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdx0_num = 0.25d0*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - sin(0.5d0*x_a & + - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp(0.5d0*(-lx**2* & + (y_a - y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly** & + 2))/lx**4 +end function +REAL*8 function d2kdydy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdydy0_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(0.5d0*(-lx**2*(y_a - & + y_b)**2 - ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly & + **4 +end function +REAL*8 function d2kdxdy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d2kdxdy0_num = -0.5d0*(y_a - y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly & + **2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - & + 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx**2*ly**2) +end function +REAL*8 function d3kdxdx0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dy0_num = 0.25d0*(y_a - y_b)*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b) - & + sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)*exp( & + -0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)** & + 2)/(lx**2*ly**2))/(lx**4*ly**2) +end function +REAL*8 function d3kdydy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dy0_num = (3.0d0*ly**2 - (y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))/ly**6 +end function +REAL*8 function d3kdxdy0dy0_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dy0_num = 0.5d0*(1.0d0*ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2 & + *(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly & + **2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b)/(lx** & + 2*ly**4) +end function +REAL*8 function dkdlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdlx_num = 1.0d0*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0*sin(0.5d0*x_a & + - 0.5d0*x_b)**2/lx**2)*sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**3 +end function +REAL*8 function dkdly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +dkdly_num = 1.0d0*(y_a - y_b)**2*exp(-0.5d0*(y_a - y_b)**2/ly**2 - 0.5d0 & + *sin(0.5d0*x_a - 0.5d0*x_b)**2/lx**2)/ly**3 +end function +REAL*8 function d3kdxdx0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dlx_num = (-0.5d0*lx**4*cos(1.0d0*x_a - 1.0d0*x_b) + lx**2*( & + 0.75d0*cos(1.0d0*x_a - 1.0d0*x_b) + 0.5d0)*sin(0.5d0*x_a - 0.5d0* & + x_b)**2 - 0.25d0*sin(0.5d0*x_a - 0.5d0*x_b)**4*cos(0.5d0*x_a - & + 0.5d0*x_b)**2)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/lx**7 +end function +REAL*8 function d3kdydy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dlx_num = 1.0d0*(ly**2 - (y_a - y_b)**2)*exp(-0.5d0*(lx**2*(y_a & + - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))* & + sin(0.5d0*x_a - 0.5d0*x_b)**2/(lx**3*ly**4) +end function +REAL*8 function d3kdxdy0dlx_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dlx_num = (lx**2 - 0.5d0*sin(0.5d0*x_a - 0.5d0*x_b)**2)*(y_a - & + y_b)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - & + 0.5d0*x_b)**2)/(lx**2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos( & + 0.5d0*x_a - 0.5d0*x_b)/(lx**5*ly**2) +end function +REAL*8 function d3kdxdx0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdx0dly_num = 0.25d0*(y_a - y_b)**2*(lx**2*cos(1.0d0*x_a - 1.0d0*x_b & + ) - sin(0.5d0*x_a - 0.5d0*x_b)**2*cos(0.5d0*x_a - 0.5d0*x_b)**2)* & + exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0* & + x_b)**2)/(lx**2*ly**2))/(lx**4*ly**3) +end function +REAL*8 function d3kdydy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdydy0dly_num = (-2.0d0*ly**4 + 5.0d0*ly**2*(y_a - y_b)**2 - 1.0d0*( & + y_a - y_b)**4)*exp(-0.5d0*(lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0 & + *x_a - 0.5d0*x_b)**2)/(lx**2*ly**2))/ly**7 +end function +REAL*8 function d3kdxdy0dly_num(x_a, y_a, x_b, y_b, lx, ly) +implicit none +REAL*8, intent(in) :: x_a +REAL*8, intent(in) :: y_a +REAL*8, intent(in) :: x_b +REAL*8, intent(in) :: y_b +REAL*8, intent(in) :: lx +REAL*8, intent(in) :: ly +d3kdxdy0dly_num = (ly**2 - 0.5d0*(y_a - y_b)**2)*(y_a - y_b)*exp(-0.5d0* & + (lx**2*(y_a - y_b)**2 + ly**2*sin(0.5d0*x_a - 0.5d0*x_b)**2)/(lx & + **2*ly**2))*sin(0.5d0*x_a - 0.5d0*x_b)*cos(0.5d0*x_a - 0.5d0*x_b) & + /(lx**2*ly**5) +end function diff --git a/python/05_tokamak/SympGPR/kernels.py b/python/05_tokamak/SympGPR/kernels.py new file mode 100644 index 0000000..06f1f13 --- /dev/null +++ b/python/05_tokamak/SympGPR/kernels.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sun Aug 9 22:30:35 2020 + +@author: manal +""" + +def __bootstrap__(): + global __bootstrap__, __loader__, __file__ + import sys, pkg_resources, imp + __file__ = pkg_resources.resource_filename(__name__,'kernels.cpython-38-x86_64-linux-gnu.so') + __loader__ = None; del __bootstrap__, __loader__ + imp.load_dynamic(__name__,__file__) +__bootstrap__() \ No newline at end of file diff --git a/python/05_tokamak/SympGPR/main.py b/python/05_tokamak/SympGPR/main.py new file mode 100644 index 0000000..0a69a21 --- /dev/null +++ b/python/05_tokamak/SympGPR/main.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Sep 29 11:42:52 2020 + +@author: Katharina Rath +""" + +import numpy as np +import random +from scipy.optimize import minimize +from func import (build_K, buildKreg, applymap_tok, nll_chol_reg, nll_chol, nll_grad) +import tkinter +import matplotlib +# matplotlib.use('TkAgg') +import matplotlib.pyplot as plt +from sklearn.metrics import mean_squared_error +import scipy +from calc_fieldlines import q, Q, p, P, zqtrain, zptrain, ztrain, X0test, yinttest, xtrain, Ntest, nm, N +from scipy.integrate import solve_ivp +from mpl_toolkits.mplot3d import Axes3D +#%% init parameters + +sig2_n = 1e-12 #noise**2 in observations + +Q0map = X0test[:,1] +P0map = X0test[:,0]*1e2 + +#%% set up GP +# as indicated in Algorithm 1: Semi-implicit symplectic GP map +#hyperparameter optimization of length scales (lq, lp) + +# Step 1: Usual GP regression of P over (q,p) +#fit GP + hyperparameter optimization to have a first guess for newton for P +xtrainp = np.hstack((q, p)) +ztrainp = P-p + +def nll_transform2(log10hyp, sig2n, x, y, N): + hyp = 10**log10hyp + return nll_chol_reg(np.hstack((hyp, [sig2n])), x, y, N) +res = minimize(nll_transform2, np.array((0, 0, 1)), args = (1e-8, xtrainp, ztrainp.T.flatten(), N), method='L-BFGS-B') +print(res.success) + +lp = 10**res.x[0:2] +sigp = 10**res.x[2] +hypp = np.hstack((lp, sigp)) +print('Optimized lengthscales for regular GP: lq =', "{:.2f}".format(lp[0]), 'lp = ', "{:.2f}".format(lp[1]), 'sig = ', "{:.2f}".format(sigp)) +# build K and its inverse +Kp = np.zeros((N, N)) +buildKreg(xtrainp, xtrainp, hypp, Kp) +Kyinvp = scipy.linalg.inv(Kp + sig2_n*np.eye(Kp.shape[0])) +#%% +# Step 2: symplectic GP regression of -Delta p and Delta q over mixed variables (q,P) +# hyperparameter optimization for lengthscales (lq, lp) and GP fitting + +def nll_transform_grad(log10hyp, sig2n, x, y, N): + hyp = log10hyp + out = nll_grad(np.hstack((hyp, [sig2n])), x, y, N) + return out[0] +res = minimize(nll_transform_grad, np.array((0.5, 0.5, 10)), args = (sig2_n, xtrain, ztrain.T.flatten(), 2*N), method='L-BFGS-B') + +sol1 = res.x[0:2] +sig = res.x[2] +print(res.success) +l = [np.abs(sol1[0]), np.abs(sol1[1])] +print('Optimized lengthscales for mixed GP: lq =', "{:.2f}".format(l[0]), 'lp = ', "{:.2f}".format(l[1]), 'sig = ', "{:.2f}".format(sig)) + +#%% +#build K(x,x') and regularized inverse with sig2_n +# K(x,x') corresponds to L(q,P,q',P') given in Eq. (38) +hyp = np.hstack((l, sig)) +K = np.empty((2*N, 2*N)) +build_K(xtrain, xtrain, hyp, K) +Kyinv = scipy.linalg.inv(K + sig2_n*np.eye(K.shape[0])) + +# caluate training error +Eftrain = K.dot(Kyinv.dot(ztrain)) +outtrain = mean_squared_error(ztrain, Eftrain) +print('training error', "{:.1e}".format(outtrain)) + +#%% Application of symplectic map +qmap, pmap = applymap_tok( + nm, Ntest, hyp, hypp, Q0map, P0map, xtrainp, ztrainp, Kyinvp, xtrain, ztrain, Kyinv) + +fmap = np.stack((qmap, pmap)) + +#%% plot results + +plt.rc('xtick',labelsize=20) +plt.rc('ytick',labelsize=20) +plt.figure(figsize = [10,4]) +plt.subplot(1,3,1) +for i in range(0, Ntest): + plt.plot(np.mod(qmap[:,i], 2*np.pi), 1e-2*pmap[:,i], 'k^', label = 'GP', markersize = 0.5) +plt.xlabel(r"$\vartheta$", fontsize = 20) +plt.ylabel(r"$p_\vartheta$", fontsize = 20) +plt.ylim([0.007, 0.06]) +plt.tight_layout() + +plt.subplot(1,3,2) +plt.plot(np.mod(yinttest[::32,1],2*np.pi), yinttest[::32,0], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.xlabel(r"$\vartheta$", fontsize = 20) +plt.ylabel(r"$p_\vartheta$", fontsize = 20) +plt.ylim([0.007, 0.06]) +plt.tight_layout() + +plt.subplot(1,3,3) +for i in range(0, Ntest): + plt.plot(np.mod(qmap[:,i], 2*np.pi), 1e-2*pmap[:,i], 'k^', label = 'GP', markersize = 0.5) +plt.plot(np.mod(yinttest[::32,1],2*np.pi), yinttest[::32,0], color = 'darkgrey', marker = 'o', linestyle = 'None', markersize = 0.5) +plt.xlabel(r"$\vartheta$", fontsize = 20) +plt.ylabel(r"$p_\vartheta$", fontsize = 20) +plt.ylim([0.007, 0.06]) +plt.tight_layout() +# plt.show(block = True) \ No newline at end of file diff --git a/python/05_tokamak/SympGPR/make_fieldlines.mk b/python/05_tokamak/SympGPR/make_fieldlines.mk new file mode 100644 index 0000000..309ef46 --- /dev/null +++ b/python/05_tokamak/SympGPR/make_fieldlines.mk @@ -0,0 +1,13 @@ +all: fieldlines.*.so + +fieldlines.*.so: fieldlines.pyf fieldlines.o minpack.o + python3 -m numpy.f2py -m fieldlines -c fieldlines.pyf fieldlines.o minpack.o + +fieldlines.pyf: fieldlines.f90 + python3 -m numpy.f2py fieldlines.f90 -m fieldlines -h fieldlines.pyf --overwrite-signature + +fieldlines.o: fieldlines.f90 + gfortran -fPIC -fbacktrace -c -g fieldlines.f90 + +minpack.o: minpack.f90 + gfortran -fPIC -c -march=native -O3 minpack.f90 diff --git a/python/05_tokamak/SympGPR/make_kernels.mk b/python/05_tokamak/SympGPR/make_kernels.mk new file mode 100644 index 0000000..c63c736 --- /dev/null +++ b/python/05_tokamak/SympGPR/make_kernels.mk @@ -0,0 +1,10 @@ +FC = gfortran +FFLAGS = -Wall -march=native -O2 -g -fbacktrace +PYTHON = python3 +NAME = kernels + +all: $(NAME).f90 + $(PYTHON) -m numpy.f2py -m $(NAME) -c $(NAME).f90 --f90flags='$(FFLAGS)' -lgomp + +$(NAME).f90: init_func.py + $(PYTHON) init_func.py \ No newline at end of file diff --git a/python/05_tokamak/SympGPR/minpack.f90 b/python/05_tokamak/SympGPR/minpack.f90 new file mode 100644 index 0000000..c0b393b --- /dev/null +++ b/python/05_tokamak/SympGPR/minpack.f90 @@ -0,0 +1,6036 @@ +! Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +! Redistribution and use in source and binary forms, with or +! without modification, are permitted provided that the +! following conditions are met: + +! 1. Redistributions of source code must retain the above +! copyright notice, this list of conditions and the following +! disclaimer. + +! 2. Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials +! provided with the distribution. + +! 3. The end-user documentation included with the +! redistribution, if any, must include the following +! acknowledgment: + +! "This product includes software developed by the +! University of Chicago, as Operator of Argonne National +! Laboratory. + +! Alternately, this acknowledgment may appear in the software +! itself, if and wherever such third-party acknowledgments +! normally appear. + +! 4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +! WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE +! UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +! THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +! OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +! OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +! OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +! USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +! THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +! DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +! UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +! BE CORRECTED. + +! 5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +! HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +! ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +! INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +! ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +! PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +! SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +! (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +! EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +! POSSIBILITY OF SUCH LOSS OR DAMAGES. + + +subroutine chkder ( m, n, x, fvec, fjac, ldfjac, xp, fvecp, mode, err ) + +!*****************************************************************************80 +! +!! CHKDER checks the gradients of M functions of N variables. +! +! Discussion: +! +! CHKDER checks the gradients of M nonlinear functions in N variables, +! evaluated at a point X, for consistency with the functions themselves. +! +! The user calls CHKDER twice, first with MODE = 1 and then with MODE = 2. +! +! MODE = 1. +! On input, +! X contains the point of evaluation. +! On output, +! XP is set to a neighboring point. +! +! Now the user must evaluate the function and gradients at X, and the +! function at XP. Then the function is called again: +! +! MODE = 2. +! On input, +! FVEC contains the function values at X, +! FJAC contains the function gradients at X. +! FVECP contains the functions evaluated at XP. +! On output, +! ERR contains measures of correctness of the respective gradients. +! +! The function does not perform reliably if cancellation or +! rounding errors cause a severe loss of significance in the +! evaluation of a function. Therefore, none of the components +! of X should be unusually small (in particular, zero) or any +! other value which may cause loss of significance. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, is the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! +! Input, real ( kind = 8 ) X(N), the point at which the jacobian is to be +! evaluated. +! +! Input, real ( kind = 8 ) FVEC(M), is used only when MODE = 2. +! In that case, it should contain the function values at X. +! +! Input, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. When MODE = 2, +! FJAC(I,J) should contain the value of dF(I)/dX(J). +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least M. +! +! Output, real ( kind = 8 ) XP(N), on output with MODE = 1, is a neighboring +! point of X, at which the function is to be evaluated. +! +! Input, real ( kind = 8 ) FVECP(M), on input with MODE = 2, is the function +! value at XP. +! +! Input, integer ( kind = 4 ) MODE, should be set to 1 on the first call, and +! 2 on the second. +! +! Output, real ( kind = 8 ) ERR(M). On output when MODE = 2, ERR contains +! measures of correctness of the respective gradients. If there is no +! severe loss of significance, then if ERR(I): +! = 1.0D+00, the I-th gradient is correct, +! = 0.0D+00, the I-th gradient is incorrect. +! > 0.5D+00, the I-th gradient is probably correct. +! < 0.5D+00, the I-th gradient is probably incorrect. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) eps + real ( kind = 8 ) epsf + real ( kind = 8 ) epslog + real ( kind = 8 ) epsmch + real ( kind = 8 ) err(m) + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) fvecp(m) + integer ( kind = 4 ) i + integer ( kind = 4 ) j + integer ( kind = 4 ) mode + real ( kind = 8 ) temp + real ( kind = 8 ) x(n) + real ( kind = 8 ) xp(n) + + epsmch = epsilon ( epsmch ) + eps = sqrt ( epsmch ) +! +! MODE = 1. +! + if ( mode == 1 ) then + + do j = 1, n + temp = eps * abs ( x(j) ) + if ( temp == 0.0D+00 ) then + temp = eps + end if + xp(j) = x(j) + temp + end do +! +! MODE = 2. +! + else if ( mode == 2 ) then + + epsf = 100.0D+00 * epsmch + epslog = log10 ( eps ) + + err = 0.0D+00 + + do j = 1, n + temp = abs ( x(j) ) + if ( temp == 0.0D+00 ) then + temp = 1.0D+00 + end if + err(1:m) = err(1:m) + temp * fjac(1:m,j) + end do + + do i = 1, m + + temp = 1.0D+00 + + if ( fvec(i) /= 0.0D+00 .and. fvecp(i) /= 0.0D+00 .and. & + abs ( fvecp(i)-fvec(i)) >= epsf * abs ( fvec(i) ) ) then + temp = eps * abs ( (fvecp(i)-fvec(i)) / eps - err(i) ) & + / ( abs ( fvec(i) ) + abs ( fvecp(i) ) ) + end if + + err(i) = 1.0D+00 + + if ( epsmch < temp .and. temp < eps ) then + err(i) = ( log10 ( temp ) - epslog ) / epslog + end if + + if ( eps <= temp ) then + err(i) = 0.0D+00 + end if + + end do + + end if + + return +end +subroutine dogleg ( n, r, lr, diag, qtb, delta, x ) + +!*****************************************************************************80 +! +!! DOGLEG finds the minimizing combination of Gauss-Newton and gradient steps. +! +! Discussion: +! +! Given an M by N matrix A, an N by N nonsingular diagonal +! matrix D, an M-vector B, and a positive number DELTA, the +! problem is to determine the convex combination X of the +! Gauss-Newton and scaled gradient directions that minimizes +! (A*X - B) in the least squares sense, subject to the +! restriction that the euclidean norm of D*X be at most DELTA. +! +! This function completes the solution of the problem +! if it is provided with the necessary information from the +! QR factorization of A. That is, if A = Q*R, where Q has +! orthogonal columns and R is an upper triangular matrix, +! then DOGLEG expects the full upper triangle of R and +! the first N components of Q'*B. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of the matrix R. +! +! Input, real ( kind = 8 ) R(LR), the upper triangular matrix R stored +! by rows. +! +! Input, integer ( kind = 4 ) LR, the size of the R array, which must be +! no less than (N*(N+1))/2. +! +! Input, real ( kind = 8 ) DIAG(N), the diagonal elements of the matrix D. +! +! Input, real ( kind = 8 ) QTB(N), the first N elements of the vector Q'* B. +! +! Input, real ( kind = 8 ) DELTA, is a positive upper bound on the +! euclidean norm of D*X(1:N). +! +! Output, real ( kind = 8 ) X(N), the desired convex combination of the +! Gauss-Newton direction and the scaled gradient direction. +! + implicit none + + integer ( kind = 4 ) lr + integer ( kind = 4 ) n + + real ( kind = 8 ) alpha + real ( kind = 8 ) bnorm + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) gnorm + integer ( kind = 4 ) i + integer ( kind = 4 ) j + integer ( kind = 4 ) jj + integer ( kind = 4 ) k + integer ( kind = 4 ) l + real ( kind = 8 ) qnorm + real ( kind = 8 ) qtb(n) + real ( kind = 8 ) r(lr) + real ( kind = 8 ) sgnorm + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) x(n) + + epsmch = epsilon ( epsmch ) +! +! Calculate the Gauss-Newton direction. +! + jj = ( n * ( n + 1 ) ) / 2 + 1 + + do k = 1, n + + j = n - k + 1 + jj = jj - k + l = jj + 1 + sum2 = 0.0D+00 + + do i = j + 1, n + sum2 = sum2 + r(l) * x(i) + l = l + 1 + end do + + temp = r(jj) + + if ( temp == 0.0D+00 ) then + + l = j + do i = 1, j + temp = max ( temp, abs ( r(l)) ) + l = l + n - i + end do + + if ( temp == 0.0D+00 ) then + temp = epsmch + else + temp = epsmch * temp + end if + + end if + + x(j) = ( qtb(j) - sum2 ) / temp + + end do +! +! Test whether the Gauss-Newton direction is acceptable. +! + wa1(1:n) = 0.0D+00 + wa2(1:n) = diag(1:n) * x(1:n) + qnorm = enorm ( n, wa2 ) + + if ( qnorm <= delta ) then + return + end if +! +! The Gauss-Newton direction is not acceptable. +! Calculate the scaled gradient direction. +! + l = 1 + do j = 1, n + temp = qtb(j) + do i = j, n + wa1(i) = wa1(i) + r(l) * temp + l = l + 1 + end do + wa1(j) = wa1(j) / diag(j) + end do +! +! Calculate the norm of the scaled gradient. +! Test for the special case in which the scaled gradient is zero. +! + gnorm = enorm ( n, wa1 ) + sgnorm = 0.0D+00 + alpha = delta / qnorm + + if ( gnorm /= 0.0D+00 ) then +! +! Calculate the point along the scaled gradient which minimizes the quadratic. +! + wa1(1:n) = ( wa1(1:n) / gnorm ) / diag(1:n) + + l = 1 + do j = 1, n + sum2 = 0.0D+00 + do i = j, n + sum2 = sum2 + r(l) * wa1(i) + l = l + 1 + end do + wa2(j) = sum2 + end do + + temp = enorm ( n, wa2 ) + sgnorm = ( gnorm / temp ) / temp +! +! Test whether the scaled gradient direction is acceptable. +! + alpha = 0.0D+00 +! +! The scaled gradient direction is not acceptable. +! Calculate the point along the dogleg at which the quadratic is minimized. +! + if ( sgnorm < delta ) then + + bnorm = enorm ( n, qtb ) + temp = ( bnorm / gnorm ) * ( bnorm / qnorm ) * ( sgnorm / delta ) + temp = temp - ( delta / qnorm ) * ( sgnorm / delta) ** 2 & + + sqrt ( ( temp - ( delta / qnorm ) ) ** 2 & + + ( 1.0D+00 - ( delta / qnorm ) ** 2 ) & + * ( 1.0D+00 - ( sgnorm / delta ) ** 2 ) ) + + alpha = ( ( delta / qnorm ) * ( 1.0D+00 - ( sgnorm / delta ) ** 2 ) ) & + / temp + + end if + + end if +! +! Form appropriate convex combination of the Gauss-Newton +! direction and the scaled gradient direction. +! + temp = ( 1.0D+00 - alpha ) * min ( sgnorm, delta ) + + x(1:n) = temp * wa1(1:n) + alpha * x(1:n) + + return +end +function enorm ( n, x ) + +!*****************************************************************************80 +! +!! ENORM computes the Euclidean norm of a vector. +! +! Discussion: +! +! This is an extremely simplified version of the original ENORM +! routine, which has been renamed to "ENORM2". +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, is the length of the vector. +! +! Input, real ( kind = 8 ) X(N), the vector whose norm is desired. +! +! Output, real ( kind = 8 ) ENORM, the Euclidean norm of the vector. +! + implicit none + + integer ( kind = 4 ) n + real ( kind = 8 ) x(n) + real ( kind = 8 ) enorm + + enorm = sqrt ( sum ( x(1:n) ** 2 )) + + return +end +function enorm2 ( n, x ) + +!*****************************************************************************80 +! +!! ENORM2 computes the Euclidean norm of a vector. +! +! Discussion: +! +! This routine was named ENORM. It has been renamed "ENORM2", +! and a simplified routine has been substituted. +! +! The Euclidean norm is computed by accumulating the sum of +! squares in three different sums. The sums of squares for the +! small and large components are scaled so that no overflows +! occur. Non-destructive underflows are permitted. Underflows +! and overflows do not occur in the computation of the unscaled +! sum of squares for the intermediate components. +! +! The definitions of small, intermediate and large components +! depend on two constants, RDWARF and RGIANT. The main +! restrictions on these constants are that RDWARF^2 not +! underflow and RGIANT^2 not overflow. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1 +! Argonne National Laboratory, +! Argonne, Illinois. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, is the length of the vector. +! +! Input, real ( kind = 8 ) X(N), the vector whose norm is desired. +! +! Output, real ( kind = 8 ) ENORM2, the Euclidean norm of the vector. +! + implicit none + + integer ( kind = 4 ) n + + real ( kind = 8 ) agiant + real ( kind = 8 ) enorm2 + integer ( kind = 4 ) i + real ( kind = 8 ) rdwarf + real ( kind = 8 ) rgiant + real ( kind = 8 ) s1 + real ( kind = 8 ) s2 + real ( kind = 8 ) s3 + real ( kind = 8 ) x(n) + real ( kind = 8 ) xabs + real ( kind = 8 ) x1max + real ( kind = 8 ) x3max + + rdwarf = sqrt ( tiny ( rdwarf ) ) + rgiant = sqrt ( huge ( rgiant ) ) + + s1 = 0.0D+00 + s2 = 0.0D+00 + s3 = 0.0D+00 + x1max = 0.0D+00 + x3max = 0.0D+00 + agiant = rgiant / real ( n, kind = 8 ) + + do i = 1, n + + xabs = abs ( x(i) ) + + if ( xabs <= rdwarf ) then + + if ( x3max < xabs ) then + s3 = 1.0D+00 + s3 * ( x3max / xabs ) ** 2 + x3max = xabs + else if ( xabs /= 0.0D+00 ) then + s3 = s3 + ( xabs / x3max ) ** 2 + end if + + else if ( agiant <= xabs ) then + + if ( x1max < xabs ) then + s1 = 1.0D+00 + s1 * ( x1max / xabs ) ** 2 + x1max = xabs + else + s1 = s1 + ( xabs / x1max ) ** 2 + end if + + else + + s2 = s2 + xabs ** 2 + + end if + + end do +! +! Calculation of norm. +! + if ( s1 /= 0.0D+00 ) then + + enorm2 = x1max * sqrt ( s1 + ( s2 / x1max ) / x1max ) + + else if ( s2 /= 0.0D+00 ) then + + if ( x3max <= s2 ) then + enorm2 = sqrt ( s2 * ( 1.0D+00 + ( x3max / s2 ) * ( x3max * s3 ) ) ) + else + enorm2 = sqrt ( x3max * ( ( s2 / x3max ) + ( x3max * s3 ) ) ) + end if + + else + + enorm2 = x3max * sqrt ( s3 ) + + end if + + return +end +subroutine fdjac1 ( fcn, n, x, fvec, fjac, ldfjac, iflag, ml, mu, epsfcn ) + +!*****************************************************************************80 +! +!! FDJAC1 estimates an N by N jacobian matrix using forward differences. +! +! Discussion: +! +! This function computes a forward-difference approximation +! to the N by N jacobian matrix associated with a specified +! problem of N functions in N variables. If the jacobian has +! a banded form, then function evaluations are saved by only +! approximating the nonzero terms. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input, real ( kind = 8 ) X(N), the point where the jacobian is evaluated. +! +! Input, real ( kind = 8 ) FVEC(N), the functions evaluated at X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), the N by N approximate +! jacobian matrix. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC, which +! must not be less than N. +! +! Output, integer ( kind = 4 ) IFLAG, is an error flag returned by FCN. +! If FCN returns a nonzero value of IFLAG, then this routine returns +! immediately to the calling program, with the value of IFLAG. +! +! Input, integer ( kind = 4 ) ML, MU, specify the number of subdiagonals and +! superdiagonals within the band of the jacobian matrix. If the +! jacobian is not banded, set ML and MU to N-1. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable step +! length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) n + + real ( kind = 8 ) eps + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(n) + real ( kind = 8 ) h + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) ml + integer ( kind = 4 ) msum + integer ( kind = 4 ) mu + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) x(n) + + epsmch = epsilon ( epsmch ) + + eps = sqrt ( max ( epsfcn, epsmch ) ) + msum = ml + mu + 1 +! +! Computation of dense approximate jacobian. +! + if ( n <= msum ) then + + do j = 1, n + + temp = x(j) + h = eps * abs ( temp ) + if ( h == 0.0D+00 ) then + h = eps + end if + + iflag = 1 + x(j) = temp + h + call fcn ( n, x, wa1, iflag ) + + if ( iflag < 0 ) then + exit + end if + + x(j) = temp + fjac(1:n,j) = ( wa1(1:n) - fvec(1:n) ) / h + + end do + + else +! +! Computation of banded approximate jacobian. +! + do k = 1, msum + + do j = k, n, msum + wa2(j) = x(j) + h = eps * abs ( wa2(j) ) + if ( h == 0.0D+00 ) then + h = eps + end if + x(j) = wa2(j) + h + end do + + iflag = 1 + call fcn ( n, x, wa1, iflag ) + + if ( iflag < 0 ) then + exit + end if + + do j = k, n, msum + + x(j) = wa2(j) + + h = eps * abs ( wa2(j) ) + if ( h == 0.0D+00 ) then + h = eps + end if + + fjac(1:n,j) = 0.0D+00 + + do i = 1, n + if ( j - mu <= i .and. i <= j + ml ) then + fjac(i,j) = ( wa1(i) - fvec(i) ) / h + end if + end do + + end do + + end do + + end if + + return +end +subroutine fdjac2 ( fcn, m, n, x, fvec, fjac, ldfjac, iflag, epsfcn ) + +!*****************************************************************************80 +! +!! FDJAC2 estimates an M by N jacobian matrix using forward differences. +! +! Discussion: +! +! This function computes a forward-difference approximation +! to the M by N jacobian matrix associated with a specified +! problem of M functions in N variables. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( m, n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, is the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! N must not exceed M. +! +! Input, real ( kind = 8 ) X(N), the point where the jacobian is evaluated. +! +! Input, real ( kind = 8 ) FVEC(M), the functions evaluated at X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), the M by N approximate +! jacobian matrix. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC, +! which must not be less than M. +! +! Output, integer ( kind = 4 ) IFLAG, is an error flag returned by FCN. +! If FCN returns a nonzero value of IFLAG, then this routine returns +! immediately to the calling program, with the value of IFLAG. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable +! step length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) eps + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) h + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) j + real ( kind = 8 ) temp + real ( kind = 8 ) wa(m) + real ( kind = 8 ) x(n) + + epsmch = epsilon ( epsmch ) + + eps = sqrt ( max ( epsfcn, epsmch ) ) + + do j = 1, n + + temp = x(j) + h = eps * abs ( temp ) + if ( h == 0.0D+00 ) then + h = eps + end if + + iflag = 1 + x(j) = temp + h + call fcn ( m, n, x, wa, iflag ) + + if ( iflag < 0 ) then + exit + end if + + x(j) = temp + fjac(1:m,j) = ( wa(1:m) - fvec(1:m) ) / h + + end do + + return +end +subroutine hybrd ( fcn, n, x, fvec, xtol, maxfev, ml, mu, epsfcn, diag, mode, & + factor, nprint, info, nfev, fjac, ldfjac, r, lr, qtf ) + +!*****************************************************************************80 +! +!! HYBRD seeks a zero of N nonlinear equations in N variables. +! +! Discussion: +! +! HYBRD finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. The user must provide a +! subroutine which calculates the functions. +! +! The jacobian is then calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN is at least MAXFEV by the end of an iteration. +! +! Input, integer ( kind = 4 ) ML, MU, specify the number of subdiagonals and +! superdiagonals within the band of the jacobian matrix. If the jacobian +! is not banded, set ML and MU to at least n - 1. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable step +! length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of +! iterates if it is positive. In this case, FCN is called with IFLAG = 0 at +! the beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. +! See the description of FCN. +! Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, relative error between two consecutive iterates is at most XTOL. +! 2, number of calls to FCN has reached or exceeded MAXFEV. +! 3, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 4, iteration is not making good progress, as measured by the improvement +! from the last five jacobian evaluations. +! 5, iteration is not making good progress, as measured by the improvement +! from the last ten iterations. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array which contains +! the orthogonal matrix Q produced by the QR factorization of the final +! approximate jacobian. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Output, real ( kind = 8 ) R(LR), the upper triangular matrix produced by +! the QR factorization of the final approximate jacobian, stored rowwise. +! +! Input, integer ( kind = 4 ) LR, the size of the R array, which must be no +! less than (N*(N+1))/2. +! +! Output, real ( kind = 8 ) QTF(N), contains the vector Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) lr + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) enorm + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) iter + integer ( kind = 4 ) iwa(1) + integer ( kind = 4 ) j + logical jeval + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) ml + integer ( kind = 4 ) mode + integer ( kind = 4 ) msum + integer ( kind = 4 ) mu + integer ( kind = 4 ) ncfail + integer ( kind = 4 ) nslow1 + integer ( kind = 4 ) nslow2 + integer ( kind = 4 ) ncsuc + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r(lr) + real ( kind = 8 ) ratio + logical sing + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(n) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + return + else if ( xtol < 0.0D+00 ) then + return + else if ( maxfev <= 0 ) then + return + else if ( ml < 0 ) then + return + else if ( mu < 0 ) then + return + else if ( factor <= 0.0D+00 ) then + return + else if ( ldfjac < n ) then + return + else if ( lr < ( n * ( n + 1 ) ) / 2 ) then + return + end if + + if ( mode == 2 ) then + + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 300 + end if + end do + + end if +! +! Evaluate the function at the starting point +! and calculate its norm. +! + iflag = 1 + call fcn ( n, x, fvec, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm = enorm ( n, fvec ) +! +! Determine the number of calls to FCN needed to compute the jacobian matrix. +! + msum = min ( ml + mu + 1, n ) +! +! Initialize iteration counter and monitors. +! + iter = 1 + ncsuc = 0 + ncfail = 0 + nslow1 = 0 + nslow2 = 0 +! +! Beginning of the outer loop. +! +30 continue + + jeval = .true. +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fdjac1 ( fcn, n, x, fvec, fjac, ldfjac, iflag, ml, mu, epsfcn ) + + nfev = nfev + msum + + if ( iflag < 0 ) then + go to 300 + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .false. + call qrfac ( n, n, fjac, ldfjac, pivot, iwa, 1, wa1, wa2 ) +! +! On the first iteration, if MODE is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Form Q' * FVEC and store in QTF. +! + qtf(1:n) = fvec(1:n) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + temp = - dot_product ( qtf(j:n), fjac(j:n,j) ) / fjac(j,j) + qtf(j:n) = qtf(j:n) + fjac(j:n,j) * temp + end if + + end do +! +! Copy the triangular factor of the QR factorization into R. +! + sing = .false. + + do j = 1, n + l = j + do i = 1, j - 1 + r(l) = fjac(i,j) + l = l + n - i + end do + r(l) = wa1(j) + if ( wa1(j) == 0.0D+00 ) then + sing = .true. + end if + end do +! +! Accumulate the orthogonal factor in FJAC. +! + call qform ( n, n, fjac, ldfjac ) +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +180 continue +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( n, x, fvec, iflag ) + end if + if ( iflag < 0 ) then + go to 300 + end if + end if +! +! Determine the direction P. +! + call dogleg ( n, r, lr, diag, qtf, delta, wa1 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = - wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( n, wa2, wa4, iflag ) + nfev = nfev + 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm1 = enorm ( n, wa4 ) +! +! Compute the scaled actual reduction. +! + actred = -1.0D+00 + if ( fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + endif +! +! Compute the scaled predicted reduction. +! + l = 1 + do i = 1, n + sum2 = 0.0D+00 + do j = i, n + sum2 = sum2 + r(l) * wa1(j) + l = l + 1 + end do + wa3(i) = qtf(i) + sum2 + end do + + temp = enorm ( n, wa3 ) + prered = 0.0D+00 + if ( temp < fnorm ) then + prered = 1.0D+00 - ( temp / fnorm ) ** 2 + end if +! +! Compute the ratio of the actual to the predicted reduction. +! + ratio = 0.0D+00 + if ( 0.0D+00 < prered ) then + ratio = actred / prered + end if +! +! Update the step bound. +! + if ( ratio < 0.1D+00 ) then + + ncsuc = 0 + ncfail = ncfail + 1 + delta = 0.5D+00 * delta + + else + + ncfail = 0 + ncsuc = ncsuc + 1 + + if ( 0.5D+00 <= ratio .or. 1 < ncsuc ) then + delta = max ( delta, pnorm / 0.5D+00 ) + end if + + if ( abs ( ratio - 1.0D+00 ) <= 0.1D+00 ) then + delta = pnorm / 0.5D+00 + end if + + end if +! +! Test for successful iteration. +! +! Successful iteration. +! Update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:n) = wa4(1:n) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Determine the progress of the iteration. +! + nslow1 = nslow1 + 1 + if ( 0.001D+00 <= actred ) then + nslow1 = 0 + end if + + if ( jeval ) then + nslow2 = nslow2 + 1 + end if + + if ( 0.1D+00 <= actred ) then + nslow2 = 0 + end if +! +! Test for convergence. +! + if ( delta <= xtol * xnorm .or. fnorm == 0.0D+00 ) then + info = 1 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! Tests for termination and stringent tolerances. +! + if ( maxfev <= nfev ) then + info = 2 + end if + + if ( 0.1D+00 * max ( 0.1D+00 * delta, pnorm ) <= epsmch * xnorm ) then + info = 3 + end if + + if ( nslow2 == 5 ) then + info = 4 + end if + + if ( nslow1 == 10 ) then + info = 5 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! Criterion for recalculating jacobian approximation +! by forward differences. +! + if ( ncfail == 2 ) then + go to 290 + end if +! +! Calculate the rank one modification to the jacobian +! and update QTF if necessary. +! + do j = 1, n + sum2 = dot_product ( wa4(1:n), fjac(1:n,j) ) + wa2(j) = ( sum2 - wa3(j) ) / pnorm + wa1(j) = diag(j) * ( ( diag(j) * wa1(j) ) / pnorm ) + if ( 0.0001D+00 <= ratio ) then + qtf(j) = sum2 + end if + end do +! +! Compute the QR factorization of the updated jacobian. +! + call r1updt ( n, n, r, lr, wa1, wa2, wa3, sing ) + call r1mpyq ( n, n, fjac, ldfjac, wa2, wa3 ) + call r1mpyq ( 1, n, qtf, 1, wa2, wa3 ) +! +! End of the inner loop. +! + jeval = .false. + go to 180 + + 290 continue +! +! End of the outer loop. +! + go to 30 + + 300 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( n, x, fvec, iflag ) + end if + + return +end +subroutine hybrd1 ( fcn, n, x, fvec, tol, info ) + +!*****************************************************************************80 +! +!! HYBRD1 seeks a zero of N nonlinear equations in N variables. +! +! Discussion: +! +! HYBRD1 finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. This is done by using the +! more general nonlinear equation solver HYBRD. The user must provide a +! subroutine which calculates the functions. The jacobian is then +! calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 19 August 2016 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates that the relative error between X and the solution is at +! most TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See the +! description of FCN. +! Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 2, number of calls to FCN has reached or exceeded 200*(N+1). +! 3, TOL is too small. No further improvement in the approximate +! solution X is possible. +! 4, the iteration is not making good progress. +! + implicit none + + integer ( kind = 4 ) lwa + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) epsfcn + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(n,n) + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) info + integer ( kind = 4 ) j + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) lr + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) ml + integer ( kind = 4 ) mode + integer ( kind = 4 ) mu + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r((n*(n+1))/2) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + if ( n <= 0 ) then + info = 0 + return + end if + + if ( tol < 0.0D+00 ) then + info = 0 + return + end if + + xtol = tol + maxfev = 200 * ( n + 1 ) + ml = n - 1 + mu = n - 1 + epsfcn = 0.0D+00 + diag(1:n) = 1.0D+00 + mode = 2 + factor = 100.0D+00 + nprint = 0 + info = 0 + nfev = 0 + fjac(1:n,1:n) = 0.0D+00 + ldfjac = n + r(1:(n*(n+1))/2) = 0.0D+00 + lr = ( n * ( n + 1 ) ) / 2 + qtf(1:n) = 0.0D+00 + + call hybrd ( fcn, n, x, fvec, xtol, maxfev, ml, mu, epsfcn, diag, mode, & + factor, nprint, info, nfev, fjac, ldfjac, r, lr, qtf ) + + if ( info == 5 ) then + info = 4 + end if + + return +end +subroutine hybrj ( fcn, n, x, fvec, fjac, ldfjac, xtol, maxfev, diag, mode, & + factor, nprint, info, nfev, njev, r, lr, qtf ) + +!*****************************************************************************80 +! +!! HYBRJ seeks a zero of N nonlinear equations in N variables. +! +! Discussion: +! +! HYBRJ finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. The user must provide a +! subroutine which calculates the functions and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! +! subroutine fcn ( n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N matrix, containing +! the orthogonal matrix Q produced by the QR factorization +! of the final approximate jacobian. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of the +! array FJAC. LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN is at least MAXFEV by the end of an iteration. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. +! See the description of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, relative error between two consecutive iterates is at most XTOL. +! 2, number of calls to FCN with IFLAG = 1 has reached MAXFEV. +! 3, XTOL is too small. No further improvement in +! the approximate solution X is possible. +! 4, iteration is not making good progress, as measured by the +! improvement from the last five jacobian evaluations. +! 5, iteration is not making good progress, as measured by the +! improvement from the last ten iterations. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN +! with IFLAG = 1. +! +! Output, integer ( kind = 4 ) NJEV, the number of calls to FCN +! with IFLAG = 2. +! +! Output, real ( kind = 8 ) R(LR), the upper triangular matrix produced +! by the QR factorization of the final approximate jacobian, stored rowwise. +! +! Input, integer ( kind = 4 ) LR, the size of the R array, which must +! be no less than (N*(N+1))/2. +! +! Output, real ( kind = 8 ) QTF(N), contains the vector Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) lr + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) iter + integer ( kind = 4 ) iwa(1) + integer ( kind = 4 ) j + logical jeval + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) ncfail + integer ( kind = 4 ) nslow1 + integer ( kind = 4 ) nslow2 + integer ( kind = 4 ) ncsuc + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r(lr) + real ( kind = 8 ) ratio + logical sing + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(n) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + njev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + if ( ldfjac < n .or. & + xtol < 0.0D+00 .or. & + maxfev <= 0 .or. & + factor <= 0.0D+00 .or. & + lr < ( n * ( n + 1 ) ) / 2 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + if ( iflag < 0 ) then + info = iflag + end if + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + fnorm = enorm ( n, fvec ) +! +! Initialize iteration counter and monitors. +! + iter = 1 + ncsuc = 0 + ncfail = 0 + nslow1 = 0 + nslow2 = 0 +! +! Beginning of the outer loop. +! + do + + jeval = .true. +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + njev = njev + 1 + + if ( iflag < 0 ) then + info = iflag + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .false. + call qrfac ( n, n, fjac, ldfjac, pivot, iwa, 1, wa1, wa2 ) +! +! On the first iteration, if MODE is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Form Q'*FVEC and store in QTF. +! + qtf(1:n) = fvec(1:n) + + do j = 1, n + if ( fjac(j,j) /= 0.0D+00 ) then + sum2 = 0.0D+00 + do i = j, n + sum2 = sum2 + fjac(i,j) * qtf(i) + end do + temp = - sum2 / fjac(j,j) + do i = j, n + qtf(i) = qtf(i) + fjac(i,j) * temp + end do + end if + end do +! +! Copy the triangular factor of the QR factorization into R. +! + sing = .false. + + do j = 1, n + l = j + do i = 1, j - 1 + r(l) = fjac(i,j) + l = l + n - i + end do + r(l) = wa1(j) + if ( wa1(j) == 0.0D+00 ) then + sing = .true. + end if + end do +! +! Accumulate the orthogonal factor in FJAC. +! + call qform ( n, n, fjac, ldfjac ) +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! + do +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + + if ( iflag < 0 ) then + info = iflag + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + end if +! +! Determine the direction P. +! + call dogleg ( n, r, lr, diag, qtf, delta, wa1 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = - wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( n, wa2, wa4, fjac, ldfjac, iflag ) + nfev = nfev + 1 + + if ( iflag < 0 ) then + info = iflag + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if + + fnorm1 = enorm ( n, wa4 ) +! +! Compute the scaled actual reduction. +! + actred = -1.0D+00 + if ( fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + end if +! +! Compute the scaled predicted reduction. +! + l = 1 + do i = 1, n + sum2 = 0.0D+00 + do j = i, n + sum2 = sum2 + r(l) * wa1(j) + l = l + 1 + end do + wa3(i) = qtf(i) + sum2 + end do + + temp = enorm ( n, wa3 ) + prered = 0.0D+00 + if ( temp < fnorm ) then + prered = 1.0D+00 - ( temp / fnorm ) ** 2 + end if +! +! Compute the ratio of the actual to the predicted reduction. +! + if ( 0.0D+00 < prered ) then + ratio = actred / prered + else + ratio = 0.0D+00 + end if +! +! Update the step bound. +! + if ( ratio < 0.1D+00 ) then + + ncsuc = 0 + ncfail = ncfail + 1 + delta = 0.5D+00 * delta + + else + + ncfail = 0 + ncsuc = ncsuc + 1 + + if ( 0.5D+00 <= ratio .or. 1 < ncsuc ) then + delta = max ( delta, pnorm / 0.5D+00 ) + end if + + if ( abs ( ratio - 1.0D+00 ) <= 0.1D+00 ) then + delta = pnorm / 0.5D+00 + end if + + end if +! +! Test for successful iteration. +! + +! +! Successful iteration. +! Update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:n) = wa4(1:n) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Determine the progress of the iteration. +! + nslow1 = nslow1 + 1 + if ( 0.001D+00 <= actred ) then + nslow1 = 0 + end if + + if ( jeval ) then + nslow2 = nslow2 + 1 + end if + + if ( 0.1D+00 <= actred ) then + nslow2 = 0 + end if +! +! Test for convergence. +! + if ( delta <= xtol * xnorm .or. fnorm == 0.0D+00 ) then + info = 1 + end if + + if ( info /= 0 ) then + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if +! +! Tests for termination and stringent tolerances. +! + if ( maxfev <= nfev ) then + info = 2 + end if + + if ( 0.1D+00 * max ( 0.1D+00 * delta, pnorm ) <= epsmch * xnorm ) then + info = 3 + end if + + if ( nslow2 == 5 ) then + info = 4 + end if + + if ( nslow1 == 10 ) then + info = 5 + end if + + if ( info /= 0 ) then + iflag = 0 + if ( 0 < nprint ) then + call fcn ( n, x, fvec, fjac, ldfjac, iflag ) + end if + return + end if +! +! Criterion for recalculating jacobian. +! + if ( ncfail == 2 ) then + exit + end if +! +! Calculate the rank one modification to the jacobian +! and update QTF if necessary. +! + do j = 1, n + sum2 = dot_product ( wa4(1:n), fjac(1:n,j) ) + wa2(j) = ( sum2 - wa3(j) ) / pnorm + wa1(j) = diag(j) * ( ( diag(j) * wa1(j) ) / pnorm ) + if ( 0.0001D+00 <= ratio ) then + qtf(j) = sum2 + end if + end do +! +! Compute the QR factorization of the updated jacobian. +! + call r1updt ( n, n, r, lr, wa1, wa2, wa3, sing ) + call r1mpyq ( n, n, fjac, ldfjac, wa2, wa3 ) + call r1mpyq ( 1, n, qtf, 1, wa2, wa3 ) +! +! End of the inner loop. +! + jeval = .false. + + end do +! +! End of the outer loop. +! + end do + +end +subroutine hybrj1 ( fcn, n, x, fvec, fjac, ldfjac, tol, info ) + +!*****************************************************************************80 +! +!! HYBRJ1 seeks a zero of N equations in N variables by Powell's method. +! +! Discussion: +! +! HYBRJ1 finds a zero of a system of N nonlinear functions in N variables +! by a modification of the Powell hybrid method. This is done by using the +! more general nonlinear equation solver HYBRJ. The user +! must provide a subroutine which calculates the functions +! and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! subroutine fcn ( n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(n) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) N, the number of functions and variables. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(N), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array which contains +! the orthogonal matrix Q produced by the QR factorization of the final +! approximate jacobian. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates that the relative error between X and the solution is at most +! TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 2, number of calls to FCN with IFLAG = 1 has reached 100*(N+1). +! 3, TOL is too small. No further improvement in the approximate +! solution X is possible. +! 4, iteration is not making good progress. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fvec(n) + integer ( kind = 4 ) info + integer ( kind = 4 ) j + integer ( kind = 4 ) lr + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) r((n*(n+1))/2) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + info = 0 + + if ( n <= 0 ) then + return + else if ( ldfjac < n ) then + return + else if ( tol < 0.0D+00 ) then + return + end if + + maxfev = 100 * ( n + 1 ) + xtol = tol + mode = 2 + diag(1:n) = 1.0D+00 + factor = 100.0D+00 + nprint = 0 + lr = ( n * ( n + 1 ) ) / 2 + + call hybrj ( fcn, n, x, fvec, fjac, ldfjac, xtol, maxfev, diag, mode, & + factor, nprint, info, nfev, njev, r, lr, qtf ) + + if ( info == 5 ) then + info = 4 + end if + + return +end +subroutine lmder ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + +!*****************************************************************************80 +! +!! LMDER minimizes M functions in N variables by the Levenberg-Marquardt method. +! +! Discussion: +! +! LMDER minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! The user must provide a subroutine which calculates the functions +! and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, is the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. The upper +! N by N submatrix of FJAC contains an upper triangular matrix R with +! diagonal elements of nonincreasing magnitude such that +! P' * ( JAC' * JAC ) * P = R' * R, +! where P is a permutation matrix and JAC is the final calculated jacobian. +! Column J of P is column IPVT(J) of the identity matrix. The lower +! trapezoidal part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least M. +! +! Input, real ( kind = 8 ) FTOL. Termination occurs when both the actual +! and predicted relative reductions in the sum of squares are at most FTOL. +! Therefore, FTOL measures the relative error desired in the sum of +! squares. FTOL should be nonnegative. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, real ( kind = 8 ) GTOL. Termination occurs when the cosine of the +! angle between FVEC and any column of the jacobian is at most GTOL in +! absolute value. Therefore, GTOL measures the orthogonality desired +! between the function vector and the columns of the jacobian. GTOL should +! be nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN with IFLAG = 1 is at least MAXFEV by the end of an iteration. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, both actual and predicted relative reductions in the sum of +! squares are at most FTOL. +! 2, relative error between two consecutive iterates is at most XTOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, the cosine of the angle between FVEC and any column of the jacobian +! is at most GTOL in absolute value. +! 5, number of calls to FCN with IFLAG = 1 has reached MAXFEV. +! 6, FTOL is too small. No further reduction in the sum of squares +! is possible. +! 7, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 8, GTOL is too small. FVEC is orthogonal to the columns of the +! jacobian to machine precision. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN with +! IFLAG = 1. +! +! Output, integer ( kind = 4 ) NJEV, the number of calls to FCN with +! IFLAG = 2. +! +! Output, integer ( kind = 4 ) IPVT(N), defines a permutation matrix P +! such that JAC*P = Q*R, where JAC is the final calculated jacobian, Q is +! orthogonal (not stored), and R is upper triangular with diagonal +! elements of nonincreasing magnitude. Column J of P is column +! IPVT(J) of the identity matrix. +! +! Output, real ( kind = 8 ) QTF(N), contains the first N elements of Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dirder + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gnorm + real ( kind = 8 ) gtol + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) iter + integer ( kind = 4 ) j + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) par + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) ratio + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) temp1 + real ( kind = 8 ) temp2 + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(m) + real ( kind = 8 ) xnorm + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + njev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + go to 300 + end if + + if ( m < n ) then + go to 300 + end if + + if ( ldfjac < m & + .or. ftol < 0.0D+00 .or. xtol < 0.0D+00 .or. gtol < 0.0D+00 & + .or. maxfev <= 0 .or. factor <= 0.0D+00 ) then + go to 300 + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 300 + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + nfev = 1 + if ( iflag < 0 ) then + go to 300 + end if + + fnorm = enorm ( m, fvec ) +! +! Initialize Levenberg-Marquardt parameter and iteration counter. +! + par = 0.0D+00 + iter = 1 +! +! Beginning of the outer loop. +! +30 continue +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + + njev = njev + 1 + + if ( iflag < 0 ) then + go to 300 + end if +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + end if + if ( iflag < 0 ) then + go to 300 + end if + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .true. + call qrfac ( m, n, fjac, ldfjac, pivot, ipvt, n, wa1, wa2 ) +! +! On the first iteration and if mode is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Form Q'*FVEC and store the first N components in QTF. +! + wa4(1:m) = fvec(1:m) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + sum2 = dot_product ( wa4(j:m), fjac(j:m,j) ) + temp = - sum2 / fjac(j,j) + wa4(j:m) = wa4(j:m) + fjac(j:m,j) * temp + end if + + fjac(j,j) = wa1(j) + qtf(j) = wa4(j) + + end do +! +! Compute the norm of the scaled gradient. +! + gnorm = 0.0D+00 + + if ( fnorm /= 0.0D+00 ) then + + do j = 1, n + l = ipvt(j) + if ( wa2(l) /= 0.0D+00 ) then + sum2 = dot_product ( qtf(1:j), fjac(1:j,j) ) / fnorm + gnorm = max ( gnorm, abs ( sum2 / wa2(l) ) ) + end if + end do + + end if +! +! Test for convergence of the gradient norm. +! + if ( gnorm <= gtol ) then + info = 4 + go to 300 + end if +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +200 continue +! +! Determine the Levenberg-Marquardt parameter. +! + call lmpar ( n, fjac, ldfjac, ipvt, diag, qtf, delta, par, wa1, wa2 ) +! +! Store the direction p and x + p. calculate the norm of p. +! + wa1(1:n) = - wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at x + p and calculate its norm. +! + iflag = 1 + call fcn ( m, n, wa2, wa4, fjac, ldfjac, iflag ) + + nfev = nfev + 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm1 = enorm ( m, wa4 ) +! +! Compute the scaled actual reduction. +! + actred = -1.0D+00 + if ( 0.1D+00 * fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + end if +! +! Compute the scaled predicted reduction and +! the scaled directional derivative. +! + do j = 1, n + wa3(j) = 0.0D+00 + l = ipvt(j) + temp = wa1(l) + wa3(1:j) = wa3(1:j) + fjac(1:j,j) * temp + end do + + temp1 = enorm ( n, wa3 ) / fnorm + temp2 = ( sqrt ( par ) * pnorm ) / fnorm + prered = temp1 ** 2 + temp2 ** 2 / 0.5D+00 + dirder = - ( temp1 ** 2 + temp2 ** 2 ) +! +! Compute the ratio of the actual to the predicted reduction. +! + if ( prered /= 0.0D+00 ) then + ratio = actred / prered + else + ratio = 0.0D+00 + end if +! +! Update the step bound. +! + if ( ratio <= 0.25D+00 ) then + + if ( 0.0D+00 <= actred ) then + temp = 0.5D+00 + end if + + if ( actred < 0.0D+00 ) then + temp = 0.5D+00 * dirder / ( dirder + 0.5D+00 * actred ) + end if + + if ( 0.1D+00 * fnorm1 >= fnorm .or. temp < 0.1D+00 ) then + temp = 0.1D+00 + end if + + delta = temp * min ( delta, pnorm / 0.1D+00 ) + par = par / temp + + else + + if ( par == 0.0D+00 .or. ratio >= 0.75D+00 ) then + delta = 2.0D+00 * pnorm + par = 0.5D+00 * par + end if + + end if +! +! Successful iteration. +! +! Update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:m) = wa4(1:m) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Tests for convergence. +! + if ( abs ( actred) <= ftol .and. & + prered <= ftol .and. & + 0.5D+00 * ratio <= 1.0D+00 ) then + info = 1 + end if + + if ( delta <= xtol * xnorm ) then + info = 2 + end if + + if ( abs ( actred) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 .and. info == 2 ) then + info = 3 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! Tests for termination and stringent tolerances. +! + if ( nfev >= maxfev ) then + info = 5 + end if + + if ( abs ( actred ) <= epsmch .and. prered <= epsmch & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 6 + end if + + if ( delta <= epsmch * xnorm ) then + info = 7 + end if + + if ( gnorm <= epsmch ) then + info = 8 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! End of the inner loop. repeat if iteration unsuccessful. +! + if ( ratio < 0.0001D+00 ) then + go to 200 + end if +! +! End of the outer loop. +! + go to 30 + + 300 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) + end if + + return +end +subroutine lmder1 ( fcn, m, n, x, fvec, fjac, ldfjac, tol, info ) + +!*****************************************************************************80 +! +!! LMDER1 minimizes M functions in N variables by Levenberg-Marquardt method. +! +! Discussion: +! +! LMDER1 minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! This is done by using the more general least-squares solver LMDER. +! The user must provide a subroutine which calculates the functions +! and the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the jacobian. FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjac, ldfjac, iflag ) +! integer ( kind = 4 ) ldfjac +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjac(ldfjac,n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If IFLAG = 2 on input, FCN should calculate the jacobian at X and +! return this matrix in FJAC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, is the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. The upper +! N by N submatrix contains an upper triangular matrix R with +! diagonal elements of nonincreasing magnitude such that +! P' * ( JAC' * JAC ) * P = R' * R, +! where P is a permutation matrix and JAC is the final calculated +! jacobian. Column J of P is column IPVT(J) of the identity matrix. +! The lower trapezoidal part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, is the leading dimension of FJAC, +! which must be no less than M. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates either that the relative error in the sum of squares is at +! most TOL or that the relative error between X and the solution is at +! most TOL. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error in the sum of squares +! is at most TOL. +! 2, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, FVEC is orthogonal to the columns of the jacobian to machine precision. +! 5, number of calls to FCN with IFLAG = 1 has reached 100*(N+1). +! 6, TOL is too small. No further reduction in the sum of squares is +! possible. +! 7, TOL is too small. No further improvement in the approximate +! solution X is possible. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gtol + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + info = 0 + + if ( n <= 0 ) then + return + else if ( m < n ) then + return + else if ( ldfjac < m ) then + return + else if ( tol < 0.0D+00 ) then + return + end if + + factor = 100.0D+00 + maxfev = 100 * ( n + 1 ) + ftol = tol + xtol = tol + gtol = 0.0D+00 + mode = 1 + nprint = 0 + + call lmder ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + + if ( info == 8 ) then + info = 4 + end if + + return +end +subroutine lmdif ( fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev, epsfcn, & + diag, mode, factor, nprint, info, nfev, fjac, ldfjac, ipvt, qtf ) + +!*****************************************************************************80 +! +!! LMDIF minimizes M functions in N variables by the Levenberg-Marquardt method. +! +! Discussion: +! +! LMDIF minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! The user must provide a subroutine which calculates the functions. +! The jacobian is then calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( m, n, x, fvec, iflag ) +! integer ( kind = 4 ) m +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) FTOL. Termination occurs when both the actual +! and predicted relative reductions in the sum of squares are at most FTOL. +! Therefore, FTOL measures the relative error desired in the sum of +! squares. FTOL should be nonnegative. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. Therefore, XTOL +! measures the relative error desired in the approximate solution. XTOL +! should be nonnegative. +! +! Input, real ( kind = 8 ) GTOL. termination occurs when the cosine of the +! angle between FVEC and any column of the jacobian is at most GTOL in +! absolute value. Therefore, GTOL measures the orthogonality desired +! between the function vector and the columns of the jacobian. GTOL should +! be nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number of +! calls to FCN is at least MAXFEV by the end of an iteration. +! +! Input, real ( kind = 8 ) EPSFCN, is used in determining a suitable step +! length for the forward-difference approximation. This approximation +! assumes that the relative errors in the functions are of the order of +! EPSFCN. If EPSFCN is less than the machine precision, it is assumed that +! the relative errors in the functions are of the order of the machine +! precision. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. +! This bound is set to the product of FACTOR and the euclidean norm of +! DIAG*X if nonzero, or else to FACTOR itself. In most cases, FACTOR should +! lie in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, both actual and predicted relative reductions in the sum of squares +! are at most FTOL. +! 2, relative error between two consecutive iterates is at most XTOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, the cosine of the angle between FVEC and any column of the jacobian +! is at most GTOL in absolute value. +! 5, number of calls to FCN has reached or exceeded MAXFEV. +! 6, FTOL is too small. No further reduction in the sum of squares +! is possible. +! 7, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 8, GTOL is too small. FVEC is orthogonal to the columns of the +! jacobian to machine precision. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an M by N array. The upper +! N by N submatrix of FJAC contains an upper triangular matrix R with +! diagonal elements of nonincreasing magnitude such that +! +! P' * ( JAC' * JAC ) * P = R' * R, +! +! where P is a permutation matrix and JAC is the final calculated jacobian. +! Column J of P is column IPVT(J) of the identity matrix. The lower +! trapezoidal part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least M. +! +! Output, integer ( kind = 4 ) IPVT(N), defines a permutation matrix P such +! that JAC * P = Q * R, where JAC is the final calculated jacobian, Q is +! orthogonal (not stored), and R is upper triangular with diagonal +! elements of nonincreasing magnitude. Column J of P is column IPVT(J) +! of the identity matrix. +! +! Output, real ( kind = 8 ) QTF(N), the first N elements of Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dirder + real ( kind = 8 ) enorm + real ( kind = 8 ) epsfcn + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gnorm + real ( kind = 8 ) gtol + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) iter + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) j + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + real ( kind = 8 ) par + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) ratio + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) temp1 + real ( kind = 8 ) temp2 + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(m) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + + if ( n <= 0 ) then + go to 300 + else if ( m < n ) then + go to 300 + else if ( ldfjac < m ) then + go to 300 + else if ( ftol < 0.0D+00 ) then + go to 300 + else if ( xtol < 0.0D+00 ) then + go to 300 + else if ( gtol < 0.0D+00 ) then + go to 300 + else if ( maxfev <= 0 ) then + go to 300 + else if ( factor <= 0.0D+00 ) then + go to 300 + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 300 + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( m, n, x, fvec, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + go to 300 + end if + + fnorm = enorm ( m, fvec ) +! +! Initialize Levenberg-Marquardt parameter and iteration counter. +! + par = 0.0D+00 + iter = 1 +! +! Beginning of the outer loop. +! +30 continue +! +! Calculate the jacobian matrix. +! + iflag = 2 + call fdjac2 ( fcn, m, n, x, fvec, fjac, ldfjac, iflag, epsfcn ) + nfev = nfev + n + + if ( iflag < 0 ) then + go to 300 + end if +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter - 1, nprint ) == 0 ) then + call fcn ( m, n, x, fvec, iflag ) + end if + if ( iflag < 0 ) then + go to 300 + end if + end if +! +! Compute the QR factorization of the jacobian. +! + pivot = .true. + call qrfac ( m, n, fjac, ldfjac, pivot, ipvt, n, wa1, wa2 ) +! +! On the first iteration and if MODE is 1, scale according +! to the norms of the columns of the initial jacobian. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + end if +! +! On the first iteration, calculate the norm of the scaled X +! and initialize the step bound DELTA. +! + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + end if +! +! Form Q' * FVEC and store the first N components in QTF. +! + wa4(1:m) = fvec(1:m) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + sum2 = dot_product ( wa4(j:m), fjac(j:m,j) ) + temp = - sum2 / fjac(j,j) + wa4(j:m) = wa4(j:m) + fjac(j:m,j) * temp + end if + + fjac(j,j) = wa1(j) + qtf(j) = wa4(j) + + end do +! +! Compute the norm of the scaled gradient. +! + gnorm = 0.0D+00 + + if ( fnorm /= 0.0D+00 ) then + + do j = 1, n + + l = ipvt(j) + + if ( wa2(l) /= 0.0D+00 ) then + sum2 = 0.0D+00 + do i = 1, j + sum2 = sum2 + fjac(i,j) * ( qtf(i) / fnorm ) + end do + gnorm = max ( gnorm, abs ( sum2 / wa2(l) ) ) + end if + + end do + + end if +! +! Test for convergence of the gradient norm. +! + if ( gnorm <= gtol ) then + info = 4 + go to 300 + end if +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +200 continue +! +! Determine the Levenberg-Marquardt parameter. +! + call lmpar ( n, fjac, ldfjac, ipvt, diag, qtf, delta, par, wa1, wa2 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = -wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( m, n, wa2, wa4, iflag ) + nfev = nfev + 1 + if ( iflag < 0 ) then + go to 300 + end if + fnorm1 = enorm ( m, wa4 ) +! +! Compute the scaled actual reduction. +! + if ( 0.1D+00 * fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + else + actred = -1.0D+00 + end if +! +! Compute the scaled predicted reduction and the scaled directional derivative. +! + do j = 1, n + wa3(j) = 0.0D+00 + l = ipvt(j) + temp = wa1(l) + wa3(1:j) = wa3(1:j) + fjac(1:j,j) * temp + end do + + temp1 = enorm ( n, wa3 ) / fnorm + temp2 = ( sqrt ( par ) * pnorm ) / fnorm + prered = temp1 ** 2 + temp2 ** 2 / 0.5D+00 + dirder = - ( temp1 ** 2 + temp2 ** 2 ) +! +! Compute the ratio of the actual to the predicted reduction. +! + ratio = 0.0D+00 + if ( prered /= 0.0D+00 ) then + ratio = actred / prered + end if +! +! Update the step bound. +! + if ( ratio <= 0.25D+00 ) then + + if ( actred >= 0.0D+00 ) then + temp = 0.5D+00 + endif + + if ( actred < 0.0D+00 ) then + temp = 0.5D+00 * dirder / ( dirder + 0.5D+00 * actred ) + end if + + if ( 0.1D+00 * fnorm1 >= fnorm .or. temp < 0.1D+00 ) then + temp = 0.1D+00 + end if + + delta = temp * min ( delta, pnorm / 0.1D+00 ) + par = par / temp + + else + + if ( par == 0.0D+00 .or. ratio >= 0.75D+00 ) then + delta = 2.0D+00 * pnorm + par = 0.5D+00 * par + end if + + end if +! +! Test for successful iteration. +! + +! +! Successful iteration. update X, FVEC, and their norms. +! + if ( 0.0001D+00 <= ratio ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:m) = wa4(1:m) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Tests for convergence. +! + if ( abs ( actred) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 1 + end if + + if ( delta <= xtol * xnorm ) then + info = 2 + end if + + if ( abs ( actred) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 .and. info == 2 ) info = 3 + + if ( info /= 0 ) then + go to 300 + end if +! +! Tests for termination and stringent tolerances. +! + if ( maxfev <= nfev ) then + info = 5 + end if + + if ( abs ( actred) <= epsmch .and. prered <= epsmch & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 6 + end if + + if ( delta <= epsmch * xnorm ) then + info = 7 + end if + + if ( gnorm <= epsmch ) then + info = 8 + end if + + if ( info /= 0 ) then + go to 300 + end if +! +! End of the inner loop. Repeat if iteration unsuccessful. +! + if ( ratio < 0.0001D+00 ) then + go to 200 + end if +! +! End of the outer loop. +! + go to 30 + +300 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( m, n, x, fvec, iflag ) + end if + + return +end +subroutine lmdif1 ( fcn, m, n, x, fvec, tol, info ) + +!*****************************************************************************80 +! +!! LMDIF1 minimizes M functions in N variables using Levenberg-Marquardt method. +! +! Discussion: +! +! LMDIF1 minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm. +! This is done by using the more general least-squares solver LMDIF. +! The user must provide a subroutine which calculates the functions. +! The jacobian is then calculated by a forward-difference approximation. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions. The routine should have the form: +! subroutine fcn ( m, n, x, fvec, iflag ) +! integer ( kind = 4 ) n +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! To terminate the algorithm, FCN may set IFLAG negative on return. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates either that the relative error in the sum of squares is at +! most TOL or that the relative error between X and the solution is at +! most TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error in the sum of squares +! is at most TOL. +! 2, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, FVEC is orthogonal to the columns of the jacobian to machine precision. +! 5, number of calls to FCN has reached or exceeded 200*(N+1). +! 6, TOL is too small. No further reduction in the sum of squares +! is possible. +! 7, TOL is too small. No further improvement in the approximate +! solution X is possible. +! + implicit none + + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) epsfcn + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(m,n) + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gtol + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + info = 0 + + if ( n <= 0 ) then + return + else if ( m < n ) then + return + else if ( tol < 0.0D+00 ) then + return + end if + + factor = 100.0D+00 + maxfev = 200 * ( n + 1 ) + ftol = tol + xtol = tol + gtol = 0.0D+00 + epsfcn = 0.0D+00 + mode = 1 + nprint = 0 + ldfjac = m + + call lmdif ( fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev, epsfcn, & + diag, mode, factor, nprint, info, nfev, fjac, ldfjac, ipvt, qtf ) + + if ( info == 8 ) then + info = 4 + end if + + return +end +subroutine lmpar ( n, r, ldr, ipvt, diag, qtb, delta, par, x, sdiag ) + +!*****************************************************************************80 +! +!! LMPAR computes a parameter for the Levenberg-Marquardt method. +! +! Discussion: +! +! Given an M by N matrix A, an N by N nonsingular diagonal +! matrix D, an M-vector B, and a positive number DELTA, +! the problem is to determine a value for the parameter +! PAR such that if X solves the system +! +! A*X = B, +! sqrt ( PAR ) * D * X = 0, +! +! in the least squares sense, and DXNORM is the euclidean +! norm of D*X, then either PAR is zero and +! +! ( DXNORM - DELTA ) <= 0.1 * DELTA, +! +! or PAR is positive and +! +! abs ( DXNORM - DELTA) <= 0.1 * DELTA. +! +! This function completes the solution of the problem +! if it is provided with the necessary information from the +! QR factorization, with column pivoting, of A. That is, if +! A*P = Q*R, where P is a permutation matrix, Q has orthogonal +! columns, and R is an upper triangular matrix with diagonal +! elements of nonincreasing magnitude, then LMPAR expects +! the full upper triangle of R, the permutation matrix P, +! and the first N components of Q'*B. On output +! LMPAR also provides an upper triangular matrix S such that +! +! P' * ( A' * A + PAR * D * D ) * P = S'* S. +! +! S is employed within LMPAR and may be of separate interest. +! +! Only a few iterations are generally needed for convergence +! of the algorithm. +! +! If, however, the limit of 10 iterations is reached, then the output +! PAR will contain the best value obtained so far. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 24 January 2014 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of R. +! +! Input/output, real ( kind = 8 ) R(LDR,N),the N by N matrix. The full +! upper triangle must contain the full upper triangle of the matrix R. +! On output the full upper triangle is unaltered, and the strict lower +! triangle contains the strict upper triangle (transposed) of the upper +! triangular matrix S. +! +! Input, integer ( kind = 4 ) LDR, the leading dimension of R. LDR must be +! no less than N. +! +! Input, integer ( kind = 4 ) IPVT(N), defines the permutation matrix P +! such that A*P = Q*R. Column J of P is column IPVT(J) of the +! identity matrix. +! +! Input, real ( kind = 8 ) DIAG(N), the diagonal elements of the matrix D. +! +! Input, real ( kind = 8 ) QTB(N), the first N elements of the vector Q'*B. +! +! Input, real ( kind = 8 ) DELTA, an upper bound on the euclidean norm +! of D*X. DELTA should be positive. +! +! Input/output, real ( kind = 8 ) PAR. On input an initial estimate of the +! Levenberg-Marquardt parameter. On output the final estimate. +! PAR should be nonnegative. +! +! Output, real ( kind = 8 ) X(N), the least squares solution of the system +! A*X = B, sqrt(PAR)*D*X = 0, for the output value of PAR. +! +! Output, real ( kind = 8 ) SDIAG(N), the diagonal elements of the upper +! triangular matrix S. +! + implicit none + + integer ( kind = 4 ) ldr + integer ( kind = 4 ) n + + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dwarf + real ( kind = 8 ) dxnorm + real ( kind = 8 ) enorm + real ( kind = 8 ) gnorm + real ( kind = 8 ) fp + integer ( kind = 4 ) i + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) iter + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) l + integer ( kind = 4 ) nsing + real ( kind = 8 ) par + real ( kind = 8 ) parc + real ( kind = 8 ) parl + real ( kind = 8 ) paru + real ( kind = 8 ) qnorm + real ( kind = 8 ) qtb(n) + real ( kind = 8 ) r(ldr,n) + real ( kind = 8 ) sdiag(n) + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) x(n) +! +! DWARF is the smallest positive magnitude. +! + dwarf = tiny ( dwarf ) +! +! Compute and store in X the Gauss-Newton direction. +! +! If the jacobian is rank-deficient, obtain a least squares solution. +! + nsing = n + + do j = 1, n + wa1(j) = qtb(j) + if ( r(j,j) == 0.0D+00 .and. nsing == n ) then + nsing = j - 1 + end if + if ( nsing < n ) then + wa1(j) = 0.0D+00 + end if + end do + + do k = 1, nsing + j = nsing - k + 1 + wa1(j) = wa1(j) / r(j,j) + temp = wa1(j) + wa1(1:j-1) = wa1(1:j-1) - r(1:j-1,j) * temp + end do + + do j = 1, n + l = ipvt(j) + x(l) = wa1(j) + end do +! +! Initialize the iteration counter. +! Evaluate the function at the origin, and test +! for acceptance of the Gauss-Newton direction. +! + iter = 0 + wa2(1:n) = diag(1:n) * x(1:n) + dxnorm = enorm ( n, wa2 ) + fp = dxnorm - delta + + if ( fp <= 0.1D+00 * delta ) then + if ( iter == 0 ) then + par = 0.0D+00 + end if + return + end if +! +! If the jacobian is not rank deficient, the Newton +! step provides a lower bound, PARL, for the zero of +! the function. +! +! Otherwise set this bound to zero. +! + parl = 0.0D+00 + + if ( n <= nsing ) then + + do j = 1, n + l = ipvt(j) + wa1(j) = diag(l) * ( wa2(l) / dxnorm ) + end do + + do j = 1, n + sum2 = dot_product ( wa1(1:j-1), r(1:j-1,j) ) + wa1(j) = ( wa1(j) - sum2 ) / r(j,j) + end do + + temp = enorm ( n, wa1 ) + parl = ( ( fp / delta ) / temp ) / temp + + end if +! +! Calculate an upper bound, PARU, for the zero of the function. +! + do j = 1, n + sum2 = dot_product ( qtb(1:j), r(1:j,j) ) + l = ipvt(j) + wa1(j) = sum2 / diag(l) + end do + + gnorm = enorm ( n, wa1 ) + paru = gnorm / delta + + if ( paru == 0.0D+00 ) then + paru = dwarf / min ( delta, 0.1D+00 ) + end if +! +! If the input PAR lies outside of the interval (PARL, PARU), +! set PAR to the closer endpoint. +! + par = max ( par, parl ) + par = min ( par, paru ) + if ( par == 0.0D+00 ) then + par = gnorm / dxnorm + end if +! +! Beginning of an iteration. +! + do + + iter = iter + 1 +! +! Evaluate the function at the current value of PAR. +! + if ( par == 0.0D+00 ) then + par = max ( dwarf, 0.001D+00 * paru ) + end if + + wa1(1:n) = sqrt ( par ) * diag(1:n) + + call qrsolv ( n, r, ldr, ipvt, wa1, qtb, x, sdiag ) + + wa2(1:n) = diag(1:n) * x(1:n) + dxnorm = enorm ( n, wa2 ) + temp = fp + fp = dxnorm - delta +! +! If the function is small enough, accept the current value of PAR. +! + if ( abs ( fp ) <= 0.1D+00 * delta ) then + exit + end if +! +! Test for the exceptional cases where PARL +! is zero or the number of iterations has reached 10. +! + if ( parl == 0.0D+00 .and. fp <= temp .and. temp < 0.0D+00 ) then + exit + else if ( iter == 10 ) then + exit + end if +! +! Compute the Newton correction. +! + do j = 1, n + l = ipvt(j) + wa1(j) = diag(l) * ( wa2(l) / dxnorm ) + end do + + do j = 1, n + wa1(j) = wa1(j) / sdiag(j) + temp = wa1(j) + wa1(j+1:n) = wa1(j+1:n) - r(j+1:n,j) * temp + end do + + temp = enorm ( n, wa1 ) + parc = ( ( fp / delta ) / temp ) / temp +! +! Depending on the sign of the function, update PARL or PARU. +! + if ( 0.0D+00 < fp ) then + parl = max ( parl, par ) + else if ( fp < 0.0D+00 ) then + paru = min ( paru, par ) + end if +! +! Compute an improved estimate for PAR. +! + par = max ( parl, par + parc ) +! +! End of an iteration. +! + end do +! +! Termination. +! + if ( iter == 0 ) then + par = 0.0D+00 + end if + + return +end +subroutine lmstr ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + +!*****************************************************************************80 +! +!! LMSTR minimizes M functions in N variables using Levenberg-Marquardt method. +! +! Discussion: +! +! LMSTR minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm +! which uses minimal storage. +! +! The user must provide a subroutine which calculates the functions and +! the rows of the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the rows of the jacobian. +! FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjrow, iflag ) +! integer ( kind = 4 ) m +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjrow(n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If the input value of IFLAG is I > 1, calculate the (I-1)-st row of +! the jacobian at X, and return this vector in FJROW. +! To terminate the algorithm, set the output value of IFLAG negative. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array. The upper +! triangle of FJAC contains an upper triangular matrix R such that +! +! P' * ( JAC' * JAC ) * P = R' * R, +! +! where P is a permutation matrix and JAC is the final calculated jacobian. +! Column J of P is column IPVT(J) of the identity matrix. The lower +! triangular part of FJAC contains information generated during +! the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) FTOL. Termination occurs when both the actual and +! predicted relative reductions in the sum of squares are at most FTOL. +! Therefore, FTOL measures the relative error desired in the sum of +! squares. FTOL should be nonnegative. +! +! Input, real ( kind = 8 ) XTOL. Termination occurs when the relative error +! between two consecutive iterates is at most XTOL. XTOL should be +! nonnegative. +! +! Input, real ( kind = 8 ) GTOL. termination occurs when the cosine of the +! angle between FVEC and any column of the jacobian is at most GTOL in +! absolute value. Therefore, GTOL measures the orthogonality desired +! between the function vector and the columns of the jacobian. GTOL should +! be nonnegative. +! +! Input, integer ( kind = 4 ) MAXFEV. Termination occurs when the number +! of calls to FCN with IFLAG = 1 is at least MAXFEV by the end of +! an iteration. +! +! Input/output, real ( kind = 8 ) DIAG(N). If MODE = 1, then DIAG is set +! internally. If MODE = 2, then DIAG must contain positive entries that +! serve as multiplicative scale factors for the variables. +! +! Input, integer ( kind = 4 ) MODE, scaling option. +! 1, variables will be scaled internally. +! 2, scaling is specified by the input DIAG vector. +! +! Input, real ( kind = 8 ) FACTOR, determines the initial step bound. This +! bound is set to the product of FACTOR and the euclidean norm of DIAG*X if +! nonzero, or else to FACTOR itself. In most cases, FACTOR should lie +! in the interval (0.1, 100) with 100 the recommended value. +! +! Input, integer ( kind = 4 ) NPRINT, enables controlled printing of iterates +! if it is positive. In this case, FCN is called with IFLAG = 0 at the +! beginning of the first iteration and every NPRINT iterations thereafter +! and immediately prior to return, with X and FVEC available +! for printing. If NPRINT is not positive, no special calls +! of FCN with IFLAG = 0 are made. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See the +! description of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, both actual and predicted relative reductions in the sum of squares +! are at most FTOL. +! 2, relative error between two consecutive iterates is at most XTOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, the cosine of the angle between FVEC and any column of the jacobian +! is at most GTOL in absolute value. +! 5, number of calls to FCN with IFLAG = 1 has reached MAXFEV. +! 6, FTOL is too small. No further reduction in the sum of squares is +! possible. +! 7, XTOL is too small. No further improvement in the approximate +! solution X is possible. +! 8, GTOL is too small. FVEC is orthogonal to the columns of the +! jacobian to machine precision. +! +! Output, integer ( kind = 4 ) NFEV, the number of calls to FCN +! with IFLAG = 1. +! +! Output, integer ( kind = 4 ) NJEV, the number of calls to FCN +! with IFLAG = 2. +! +! Output, integer ( kind = 4 ) IPVT(N), defines a permutation matrix P such +! that JAC * P = Q * R, where JAC is the final calculated jacobian, Q is +! orthogonal (not stored), and R is upper triangular. +! Column J of P is column IPVT(J) of the identity matrix. +! +! Output, real ( kind = 8 ) QTF(N), contains the first N elements of Q'*FVEC. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) actred + real ( kind = 8 ) delta + real ( kind = 8 ) diag(n) + real ( kind = 8 ) dirder + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) fnorm + real ( kind = 8 ) fnorm1 + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gnorm + real ( kind = 8 ) gtol + integer ( kind = 4 ) i + integer ( kind = 4 ) iflag + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) iter + integer ( kind = 4 ) j + integer ( kind = 4 ) l + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) par + logical pivot + real ( kind = 8 ) pnorm + real ( kind = 8 ) prered + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) ratio + logical sing + real ( kind = 8 ) sum2 + real ( kind = 8 ) temp + real ( kind = 8 ) temp1 + real ( kind = 8 ) temp2 + real ( kind = 8 ) wa1(n) + real ( kind = 8 ) wa2(n) + real ( kind = 8 ) wa3(n) + real ( kind = 8 ) wa4(m) + real ( kind = 8 ) x(n) + real ( kind = 8 ) xnorm + real ( kind = 8 ) xtol + + epsmch = epsilon ( epsmch ) + + info = 0 + iflag = 0 + nfev = 0 + njev = 0 +! +! Check the input parameters for errors. +! + if ( n <= 0 ) then + go to 340 + else if ( m < n ) then + go to 340 + else if ( ldfjac < n ) then + go to 340 + else if ( ftol < 0.0D+00 ) then + go to 340 + else if ( xtol < 0.0D+00 ) then + go to 340 + else if ( gtol < 0.0D+00 ) then + go to 340 + else if ( maxfev <= 0 ) then + go to 340 + else if ( factor <= 0.0D+00 ) then + go to 340 + end if + + if ( mode == 2 ) then + do j = 1, n + if ( diag(j) <= 0.0D+00 ) then + go to 340 + end if + end do + end if +! +! Evaluate the function at the starting point and calculate its norm. +! + iflag = 1 + call fcn ( m, n, x, fvec, wa3, iflag ) + nfev = 1 + + if ( iflag < 0 ) then + go to 340 + end if + + fnorm = enorm ( m, fvec ) +! +! Initialize Levenberg-Marquardt parameter and iteration counter. +! + par = 0.0D+00 + iter = 1 +! +! Beginning of the outer loop. +! + 30 continue +! +! If requested, call FCN to enable printing of iterates. +! + if ( 0 < nprint ) then + iflag = 0 + if ( mod ( iter-1, nprint ) == 0 ) then + call fcn ( m, n, x, fvec, wa3, iflag ) + end if + if ( iflag < 0 ) then + go to 340 + end if + end if +! +! Compute the QR factorization of the jacobian matrix calculated one row +! at a time, while simultaneously forming Q'* FVEC and storing +! the first N components in QTF. +! + qtf(1:n) = 0.0D+00 + fjac(1:n,1:n) = 0.0D+00 + iflag = 2 + + do i = 1, m + call fcn ( m, n, x, fvec, wa3, iflag ) + if ( iflag < 0 ) then + go to 340 + end if + temp = fvec(i) + call rwupdt ( n, fjac, ldfjac, wa3, qtf, temp, wa1, wa2 ) + iflag = iflag + 1 + end do + + njev = njev + 1 +! +! If the jacobian is rank deficient, call QRFAC to +! reorder its columns and update the components of QTF. +! + sing = .false. + do j = 1, n + if ( fjac(j,j) == 0.0D+00 ) then + sing = .true. + end if + ipvt(j) = j + wa2(j) = enorm ( j, fjac(1,j) ) + end do + + if ( sing ) then + + pivot = .true. + call qrfac ( n, n, fjac, ldfjac, pivot, ipvt, n, wa1, wa2 ) + + do j = 1, n + + if ( fjac(j,j) /= 0.0D+00 ) then + + sum2 = dot_product ( qtf(j:n), fjac(j:n,j) ) + temp = - sum2 / fjac(j,j) + qtf(j:n) = qtf(j:n) + fjac(j:n,j) * temp + + end if + + fjac(j,j) = wa1(j) + + end do + + end if +! +! On the first iteration +! if mode is 1, +! scale according to the norms of the columns of the initial jacobian. +! calculate the norm of the scaled X, +! initialize the step bound delta. +! + if ( iter == 1 ) then + + if ( mode /= 2 ) then + + diag(1:n) = wa2(1:n) + do j = 1, n + if ( wa2(j) == 0.0D+00 ) then + diag(j) = 1.0D+00 + end if + end do + + end if + + wa3(1:n) = diag(1:n) * x(1:n) + xnorm = enorm ( n, wa3 ) + delta = factor * xnorm + if ( delta == 0.0D+00 ) then + delta = factor + end if + + end if +! +! Compute the norm of the scaled gradient. +! + gnorm = 0.0D+00 + + if ( fnorm /= 0.0D+00 ) then + + do j = 1, n + l = ipvt(j) + if ( wa2(l) /= 0.0D+00 ) then + sum2 = dot_product ( qtf(1:j), fjac(1:j,j) ) / fnorm + gnorm = max ( gnorm, abs ( sum2 / wa2(l) ) ) + end if + end do + + end if +! +! Test for convergence of the gradient norm. +! + if ( gnorm <= gtol ) then + info = 4 + go to 340 + end if +! +! Rescale if necessary. +! + if ( mode /= 2 ) then + do j = 1, n + diag(j) = max ( diag(j), wa2(j) ) + end do + end if +! +! Beginning of the inner loop. +! +240 continue +! +! Determine the Levenberg-Marquardt parameter. +! + call lmpar ( n, fjac, ldfjac, ipvt, diag, qtf, delta, par, wa1, wa2 ) +! +! Store the direction P and X + P. +! Calculate the norm of P. +! + wa1(1:n) = -wa1(1:n) + wa2(1:n) = x(1:n) + wa1(1:n) + wa3(1:n) = diag(1:n) * wa1(1:n) + pnorm = enorm ( n, wa3 ) +! +! On the first iteration, adjust the initial step bound. +! + if ( iter == 1 ) then + delta = min ( delta, pnorm ) + end if +! +! Evaluate the function at X + P and calculate its norm. +! + iflag = 1 + call fcn ( m, n, wa2, wa4, wa3, iflag ) + nfev = nfev + 1 + if ( iflag < 0 ) then + go to 340 + end if + fnorm1 = enorm ( m, wa4 ) +! +! Compute the scaled actual reduction. +! + if ( 0.1D+00 * fnorm1 < fnorm ) then + actred = 1.0D+00 - ( fnorm1 / fnorm ) ** 2 + else + actred = -1.0D+00 + end if +! +! Compute the scaled predicted reduction and +! the scaled directional derivative. +! + do j = 1, n + wa3(j) = 0.0D+00 + l = ipvt(j) + temp = wa1(l) + wa3(1:j) = wa3(1:j) + fjac(1:j,j) * temp + end do + + temp1 = enorm ( n, wa3 ) / fnorm + temp2 = ( sqrt(par) * pnorm ) / fnorm + prered = temp1 ** 2 + temp2 ** 2 / 0.5D+00 + dirder = - ( temp1 ** 2 + temp2 ** 2 ) +! +! Compute the ratio of the actual to the predicted reduction. +! + if ( prered /= 0.0D+00 ) then + ratio = actred / prered + else + ratio = 0.0D+00 + end if +! +! Update the step bound. +! + if ( ratio <= 0.25D+00 ) then + + if ( actred >= 0.0D+00 ) then + temp = 0.5D+00 + else + temp = 0.5D+00 * dirder / ( dirder + 0.5D+00 * actred ) + end if + + if ( 0.1D+00 * fnorm1 >= fnorm .or. temp < 0.1D+00 ) then + temp = 0.1D+00 + end if + + delta = temp * min ( delta, pnorm / 0.1D+00 ) + par = par / temp + + else + + if ( par == 0.0D+00 .or. ratio >= 0.75D+00 ) then + delta = pnorm / 0.5D+00 + par = 0.5D+00 * par + end if + + end if +! +! Test for successful iteration. +! + if ( ratio >= 0.0001D+00 ) then + x(1:n) = wa2(1:n) + wa2(1:n) = diag(1:n) * x(1:n) + fvec(1:m) = wa4(1:m) + xnorm = enorm ( n, wa2 ) + fnorm = fnorm1 + iter = iter + 1 + end if +! +! Tests for convergence, termination and stringent tolerances. +! + if ( abs ( actred ) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 1 + end if + + if ( delta <= xtol * xnorm ) then + info = 2 + end if + + if ( abs ( actred ) <= ftol .and. prered <= ftol & + .and. 0.5D+00 * ratio <= 1.0D+00 .and. info == 2 ) then + info = 3 + end if + + if ( info /= 0 ) then + go to 340 + end if + + if ( nfev >= maxfev) then + info = 5 + end if + + if ( abs ( actred ) <= epsmch .and. prered <= epsmch & + .and. 0.5D+00 * ratio <= 1.0D+00 ) then + info = 6 + end if + + if ( delta <= epsmch * xnorm ) then + info = 7 + end if + + if ( gnorm <= epsmch ) then + info = 8 + end if + + if ( info /= 0 ) then + go to 340 + end if +! +! End of the inner loop. Repeat if iteration unsuccessful. +! + if ( ratio < 0.0001D+00 ) then + go to 240 + end if +! +! End of the outer loop. +! + go to 30 + + 340 continue +! +! Termination, either normal or user imposed. +! + if ( iflag < 0 ) then + info = iflag + end if + + iflag = 0 + + if ( 0 < nprint ) then + call fcn ( m, n, x, fvec, wa3, iflag ) + end if + + return +end +subroutine lmstr1 ( fcn, m, n, x, fvec, fjac, ldfjac, tol, info ) + +!*****************************************************************************80 +! +!! LMSTR1 minimizes M functions in N variables using Levenberg-Marquardt method. +! +! Discussion: +! +! LMSTR1 minimizes the sum of the squares of M nonlinear functions in +! N variables by a modification of the Levenberg-Marquardt algorithm +! which uses minimal storage. +! +! This is done by using the more general least-squares solver +! LMSTR. The user must provide a subroutine which calculates +! the functions and the rows of the jacobian. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 19 August 2016 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, external FCN, the name of the user-supplied subroutine which +! calculates the functions and the rows of the jacobian. +! FCN should have the form: +! subroutine fcn ( m, n, x, fvec, fjrow, iflag ) +! integer ( kind = 4 ) m +! integer ( kind = 4 ) n +! real ( kind = 8 ) fjrow(n) +! real ( kind = 8 ) fvec(m) +! integer ( kind = 4 ) iflag +! real ( kind = 8 ) x(n) +! If IFLAG = 0 on input, then FCN is only being called to allow the user +! to print out the current iterate. +! If IFLAG = 1 on input, FCN should calculate the functions at X and +! return this vector in FVEC. +! If the input value of IFLAG is I > 1, calculate the (I-1)-st row of +! the jacobian at X, and return this vector in FJROW. +! To terminate the algorithm, set the output value of IFLAG negative. +! +! Input, integer ( kind = 4 ) M, the number of functions. +! +! Input, integer ( kind = 4 ) N, the number of variables. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) X(N). On input, X must contain an initial +! estimate of the solution vector. On output X contains the final +! estimate of the solution vector. +! +! Output, real ( kind = 8 ) FVEC(M), the functions evaluated at the output X. +! +! Output, real ( kind = 8 ) FJAC(LDFJAC,N), an N by N array. The upper +! triangle contains an upper triangular matrix R such that +! +! P' * ( JAC' * JAC ) * P = R' * R, +! +! where P is a permutation matrix and JAC is the final calculated +! jacobian. Column J of P is column IPVT(J) of the identity matrix. +! The lower triangular part of FJAC contains information generated +! during the computation of R. +! +! Input, integer ( kind = 4 ) LDFJAC, the leading dimension of FJAC. +! LDFJAC must be at least N. +! +! Input, real ( kind = 8 ) TOL. Termination occurs when the algorithm +! estimates either that the relative error in the sum of squares is at +! most TOL or that the relative error between X and the solution is at +! most TOL. TOL should be nonnegative. +! +! Output, integer ( kind = 4 ) INFO, error flag. If the user has terminated +! execution, INFO is set to the (negative) value of IFLAG. See description +! of FCN. Otherwise, INFO is set as follows: +! 0, improper input parameters. +! 1, algorithm estimates that the relative error in the sum of squares +! is at most TOL. +! 2, algorithm estimates that the relative error between X and the +! solution is at most TOL. +! 3, conditions for INFO = 1 and INFO = 2 both hold. +! 4, FVEC is orthogonal to the columns of the jacobian to machine precision. +! 5, number of calls to FCN with IFLAG = 1 has reached 100*(N+1). +! 6, TOL is too small. No further reduction in the sum of squares +! is possible. +! 7, TOL is too small. No further improvement in the approximate +! solution X is possible. +! + implicit none + + integer ( kind = 4 ) ldfjac + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) diag(n) + real ( kind = 8 ) factor + external fcn + real ( kind = 8 ) fjac(ldfjac,n) + real ( kind = 8 ) ftol + real ( kind = 8 ) fvec(m) + real ( kind = 8 ) gtol + integer ( kind = 4 ) info + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) maxfev + integer ( kind = 4 ) mode + integer ( kind = 4 ) nfev + integer ( kind = 4 ) njev + integer ( kind = 4 ) nprint + real ( kind = 8 ) qtf(n) + real ( kind = 8 ) tol + real ( kind = 8 ) x(n) + real ( kind = 8 ) xtol + + if ( n <= 0 ) then + info = 0 + return + end if + + if ( m < n ) then + info = 0 + return + end if + + if ( ldfjac < n ) then + info = 0 + return + end if + + if ( tol < 0.0D+00 ) then + info = 0 + return + end if + + fvec(1:n) = 0.0D+00 + fjac(1:ldfjac,1:n) = 0.0D+00 + ftol = tol + xtol = tol + gtol = 0.0D+00 + maxfev = 100 * ( n + 1 ) + diag(1:n) = 0.0D+00 + mode = 1 + factor = 100.0D+00 + nprint = 0 + info = 0 + nfev = 0 + njev = 0 + ipvt(1:n) = 0 + qtf(1:n) = 0.0D+00 + + call lmstr ( fcn, m, n, x, fvec, fjac, ldfjac, ftol, xtol, gtol, maxfev, & + diag, mode, factor, nprint, info, nfev, njev, ipvt, qtf ) + + if ( info == 8 ) then + info = 4 + end if + + return +end +subroutine qform ( m, n, q, ldq ) + +!*****************************************************************************80 +! +!! QFORM produces the explicit QR factorization of a matrix. +! +! Discussion: +! +! The QR factorization of a matrix is usually accumulated in implicit +! form, that is, as a series of orthogonal transformations of the +! original matrix. This routine carries out those transformations, +! to explicitly exhibit the factorization constructed by QRFAC. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, is a positive integer input variable set +! to the number of rows of A and the order of Q. +! +! Input, integer ( kind = 4 ) N, is a positive integer input variable set +! to the number of columns of A. +! +! Input/output, real ( kind = 8 ) Q(LDQ,M). Q is an M by M array. +! On input the full lower trapezoid in the first min(M,N) columns of Q +! contains the factored form. +! On output, Q has been accumulated into a square matrix. +! +! Input, integer ( kind = 4 ) LDQ, is a positive integer input variable +! not less than M which specifies the leading dimension of the array Q. +! + implicit none + + integer ( kind = 4 ) ldq + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) l + integer ( kind = 4 ) minmn + real ( kind = 8 ) q(ldq,m) + real ( kind = 8 ) temp + real ( kind = 8 ) wa(m) + + minmn = min ( m, n ) + + do j = 2, minmn + q(1:j-1,j) = 0.0D+00 + end do +! +! Initialize remaining columns to those of the identity matrix. +! + q(1:m,n+1:m) = 0.0D+00 + + do j = n + 1, m + q(j,j) = 1.0D+00 + end do +! +! Accumulate Q from its factored form. +! + do l = 1, minmn + + k = minmn - l + 1 + + wa(k:m) = q(k:m,k) + + q(k:m,k) = 0.0D+00 + q(k,k) = 1.0D+00 + + if ( wa(k) /= 0.0D+00 ) then + + do j = k, m + temp = dot_product ( wa(k:m), q(k:m,j) ) / wa(k) + q(k:m,j) = q(k:m,j) - temp * wa(k:m) + end do + + end if + + end do + + return +end +subroutine qrfac ( m, n, a, lda, pivot, ipvt, lipvt, rdiag, acnorm ) + +!*****************************************************************************80 +! +!! QRFAC computes a QR factorization using Householder transformations. +! +! Discussion: +! +! This function uses Householder transformations with optional column +! pivoting to compute a QR factorization of the +! M by N matrix A. That is, QRFAC determines an orthogonal +! matrix Q, a permutation matrix P, and an upper trapezoidal +! matrix R with diagonal elements of nonincreasing magnitude, +! such that A*P = Q*R. +! +! The Householder transformation for column K, K = 1,2,...,min(M,N), +! is of the form +! +! I - ( 1 / U(K) ) * U * U' +! +! where U has zeros in the first K-1 positions. +! +! The form of this transformation and the method of pivoting first +! appeared in the corresponding LINPACK routine. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows of A. +! +! Input, integer ( kind = 4 ) N, the number of columns of A. +! +! Input/output, real ( kind = 8 ) A(LDA,N), the M by N array. +! On input, A contains the matrix for which the QR factorization is to +! be computed. On output, the strict upper trapezoidal part of A contains +! the strict upper trapezoidal part of R, and the lower trapezoidal +! part of A contains a factored form of Q, the non-trivial elements of +! the U vectors described above. +! +! Input, integer ( kind = 4 ) LDA, the leading dimension of A, which must +! be no less than M. +! +! Input, logical PIVOT, is TRUE if column pivoting is to be carried out. +! +! Output, integer ( kind = 4 ) IPVT(LIPVT), defines the permutation matrix P +! such that A*P = Q*R. Column J of P is column IPVT(J) of the identity +! matrix. If PIVOT is false, IPVT is not referenced. +! +! Input, integer ( kind = 4 ) LIPVT, the dimension of IPVT, which should +! be N if pivoting is used. +! +! Output, real ( kind = 8 ) RDIAG(N), contains the diagonal elements of R. +! +! Output, real ( kind = 8 ) ACNORM(N), the norms of the corresponding +! columns of the input matrix A. If this information is not needed, +! then ACNORM can coincide with RDIAG. +! + implicit none + + integer ( kind = 4 ) lda + integer ( kind = 4 ) lipvt + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(lda,n) + real ( kind = 8 ) acnorm(n) + real ( kind = 8 ) ajnorm + real ( kind = 8 ) enorm + real ( kind = 8 ) epsmch + integer ( kind = 4 ) i + integer ( kind = 4 ) i4_temp + integer ( kind = 4 ) ipvt(lipvt) + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) kmax + integer ( kind = 4 ) minmn + logical pivot + real ( kind = 8 ) r8_temp(m) + real ( kind = 8 ) rdiag(n) + real ( kind = 8 ) temp + real ( kind = 8 ) wa(n) + + epsmch = epsilon ( epsmch ) +! +! Compute the initial column norms and initialize several arrays. +! + do j = 1, n + acnorm(j) = enorm ( m, a(1:m,j) ) + end do + + rdiag(1:n) = acnorm(1:n) + wa(1:n) = acnorm(1:n) + + if ( pivot ) then + do j = 1, n + ipvt(j) = j + end do + end if +! +! Reduce A to R with Householder transformations. +! + minmn = min ( m, n ) + + do j = 1, minmn +! +! Bring the column of largest norm into the pivot position. +! + if ( pivot ) then + + kmax = j + + do k = j, n + if ( rdiag(kmax) < rdiag(k) ) then + kmax = k + end if + end do + + if ( kmax /= j ) then + + r8_temp(1:m) = a(1:m,j) + a(1:m,j) = a(1:m,kmax) + a(1:m,kmax) = r8_temp(1:m) + + rdiag(kmax) = rdiag(j) + wa(kmax) = wa(j) + + i4_temp = ipvt(j) + ipvt(j) = ipvt(kmax) + ipvt(kmax) = i4_temp + + end if + + end if +! +! Compute the Householder transformation to reduce the +! J-th column of A to a multiple of the J-th unit vector. +! + ajnorm = enorm ( m-j+1, a(j,j) ) + + if ( ajnorm /= 0.0D+00 ) then + + if ( a(j,j) < 0.0D+00 ) then + ajnorm = -ajnorm + end if + + a(j:m,j) = a(j:m,j) / ajnorm + a(j,j) = a(j,j) + 1.0D+00 +! +! Apply the transformation to the remaining columns and update the norms. +! + do k = j + 1, n + + temp = dot_product ( a(j:m,j), a(j:m,k) ) / a(j,j) + + a(j:m,k) = a(j:m,k) - temp * a(j:m,j) + + if ( pivot .and. rdiag(k) /= 0.0D+00 ) then + + temp = a(j,k) / rdiag(k) + rdiag(k) = rdiag(k) * sqrt ( max ( 0.0D+00, 1.0D+00-temp ** 2 ) ) + + if ( 0.05D+00 * ( rdiag(k) / wa(k) ) ** 2 <= epsmch ) then + rdiag(k) = enorm ( m-j, a(j+1,k) ) + wa(k) = rdiag(k) + end if + + end if + + end do + + end if + + rdiag(j) = - ajnorm + + end do + + return +end +subroutine qrsolv ( n, r, ldr, ipvt, diag, qtb, x, sdiag ) + +!*****************************************************************************80 +! +!! QRSOLV solves a rectangular linear system A*x=b in the least squares sense. +! +! Discussion: +! +! Given an M by N matrix A, an N by N diagonal matrix D, +! and an M-vector B, the problem is to determine an X which +! solves the system +! +! A*X = B +! D*X = 0 +! +! in the least squares sense. +! +! This function completes the solution of the problem +! if it is provided with the necessary information from the +! QR factorization, with column pivoting, of A. That is, if +! Q*P = Q*R, where P is a permutation matrix, Q has orthogonal +! columns, and R is an upper triangular matrix with diagonal +! elements of nonincreasing magnitude, then QRSOLV expects +! the full upper triangle of R, the permutation matrix p, +! and the first N components of Q'*B. +! +! The system is then equivalent to +! +! R*Z = Q'*B +! P'*D*P*Z = 0 +! +! where X = P*Z. If this system does not have full rank, +! then a least squares solution is obtained. On output QRSOLV +! also provides an upper triangular matrix S such that +! +! P'*(A'*A + D*D)*P = S'*S. +! +! S is computed within QRSOLV and may be of separate interest. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of R. +! +! Input/output, real ( kind = 8 ) R(LDR,N), the N by N matrix. +! On input the full upper triangle must contain the full upper triangle +! of the matrix R. On output the full upper triangle is unaltered, and +! the strict lower triangle contains the strict upper triangle +! (transposed) of the upper triangular matrix S. +! +! Input, integer ( kind = 4 ) LDR, the leading dimension of R, which must be +! at least N. +! +! Input, integer ( kind = 4 ) IPVT(N), defines the permutation matrix P such +! that A*P = Q*R. Column J of P is column IPVT(J) of the identity matrix. +! +! Input, real ( kind = 8 ) DIAG(N), the diagonal elements of the matrix D. +! +! Input, real ( kind = 8 ) QTB(N), the first N elements of the vector Q'*B. +! +! Output, real ( kind = 8 ) X(N), the least squares solution. +! +! Output, real ( kind = 8 ) SDIAG(N), the diagonal elements of the upper +! triangular matrix S. +! + implicit none + + integer ( kind = 4 ) ldr + integer ( kind = 4 ) n + + real ( kind = 8 ) c + real ( kind = 8 ) cotan + real ( kind = 8 ) diag(n) + integer ( kind = 4 ) i + integer ( kind = 4 ) ipvt(n) + integer ( kind = 4 ) j + integer ( kind = 4 ) k + integer ( kind = 4 ) l + integer ( kind = 4 ) nsing + real ( kind = 8 ) qtb(n) + real ( kind = 8 ) qtbpj + real ( kind = 8 ) r(ldr,n) + real ( kind = 8 ) s + real ( kind = 8 ) sdiag(n) + real ( kind = 8 ) sum2 + real ( kind = 8 ) t + real ( kind = 8 ) temp + real ( kind = 8 ) wa(n) + real ( kind = 8 ) x(n) +! +! Copy R and Q'*B to preserve input and initialize S. +! +! In particular, save the diagonal elements of R in X. +! + do j = 1, n + r(j:n,j) = r(j,j:n) + x(j) = r(j,j) + end do + + wa(1:n) = qtb(1:n) +! +! Eliminate the diagonal matrix D using a Givens rotation. +! + do j = 1, n +! +! Prepare the row of D to be eliminated, locating the +! diagonal element using P from the QR factorization. +! + l = ipvt(j) + + if ( diag(l) /= 0.0D+00 ) then + + sdiag(j:n) = 0.0D+00 + sdiag(j) = diag(l) +! +! The transformations to eliminate the row of D +! modify only a single element of Q'*B +! beyond the first N, which is initially zero. +! + qtbpj = 0.0D+00 + + do k = j, n +! +! Determine a Givens rotation which eliminates the +! appropriate element in the current row of D. +! + if ( sdiag(k) /= 0.0D+00 ) then + + if ( abs ( r(k,k) ) < abs ( sdiag(k) ) ) then + cotan = r(k,k) / sdiag(k) + s = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + c = s * cotan + else + t = sdiag(k) / r(k,k) + c = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * t ** 2 ) + s = c * t + end if +! +! Compute the modified diagonal element of R and +! the modified element of (Q'*B,0). +! + r(k,k) = c * r(k,k) + s * sdiag(k) + temp = c * wa(k) + s * qtbpj + qtbpj = - s * wa(k) + c * qtbpj + wa(k) = temp +! +! Accumulate the tranformation in the row of S. +! + do i = k+1, n + temp = c * r(i,k) + s * sdiag(i) + sdiag(i) = - s * r(i,k) + c * sdiag(i) + r(i,k) = temp + end do + + end if + + end do + + end if +! +! Store the diagonal element of S and restore +! the corresponding diagonal element of R. +! + sdiag(j) = r(j,j) + r(j,j) = x(j) + + end do +! +! Solve the triangular system for Z. If the system is +! singular, then obtain a least squares solution. +! + nsing = n + + do j = 1, n + + if ( sdiag(j) == 0.0D+00 .and. nsing == n ) then + nsing = j - 1 + end if + + if ( nsing < n ) then + wa(j) = 0.0D+00 + end if + + end do + + do j = nsing, 1, -1 + sum2 = dot_product ( wa(j+1:nsing), r(j+1:nsing,j) ) + wa(j) = ( wa(j) - sum2 ) / sdiag(j) + end do +! +! Permute the components of Z back to components of X. +! + do j = 1, n + l = ipvt(j) + x(l) = wa(j) + end do + + return +end +subroutine r1mpyq ( m, n, a, lda, v, w ) + +!*****************************************************************************80 +! +!! R1MPYQ computes A*Q, where Q is the product of Householder transformations. +! +! Discussion: +! +! Given an M by N matrix A, this function computes A*Q where +! Q is the product of 2*(N - 1) transformations +! +! GV(N-1)*...*GV(1)*GW(1)*...*GW(N-1) +! +! and GV(I), GW(I) are Givens rotations in the (I,N) plane which +! eliminate elements in the I-th and N-th planes, respectively. +! Q itself is not given, rather the information to recover the +! GV, GW rotations is supplied. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows of A. +! +! Input, integer ( kind = 4 ) N, the number of columns of A. +! +! Input/output, real ( kind = 8 ) A(LDA,N), the M by N array. +! On input, the matrix A to be postmultiplied by the orthogonal matrix Q. +! On output, the value of A*Q. +! +! Input, integer ( kind = 4 ) LDA, the leading dimension of A, which must not +! be less than M. +! +! Input, real ( kind = 8 ) V(N), W(N), contain the information necessary +! to recover the Givens rotations GV and GW. +! + implicit none + + integer ( kind = 4 ) lda + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(lda,n) + real ( kind = 8 ) c + integer ( kind = 4 ) i + integer ( kind = 4 ) j + real ( kind = 8 ) s + real ( kind = 8 ) temp + real ( kind = 8 ) v(n) + real ( kind = 8 ) w(n) +! +! Apply the first set of Givens rotations to A. +! + do j = n - 1, 1, -1 + + if ( 1.0D+00 < abs ( v(j) ) ) then + c = 1.0D+00 / v(j) + s = sqrt ( 1.0D+00 - c ** 2 ) + else + s = v(j) + c = sqrt ( 1.0D+00 - s ** 2 ) + end if + + do i = 1, m + temp = c * a(i,j) - s * a(i,n) + a(i,n) = s * a(i,j) + c * a(i,n) + a(i,j) = temp + end do + + end do +! +! Apply the second set of Givens rotations to A. +! + do j = 1, n - 1 + + if ( abs ( w(j) ) > 1.0D+00 ) then + c = 1.0D+00 / w(j) + s = sqrt ( 1.0D+00 - c ** 2 ) + else + s = w(j) + c = sqrt ( 1.0D+00 - s ** 2 ) + end if + + do i = 1, m + temp = c * a(i,j) + s * a(i,n) + a(i,n) = - s * a(i,j) + c * a(i,n) + a(i,j) = temp + end do + + end do + + return +end +subroutine r1updt ( m, n, s, ls, u, v, w, sing ) + +!*****************************************************************************80 +! +!! R1UPDT re-triangularizes a matrix after a rank one update. +! +! Discussion: +! +! Given an M by N lower trapezoidal matrix S, an M-vector U, and an +! N-vector V, the problem is to determine an orthogonal matrix Q such that +! +! (S + U * V' ) * Q +! +! is again lower trapezoidal. +! +! This function determines Q as the product of 2 * (N - 1) +! transformations +! +! GV(N-1)*...*GV(1)*GW(1)*...*GW(N-1) +! +! where GV(I), GW(I) are Givens rotations in the (I,N) plane +! which eliminate elements in the I-th and N-th planes, +! respectively. Q itself is not accumulated, rather the +! information to recover the GV and GW rotations is returned. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows of S. +! +! Input, integer ( kind = 4 ) N, the number of columns of S. +! N must not exceed M. +! +! Input/output, real ( kind = 8 ) S(LS). On input, the lower trapezoidal +! matrix S stored by columns. On output S contains the lower trapezoidal +! matrix produced as described above. +! +! Input, integer ( kind = 4 ) LS, the length of the S array. LS must be at +! least (N*(2*M-N+1))/2. +! +! Input, real ( kind = 8 ) U(M), the U vector. +! +! Input/output, real ( kind = 8 ) V(N). On input, V must contain the +! vector V. On output V contains the information necessary to recover the +! Givens rotations GV described above. +! +! Output, real ( kind = 8 ) W(M), contains information necessary to +! recover the Givens rotations GW described above. +! +! Output, logical SING, is set to TRUE if any of the diagonal elements +! of the output S are zero. Otherwise SING is set FALSE. +! + implicit none + + integer ( kind = 4 ) ls + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) cos + real ( kind = 8 ) cotan + real ( kind = 8 ) giant + integer ( kind = 4 ) i + integer ( kind = 4 ) j + integer ( kind = 4 ) jj + integer ( kind = 4 ) l + real ( kind = 8 ) s(ls) + real ( kind = 8 ) sin + logical sing + real ( kind = 8 ) tan + real ( kind = 8 ) tau + real ( kind = 8 ) temp + real ( kind = 8 ) u(m) + real ( kind = 8 ) v(n) + real ( kind = 8 ) w(m) +! +! GIANT is the largest magnitude. +! + giant = huge ( giant ) +! +! Initialize the diagonal element pointer. +! + jj = ( n * ( 2 * m - n + 1 ) ) / 2 - ( m - n ) +! +! Move the nontrivial part of the last column of S into W. +! + l = jj + do i = n, m + w(i) = s(l) + l = l + 1 + end do +! +! Rotate the vector V into a multiple of the N-th unit vector +! in such a way that a spike is introduced into W. +! + do j = n - 1, 1, -1 + + jj = jj - ( m - j + 1 ) + w(j) = 0.0D+00 + + if ( v(j) /= 0.0D+00 ) then +! +! Determine a Givens rotation which eliminates the +! J-th element of V. +! + if ( abs ( v(n) ) < abs ( v(j) ) ) then + cotan = v(n) / v(j) + sin = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + cos = sin * cotan + tau = 1.0D+00 + if ( abs ( cos ) * giant > 1.0D+00 ) then + tau = 1.0D+00 / cos + end if + else + tan = v(j) / v(n) + cos = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * tan ** 2 ) + sin = cos * tan + tau = sin + end if +! +! Apply the transformation to V and store the information +! necessary to recover the Givens rotation. +! + v(n) = sin * v(j) + cos * v(n) + v(j) = tau +! +! Apply the transformation to S and extend the spike in W. +! + l = jj + do i = j, m + temp = cos * s(l) - sin * w(i) + w(i) = sin * s(l) + cos * w(i) + s(l) = temp + l = l + 1 + end do + + end if + + end do +! +! Add the spike from the rank 1 update to W. +! + w(1:m) = w(1:m) + v(n) * u(1:m) +! +! Eliminate the spike. +! + sing = .false. + + do j = 1, n-1 + + if ( w(j) /= 0.0D+00 ) then +! +! Determine a Givens rotation which eliminates the +! J-th element of the spike. +! + if ( abs ( s(jj) ) < abs ( w(j) ) ) then + + cotan = s(jj) / w(j) + sin = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + cos = sin * cotan + + if ( 1.0D+00 < abs ( cos ) * giant ) then + tau = 1.0D+00 / cos + else + tau = 1.0D+00 + end if + + else + + tan = w(j) / s(jj) + cos = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * tan ** 2 ) + sin = cos * tan + tau = sin + + end if +! +! Apply the transformation to S and reduce the spike in W. +! + l = jj + do i = j, m + temp = cos * s(l) + sin * w(i) + w(i) = - sin * s(l) + cos * w(i) + s(l) = temp + l = l + 1 + end do +! +! Store the information necessary to recover the Givens rotation. +! + w(j) = tau + + end if +! +! Test for zero diagonal elements in the output S. +! + if ( s(jj) == 0.0D+00 ) then + sing = .true. + end if + + jj = jj + ( m - j + 1 ) + + end do +! +! Move W back into the last column of the output S. +! + l = jj + do i = n, m + s(l) = w(i) + l = l + 1 + end do + + if ( s(jj) == 0.0D+00 ) then + sing = .true. + end if + + return +end +function r8_uniform_01 ( seed ) + +!*****************************************************************************80 +! +!! R8_UNIFORM_01 returns a unit pseudorandom R8. +! +! Discussion: +! +! An R8 is a real ( kind = 8 ) value. +! +! For now, the input quantity SEED is an integer variable. +! +! This routine implements the recursion +! +! seed = 16807 * seed mod ( 2^31 - 1 ) +! r8_uniform_01 = seed / ( 2^31 - 1 ) +! +! The integer arithmetic never requires more than 32 bits, +! including a sign bit. +! +! If the initial seed is 12345, then the first three computations are +! +! Input Output R8_UNIFORM_01 +! SEED SEED +! +! 12345 207482415 0.096616 +! 207482415 1790989824 0.833995 +! 1790989824 2035175616 0.947702 +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 05 July 2006 +! +! Author: +! +! John Burkardt +! +! Reference: +! +! Paul Bratley, Bennett Fox, Linus Schrage, +! A Guide to Simulation, +! Springer Verlag, pages 201-202, 1983. +! +! Pierre L'Ecuyer, +! Random Number Generation, +! in Handbook of Simulation, +! edited by Jerry Banks, +! Wiley Interscience, page 95, 1998. +! +! Bennett Fox, +! Algorithm 647: +! Implementation and Relative Efficiency of Quasirandom +! Sequence Generators, +! ACM Transactions on Mathematical Software, +! Volume 12, Number 4, pages 362-376, 1986. +! +! Peter Lewis, Allen Goodman, James Miller +! A Pseudo-Random Number Generator for the System/360, +! IBM Systems Journal, +! Volume 8, pages 136-143, 1969. +! +! Parameters: +! +! Input/output, integer ( kind = 4 ) SEED, the "seed" value, which should +! NOT be 0. On output, SEED has been updated. +! +! Output, real ( kind = 8 ) R8_UNIFORM_01, a new pseudorandom variate, +! strictly between 0 and 1. +! + implicit none + + integer ( kind = 4 ), parameter :: i4_huge = 2147483647 + integer ( kind = 4 ) k + real ( kind = 8 ) r8_uniform_01 + integer ( kind = 4 ) seed + + if ( seed == 0 ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) 'R8_UNIFORM_01 - Fatal error!' + write ( *, '(a)' ) ' Input value of SEED = 0.' + stop 1 + end if + + k = seed / 127773 + + seed = 16807 * ( seed - k * 127773 ) - k * 2836 + + if ( seed < 0 ) then + seed = seed + i4_huge + end if + + r8_uniform_01 = real ( seed, kind = 8 ) * 4.656612875D-10 + + return +end +subroutine r8mat_print ( m, n, a, title ) + +!*****************************************************************************80 +! +!! R8MAT_PRINT prints an R8MAT. +! +! Discussion: +! +! An R8MAT is an MxN array of R8's, stored by (I,J) -> [I+J*M]. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 12 September 2004 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, the number of rows in A. +! +! Input, integer ( kind = 4 ) N, the number of columns in A. +! +! Input, real ( kind = 8 ) A(M,N), the matrix. +! +! Input, character ( len = * ) TITLE, a title. +! + implicit none + + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(m,n) + character ( len = * ) title + + call r8mat_print_some ( m, n, a, 1, 1, m, n, title ) + + return +end +subroutine r8mat_print_some ( m, n, a, ilo, jlo, ihi, jhi, title ) + +!*****************************************************************************80 +! +!! R8MAT_PRINT_SOME prints some of an R8MAT. +! +! Discussion: +! +! An R8MAT is an MxN array of R8's, stored by (I,J) -> [I+J*M]. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 10 September 2009 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer ( kind = 4 ) M, N, the number of rows and columns. +! +! Input, real ( kind = 8 ) A(M,N), an M by N matrix to be printed. +! +! Input, integer ( kind = 4 ) ILO, JLO, the first row and column to print. +! +! Input, integer ( kind = 4 ) IHI, JHI, the last row and column to print. +! +! Input, character ( len = * ) TITLE, a title. +! + implicit none + + integer ( kind = 4 ), parameter :: incx = 5 + integer ( kind = 4 ) m + integer ( kind = 4 ) n + + real ( kind = 8 ) a(m,n) + character ( len = 14 ) ctemp(incx) + integer ( kind = 4 ) i + integer ( kind = 4 ) i2hi + integer ( kind = 4 ) i2lo + integer ( kind = 4 ) ihi + integer ( kind = 4 ) ilo + integer ( kind = 4 ) inc + integer ( kind = 4 ) j + integer ( kind = 4 ) j2 + integer ( kind = 4 ) j2hi + integer ( kind = 4 ) j2lo + integer ( kind = 4 ) jhi + integer ( kind = 4 ) jlo + character ( len = * ) title + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) trim ( title ) + + if ( m <= 0 .or. n <= 0 ) then + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) ' (None)' + return + end if + + do j2lo = max ( jlo, 1 ), min ( jhi, n ), incx + + j2hi = j2lo + incx - 1 + j2hi = min ( j2hi, n ) + j2hi = min ( j2hi, jhi ) + + inc = j2hi + 1 - j2lo + + write ( *, '(a)' ) ' ' + + do j = j2lo, j2hi + j2 = j + 1 - j2lo + write ( ctemp(j2), '(i8,6x)' ) j + end do + + write ( *, '('' Col '',5a14)' ) ctemp(1:inc) + write ( *, '(a)' ) ' Row' + write ( *, '(a)' ) ' ' + + i2lo = max ( ilo, 1 ) + i2hi = min ( ihi, m ) + + do i = i2lo, i2hi + + do j2 = 1, inc + + j = j2lo - 1 + j2 + + if ( a(i,j) == real ( int ( a(i,j) ), kind = 8 ) ) then + write ( ctemp(j2), '(f8.0,6x)' ) a(i,j) + else + write ( ctemp(j2), '(g14.6)' ) a(i,j) + end if + + end do + + write ( *, '(i5,a,5a14)' ) i, ':', ( ctemp(j), j = 1, inc ) + + end do + + end do + + return +end +subroutine r8vec_print ( n, a, title ) + +!*****************************************************************************80 +! +!! R8VEC_PRINT prints an R8VEC. +! +! Discussion: +! +! An R8VEC is a vector of R8's. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 22 August 2000 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the number of components of the vector. +! +! Input, real ( kind = 8 ) A(N), the vector to be printed. +! +! Input, character ( len = * ) TITLE, a title. +! + implicit none + + integer ( kind = 4 ) n + + real ( kind = 8 ) a(n) + integer ( kind = 4 ) i + character ( len = * ) title + + write ( *, '(a)' ) ' ' + write ( *, '(a)' ) trim ( title ) + write ( *, '(a)' ) ' ' + do i = 1, n + write ( *, '(2x,i8,2x,g16.8)' ) i, a(i) + end do + + return +end +subroutine rwupdt ( n, r, ldr, w, b, alpha, c, s ) + +!*****************************************************************************80 +! +!! RWUPDT computes the decomposition of triangular matrix augmented by one row. +! +! Discussion: +! +! Given an N by N upper triangular matrix R, this function +! computes the QR decomposition of the matrix formed when a row +! is added to R. If the row is specified by the vector W, then +! RWUPDT determines an orthogonal matrix Q such that when the +! N+1 by N matrix composed of R augmented by W is premultiplied +! by Q', the resulting matrix is upper trapezoidal. +! The matrix Q' is the product of N transformations +! +! G(N)*G(N-1)* ... *G(1) +! +! where G(I) is a Givens rotation in the (I,N+1) plane which eliminates +! elements in the (N+1)-st plane. RWUPDT also computes the product +! Q'*C where C is the (N+1)-vector (B,ALPHA). +! +! Q itself is not accumulated, rather the information to recover the G +! rotations is supplied. +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 06 April 2010 +! +! Author: +! +! Original FORTRAN77 version by Jorge More, Burton Garbow, Kenneth Hillstrom. +! FORTRAN90 version by John Burkardt. +! +! Reference: +! +! Jorge More, Burton Garbow, Kenneth Hillstrom, +! User Guide for MINPACK-1, +! Technical Report ANL-80-74, +! Argonne National Laboratory, 1980. +! +! Parameters: +! +! Input, integer ( kind = 4 ) N, the order of R. +! +! Input/output, real ( kind = 8 ) R(LDR,N). On input the upper triangular +! part of R must contain the matrix to be updated. On output R contains the +! updated triangular matrix. +! +! Input, integer ( kind = 4 ) LDR, the leading dimension of the array R. +! LDR must not be less than N. +! +! Input, real ( kind = 8 ) W(N), the row vector to be added to R. +! +! Input/output, real ( kind = 8 ) B(N). On input, the first N elements +! of the vector C. On output the first N elements of the vector Q'*C. +! +! Input/output, real ( kind = 8 ) ALPHA. On input, the (N+1)-st element +! of the vector C. On output the (N+1)-st element of the vector Q'*C. +! +! Output, real ( kind = 8 ) C(N), S(N), the cosines and sines of the +! transforming Givens rotations. +! + implicit none + + integer ( kind = 4 ) ldr + integer ( kind = 4 ) n + + real ( kind = 8 ) alpha + real ( kind = 8 ) b(n) + real ( kind = 8 ) c(n) + real ( kind = 8 ) cotan + integer ( kind = 4 ) i + integer ( kind = 4 ) j + real ( kind = 8 ) r(ldr,n) + real ( kind = 8 ) rowj + real ( kind = 8 ) s(n) + real ( kind = 8 ) tan + real ( kind = 8 ) temp + real ( kind = 8 ) w(n) + + do j = 1, n + + rowj = w(j) +! +! Apply the previous transformations to R(I,J), I=1,2,...,J-1, and to W(J). +! + do i = 1, j - 1 + temp = c(i) * r(i,j) + s(i) * rowj + rowj = - s(i) * r(i,j) + c(i) * rowj + r(i,j) = temp + end do +! +! Determine a Givens rotation which eliminates W(J). +! + c(j) = 1.0D+00 + s(j) = 0.0D+00 + + if ( rowj /= 0.0D+00 ) then + + if ( abs ( r(j,j) ) < abs ( rowj ) ) then + cotan = r(j,j) / rowj + s(j) = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * cotan ** 2 ) + c(j) = s(j) * cotan + else + tan = rowj / r(j,j) + c(j) = 0.5D+00 / sqrt ( 0.25D+00 + 0.25D+00 * tan ** 2 ) + s(j) = c(j) * tan + end if +! +! Apply the current transformation to R(J,J), B(J), and ALPHA. +! + r(j,j) = c(j) * r(j,j) + s(j) * rowj + temp = c(j) * b(j) + s(j) * alpha + alpha = - s(j) * b(j) + c(j) * alpha + b(j) = temp + + end if + + end do + + return +end +subroutine timestamp ( ) + +!*****************************************************************************80 +! +!! TIMESTAMP prints the current YMDHMS date as a time stamp. +! +! Example: +! +! 31 May 2001 9:45:54.872 AM +! +! Licensing: +! +! This code may freely be copied, modified, and used for any purpose. +! +! Modified: +! +! 18 May 2013 +! +! Author: +! +! John Burkardt +! +! Parameters: +! +! None +! + implicit none + + character ( len = 8 ) ampm + integer ( kind = 4 ) d + integer ( kind = 4 ) h + integer ( kind = 4 ) m + integer ( kind = 4 ) mm + character ( len = 9 ), parameter, dimension(12) :: month = (/ & + 'January ', 'February ', 'March ', 'April ', & + 'May ', 'June ', 'July ', 'August ', & + 'September', 'October ', 'November ', 'December ' /) + integer ( kind = 4 ) n + integer ( kind = 4 ) s + integer ( kind = 4 ) values(8) + integer ( kind = 4 ) y + + call date_and_time ( values = values ) + + y = values(1) + m = values(2) + d = values(3) + h = values(5) + n = values(6) + s = values(7) + mm = values(8) + + if ( h < 12 ) then + ampm = 'AM' + else if ( h == 12 ) then + if ( n == 0 .and. s == 0 ) then + ampm = 'Noon' + else + ampm = 'PM' + end if + else + h = h - 12 + if ( h < 12 ) then + ampm = 'PM' + else if ( h == 12 ) then + if ( n == 0 .and. s == 0 ) then + ampm = 'Midnight' + else + ampm = 'AM' + end if + end if + end if + + write ( *, '(i2.2,1x,a,1x,i4,2x,i2,a1,i2.2,a1,i2.2,a1,i3.3,1x,a)' ) & + d, trim ( month(m) ), y, h, ':', n, ':', s, '.', mm, trim ( ampm ) + + return +end \ No newline at end of file diff --git a/python/05_tokamak/SympGPR/plotting.py b/python/05_tokamak/SympGPR/plotting.py new file mode 100644 index 0000000..9839439 --- /dev/null +++ b/python/05_tokamak/SympGPR/plotting.py @@ -0,0 +1,63 @@ +""" +Created: 2018-08-08 +Modified: 2019-03-07 +Author: Christopher Albert +""" +from numpy import cos, sin, empty_like, linspace, roll +from matplotlib.pyplot import figure, plot, xlabel, ylabel, subplot, grid, legend + +def plot_orbit(z): + figure() + plot(z[0,:]*cos(z[1,:]), z[0,:]*sin(z[1,:]),',') + xlabel(r'$R-R_0$') + ylabel(r'$Z$') + + +def plot_cost_function(F, zlast, zold, pthold): + """Plot the cost function""" + r = linspace(-0.2,0.2,100) + fres = empty_like(r) + for kr in range(len(r)): + fres[kr] = F([r[kr]], zold[1:], pthold) + + figure() + subplot(2,1,1) + plot(r,fres) + out_conv = F([zlast[0]], zold[1:], pthold) + plot(zlast[0], out_conv, 'rx') + xlabel('r') + ylabel('F') + grid() + +def plot_cost_function_jac(F, zlast, zold, pthold): + """Plot the cost function with Jacobian""" + + r = linspace(-0.2,0.2,100) + fres = empty_like(r) + jac = empty_like(r) + for kr in range(len(r)): + out = F([r[kr]], zold[1:], pthold) + fres[kr] = out[0] + jac[kr] = out[1][0] + + jacnum = (roll(fres,-1)-roll(fres,1))/(2.*(r[1]-r[0])) + jacnum[0] = jacnum[1] + jacnum[-1] = jacnum[-2] + + figure() + subplot(2,1,1) + plot(r,fres) + out_conv = F([zlast[0]],zold[1:],pthold) + plot(zlast[0],out_conv[0],'rx') + xlabel('r') + ylabel('F') + grid() + + subplot(2,1,2) + plot(r,jac) + plot(r,jacnum,'r--') + plot(zlast[0],out_conv[1],'rx') + xlabel('r') + ylabel('dF/dr') + legend(['analytical', 'numerical']) + grid()