Update datadriven (#37)

* Update manifest * Update scenario 1 * Update scenario 2 * Update hudson bay * Update scenario 3
ChrisRackauckas · Jul 3, 2021 · 9fd95d7 · 9fd95d7
1 parent 266e1e4
commit 9fd95d7
Show file tree

Hide file tree

Showing 31 changed files with 660 additions and 812 deletions.
diff --git a/LotkaVolterra/Manifest.toml b/LotkaVolterra/Manifest.toml
diff --git a/LotkaVolterra/hudson_bay.jl b/LotkaVolterra/hudson_bay.jl
@@ -5,7 +5,7 @@ using Pkg; Pkg.activate("."); Pkg.instantiate()
 using OrdinaryDiffEq
 using ModelingToolkit
 using DataDrivenDiffEq
-using LinearAlgebra, DiffEqSensitivity, Optim
+using LinearAlgebra, Optim
 using DiffEqFlux, Flux
 using Plots
 gr()
@@ -16,6 +16,13 @@ using DelimitedFiles
 using Random
 Random.seed!(5443)
 
+#### NOTE
+# Since the recent release of DataDrivenDiffEq v0.6.0 where a complete overhaul of the optimizers took
+# place, SR3 has been used. Right now, STLSQ performs better and has been changed.
+# Additionally, the behaviour of the optimization has changed slightly. This has been adjusted
+# by decreasing the tolerance of the gradient.
+
+
 svname = "HudsonBay"
 ## Data Preprocessing
 # The data has been taken from https://jmahaffy.sdsu.edu/courses/f00/math122/labs/labj/q3v1.htm
@@ -36,11 +43,12 @@ scatter(t, transpose(Xₙ), xlabel = "t", ylabel = "x(t), y(t)")
 plot!(t, transpose(Xₙ), xlabel = "t", ylabel = "x(t), y(t)")
 
 ## Direct Identification via SINDy + Collocation
-# Create a collocation
-dx̂,x̂ = collocate_data(Xₙ,t, GaussianKernel())
+
+# Create the problem using a gaussian kernel for collocation
+full_problem = ContinuousDataDrivenProblem(Xₙ, t, DataDrivenDiffEq.GaussianKernel())
 # Look at the collocation
-plot(t, dx̂')
-# Perform sindy
+plot(full_problem.t, full_problem.X')
+plot(full_problem.t, full_problem.DX')
 
 # Create a Basis
 @variables u[1:2]
@@ -49,36 +57,17 @@ plot(t, dx̂')
 # and sine
 b = [polynomial_basis(u, 5); sin.(u)]
 basis = Basis(b, u)
-# Create an optimizer for the SINDy problem
-opt = STRRidge()#SR3(Float32(1e-2), Float32(1e-2))
+
 # Create the thresholds which should be used in the search process
-λ = Float32.(exp10.(-7:0.1:3))
-# Target function to choose the results from; x = L0 of coefficients and L2-Error of the model
-g(x) = x[1] < 1 ? Inf : norm(x, 2)
-# Test on derivative data
-Ψ = SINDy(x̂, dx̂, basis, λ,  opt, g = g, maxiter = 50000, normalize = true, denoise = true)
-println(Ψ)
-print_equations(Ψ) # Fails
-b2 = Basis((u,p,t)->Ψ(u,ones(length(parameters(Ψ))),t),u, linear_independent = true)
-Ψ = SINDy(x̂, dx̂, b2, λ,  opt, g = g, maxiter = 50000, normalize = true, denoise = true)
-println(Ψ)
-print_equations(Ψ) # Fails
-parameters(Ψ)
-## UDE Approach
-# Subsample the data in y -> initial fitting strategy (batching)
-# We assume we have only 5 measurements in y, evenly distributed
-ty = collect(t[1]:Float32(t[end]/5):t[end])
-# Create datasets for the different measurements
-XS = zeros(Float32, length(ty)-1, floor(Int64, mean(diff(ty))/mean(diff(t)))+1) # All x data
-TS = zeros(Float32, length(ty)-1, floor(Int64, mean(diff(ty))/mean(diff(t)))+1) # Time data
-YS = zeros(Float32, length(ty)-1, 2) # Just two measurements in y
-
-for i in 1:length(ty)-1
-    idxs = ty[i].<= t .<= ty[i+1]
-    XS[i, :] = Xₙ[1, idxs]
-    TS[i, :] = t[idxs]
-    YS[i, :] = [Xₙ[2, t .== ty[i]]'; Xₙ[2, t .== ty[i+1]]]
-end
+λ = Float32.(exp10.(-7:0.1:5))
+# Create an optimizer for the SINDy problem
+opt = STLSQ(λ)
+
+# Best result so far
+full_res = solve(full_problem, basis, opt, maxiter = 10000, progress = true, denoise = true, normalize = true)
+
+println(full_res)
+println(result(full_res))
 
 ## Define the network
 # Gaussian RBF as activation
@@ -96,7 +85,7 @@ p = [rand(Float32,2); initial_params(U)]
 function ude_dynamics!(du,u, p, t)
     û = U(u, p[3:end]) # Network prediction
     # We assume a linear birth rate for the prey
-    du[1] = p[1]u[1] + û[1]
+    du[1] = p[1]*u[1] + û[1]
     # We assume a linear decay rate for the predator
     du[2] = -p[2]*u[2] + û[2]
 end
@@ -114,31 +103,31 @@ function predict(θ, X = Xₙ[:,1], T = t)
                 ))
 end
 
-# Multiple shooting like loss
-function shooting_loss(θ)
-    # Start with a regularization on the network
-    l = convert(eltype(θ), 1e-3)*sum(abs2, θ[3:end]) ./ length(θ[3:end])
-    for i in 1:size(XS,1)
-        X̂ = predict(θ, [XS[i,1], YS[i,1]], TS[i, :])
-        # Full prediction in x
-        l += sum(abs2, XS[i,:] .- X̂[1,:])
-        # Add the boundary condition in y
-        l += abs2(YS[i, 2] .- X̂[2, end])
-    end
 
-    return l
+# Define parameters for Multiple Shooting
+group_size = 5
+continuity_term = 200.0f0
+
+function loss(data, pred)
+	return sum(abs2, data - pred)
+end
+
+function shooting_loss(p)
+    return multiple_shoot(p, Xₙ, t, prob_nn, loss, Vern7(),
+                          group_size; continuity_term)
 end
 
 function loss(θ)
     X̂ = predict(θ)
-    sum(abs2, Xₙ - X̂) + convert(eltype(θ), 1e-3)*sum(abs2, θ[3:end]) ./ length(θ[3:end])
+    sum(abs2, Xₙ - X̂) / size(Xₙ, 2) + convert(eltype(θ), 1e-3)*sum(abs2, θ[3:end]) ./ length(θ[3:end])
 end
 
 # Container to track the losses
 losses = Float32[]
 
 # Callback to show the loss during training
-callback(θ,l) = begin
+callback(θ,args...) = begin
+	l = loss(θ) # Equivalent L2 loss
     push!(losses, l)
     if length(losses)%5==0
         println("Current loss after $(length(losses)) iterations: $(losses[end])")
@@ -153,7 +142,7 @@ end
 res1 = DiffEqFlux.sciml_train(shooting_loss, p, ADAM(0.1f0), cb=callback, maxiters = 100)
 println("Training loss after $(length(losses)) iterations: $(losses[end])")
 # Train with BFGS to achieve partial fit of the data
-res2 = DiffEqFlux.sciml_train(shooting_loss, res1.minimizer, BFGS(initial_stepnorm=0.01f0), cb=callback, maxiters = 200)
+res2 = DiffEqFlux.sciml_train(shooting_loss, res1.minimizer, BFGS(initial_stepnorm=0.01f0), cb=callback, maxiters = 500)
 println("Training loss after $(length(losses)) iterations: $(losses[end])")
 # Full L2-Loss for full prediction
 res3 = DiffEqFlux.sciml_train(loss, res2.minimizer, BFGS(initial_stepnorm=0.01f0), cb=callback, maxiters = 10000)
@@ -185,41 +174,24 @@ plot!(tsample, transpose(Ŷ), color = :red, lw = 2, style = :dash, label = [not
 savefig(pl_reconstruction, joinpath(pwd(), "plots", "$(svname)_missingterm_reconstruction.pdf"))
 pl_missing = plot(pl_trajectory, pl_reconstruction, layout = (2,1))
 savefig(pl_missing, joinpath(pwd(), "plots", "$(svname)_reconstruction.pdf"))
-## Symbolic regression via sparse regression ( SINDy based )
+## Symbolic regression via sparse regression (SINDy based)
+# We reuse the basis and optimizer defined at the beginning
 
-# Create a Basis
-@variables u[1:2]
-
-# Generate the basis functions, multivariate polynomials up to deg 5
-# and sine
-b = [polynomial_basis(u, 5); sin.(u)]
-basis = Basis(b, u)
-
-# Create an optimizer for the SINDy problem
-opt = STRRidge()
-# Create the thresholds which should be used in the search process
-λ = Float32.(exp10.(-7:0.1:3))
-# Target function to choose the results from; x = L0 of coefficients and L2-Error of the model
-g(x) = x[1] < 1 ? Inf : norm(x, 2)
-
-# Test on uode derivative data
-println("SINDy on learned, partial, available data")
-Ψ = SINDy(X̂, Ŷ, basis, λ,  opt, g = g, maxiter = 50000, normalize = true, denoise = true)
-
-@info "Found equations:"
-print_equations(Ψ)
-# Extract the parameter
-p̂ = parameters(Ψ)
-println("First parameter guess : $(p̂)")
+nn_problem = ContinuousDataDrivenProblem(X̂, tsample, DX = Ŷ)
+nn_res = solve(nn_problem, basis, opt, maxiter = 10000, progress = true, normalize = false, denoise = true)
+println(nn_res)
+println(result(nn_res))
 
 # Define the recovered, hyrid model with the rescaled dynamics
 function recovered_dynamics!(du,u, p, t)
-    û = Ψ(u, p[3:end]) # Network prediction
+    û = nn_res(u, p[3:end]) # Network prediction
     du[1] = p[1]*u[1] + û[1]
     du[2] = -p[2]*u[2] + û[2]
 end
 
-p_model = [p_trained[1:2];p̂]
+
+p_model = [p_trained[1:2];parameters(nn_res)]
+
 estimation_prob = ODEProblem(recovered_dynamics!, Xₙ[:, 1], tspan, p_model)
 # Convert for reuse
 sys = modelingtoolkitize(estimation_prob);
@@ -258,7 +230,7 @@ plot(estimate_long.t, transpose(xscale .* estimate_long[:,:]), xlabel = "t", yla
 ## Save the results
 save(joinpath(pwd(),"results","Hudson_Bay_recovery.jld2"),
     "X", Xₙ, "t" , t, "neural_network" , U, "initial_parameters", p, "trained_parameters" , p_trained, # Training
-    "losses", losses, "result", Ψ, "recovered_parameters", p̂, # Recovery
+    "losses", losses, "result", nn_res, "recovered_parameters", parameters(nn_res), # Recovery
     "model", recovered_dynamics!, "model_parameter", p_model, "fitted_parameter", p_fitted,
     "long_estimate", estimate_long) # Estimation
 

diff --git a/LotkaVolterra/plots/HudsonBay_losses.pdf b/LotkaVolterra/plots/HudsonBay_losses.pdf
diff --git a/LotkaVolterra/plots/HudsonBay_missingterm_reconstruction.pdf b/LotkaVolterra/plots/HudsonBay_missingterm_reconstruction.pdf
diff --git a/LotkaVolterra/plots/HudsonBay_reconstruction.pdf b/LotkaVolterra/plots/HudsonBay_reconstruction.pdf
diff --git a/LotkaVolterra/plots/HudsonBay_trajectory_reconstruction.pdf b/LotkaVolterra/plots/HudsonBay_trajectory_reconstruction.pdf
diff --git a/LotkaVolterra/plots/HudsonBayfull_plot.pdf b/LotkaVolterra/plots/HudsonBayfull_plot.pdf
diff --git a/LotkaVolterra/plots/HudsonBayrecovery_fitting.pdf b/LotkaVolterra/plots/HudsonBayrecovery_fitting.pdf
diff --git a/LotkaVolterra/plots/Scenario_1__losses.pdf b/LotkaVolterra/plots/Scenario_1__losses.pdf
diff --git a/LotkaVolterra/plots/Scenario_1__missingterm_reconstruction.pdf b/LotkaVolterra/plots/Scenario_1__missingterm_reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_1__missingterm_reconstruction_and_error.pdf b/LotkaVolterra/plots/Scenario_1__missingterm_reconstruction_and_error.pdf
diff --git a/LotkaVolterra/plots/Scenario_1__reconstruction.pdf b/LotkaVolterra/plots/Scenario_1__reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_1__trajectory_reconstruction.pdf b/LotkaVolterra/plots/Scenario_1__trajectory_reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_1_full_plot.pdf b/LotkaVolterra/plots/Scenario_1_full_plot.pdf
diff --git a/LotkaVolterra/plots/Scenario_2__losses.pdf b/LotkaVolterra/plots/Scenario_2__losses.pdf
diff --git a/LotkaVolterra/plots/Scenario_2__missingterm_reconstruction.pdf b/LotkaVolterra/plots/Scenario_2__missingterm_reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_2__missingterm_reconstruction_and_error.pdf b/LotkaVolterra/plots/Scenario_2__missingterm_reconstruction_and_error.pdf
diff --git a/LotkaVolterra/plots/Scenario_2__reconstruction.pdf b/LotkaVolterra/plots/Scenario_2__reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_2__trajectory_reconstruction.pdf b/LotkaVolterra/plots/Scenario_2__trajectory_reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_2_full_plot.pdf b/LotkaVolterra/plots/Scenario_2_full_plot.pdf
diff --git a/LotkaVolterra/plots/Scenario_3__missingterm_reconstruction_and_error.pdf b/LotkaVolterra/plots/Scenario_3__missingterm_reconstruction_and_error.pdf
diff --git a/LotkaVolterra/plots/Scenario_3__reconstruction.pdf b/LotkaVolterra/plots/Scenario_3__reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_3__trajectory_reconstruction.pdf b/LotkaVolterra/plots/Scenario_3__trajectory_reconstruction.pdf
diff --git a/LotkaVolterra/plots/Scenario_3_full_data_0.025.pdf b/LotkaVolterra/plots/Scenario_3_full_data_0.025.pdf
diff --git a/LotkaVolterra/results/Hudson_Bay_recovery.jld2 b/LotkaVolterra/results/Hudson_Bay_recovery.jld2
diff --git a/LotkaVolterra/results/Scenario_1_recovery_0.05.jld2 b/LotkaVolterra/results/Scenario_1_recovery_0.05.jld2
diff --git a/LotkaVolterra/results/Scenario_2_recovery_0.01.jld2 b/LotkaVolterra/results/Scenario_2_recovery_0.01.jld2
diff --git a/LotkaVolterra/results/Scenario_3_recovery_0.025.jld2 b/LotkaVolterra/results/Scenario_3_recovery_0.025.jld2
diff --git a/LotkaVolterra/scenario_1.jl b/LotkaVolterra/scenario_1.jl
@@ -6,7 +6,7 @@ using Pkg; Pkg.activate("."); Pkg.instantiate()
 using OrdinaryDiffEq
 using ModelingToolkit
 using DataDrivenDiffEq
-using LinearAlgebra, DiffEqSensitivity, Optim
+using LinearAlgebra, Optim
 using DiffEqFlux, Flux
 using Plots
 gr()
@@ -16,6 +16,10 @@ using Statistics
 using Random
 Random.seed!(1234)
 
+#### NOTE
+# Since the recent release of DataDrivenDiffEq v0.6.0 where a complete overhaul of the optimizers took
+# place, SR3 has been used. Right now, STLSQ performs better and has been changed.
+
 # Create a name for saving ( basically a prefix )
 svname = "Scenario_1_"
 
@@ -48,7 +52,7 @@ scatter!(t, transpose(Xₙ), color = :red, label = ["Noisy Data" nothing])
 # Gaussian RBF as activation
 rbf(x) = exp.(-(x.^2))
 
-# Define the network 2->5->5->5->2
+# Multilayer FeedForward
 U = FastChain(
     FastDense(2,5,rbf), FastDense(5,5, rbf), FastDense(5,5, rbf), FastDense(5,2)
 )
@@ -114,23 +118,24 @@ p_trained = res2.minimizer
 
 ## Analysis of the trained network
 # Plot the data and the approximation
-X̂ = predict(p_trained, Xₙ[:,1], t[1]:0.05f0:t[end])
+ts = first(solution.t):mean(diff(solution.t))/2:last(solution.t)
+X̂ = predict(p_trained, Xₙ[:,1], ts)
 # Trained on noisy data vs real solution
-pl_trajectory = plot(t[1]:0.05f0:t[end], transpose(X̂), xlabel = "t", ylabel ="x(t), y(t)", color = :red, label = ["UDE Approximation" nothing])
-scatter!(t, transpose(Xₙ), color = :black, label = ["Measurements" nothing])
+pl_trajectory = plot(ts, transpose(X̂), xlabel = "t", ylabel ="x(t), y(t)", color = :red, label = ["UDE Approximation" nothing])
+scatter!(solution.t, transpose(Xₙ), color = :black, label = ["Measurements" nothing])
 savefig(pl_trajectory, joinpath(pwd(), "plots", "$(svname)_trajectory_reconstruction.pdf"))
 
 # Ideal unknown interactions of the predictor
 Ȳ = [-p_[2]*(X̂[1,:].*X̂[2,:])';p_[3]*(X̂[1,:].*X̂[2,:])']
 # Neural network guess
 Ŷ = U(X̂,p_trained)
 
-pl_reconstruction = plot(t[1]:0.05f0:t[end], transpose(Ŷ), xlabel = "t", ylabel ="U(x,y)", color = :red, label = ["UDE Approximation" nothing])
-plot!(t[1]:0.05f0:t[end], transpose(Ȳ), color = :black, label = ["True Interaction" nothing])
+pl_reconstruction = plot(ts, transpose(Ŷ), xlabel = "t", ylabel ="U(x,y)", color = :red, label = ["UDE Approximation" nothing])
+plot!(ts, transpose(Ȳ), color = :black, label = ["True Interaction" nothing])
 savefig(pl_reconstruction, joinpath(pwd(), "plots", "$(svname)_missingterm_reconstruction.pdf"))
 
 # Plot the error
-pl_reconstruction_error = plot(t[1]:0.05f0:t[end], norm.(eachcol(Ȳ-Ŷ)), yaxis = :log, xlabel = "t", ylabel = "L2-Error", label = nothing, color = :red)
+pl_reconstruction_error = plot(ts, norm.(eachcol(Ȳ-Ŷ)), yaxis = :log, xlabel = "t", ylabel = "L2-Error", label = nothing, color = :red)
 pl_missing = plot(pl_reconstruction, pl_reconstruction_error, layout = (2,1))
 savefig(pl_missing, joinpath(pwd(), "plots", "$(svname)_missingterm_reconstruction_and_error.pdf"))
 pl_overall = plot(pl_trajectory, pl_missing)
@@ -144,51 +149,38 @@ savefig(pl_overall, joinpath(pwd(), "plots", "$(svname)_reconstruction.pdf"))
 b = [polynomial_basis(u, 5); sin.(u)]
 basis = Basis(b, u)
 
-# Create an optimizer for the SINDy problem
-opt = SR3(Float32(1e-2), Float32(0.1))
 # Create the thresholds which should be used in the search process
-λ = Float32.(exp10.(-7:0.1:5))
-# Target function to choose the results from; x = L0 of coefficients and L2-Error of the model
-g(x) = x[1] < 1 ? Inf : norm(x, 2)
-
+λ = Float32.(exp10.(-7:0.1:0))
+# Create an optimizer for the SINDy problem
+opt = STLSQ(λ)
+# Define different problems for the recovery
+full_problem = ContinuousDataDrivenProblem(solution)
+ideal_problem = ContinuousDataDrivenProblem(X̂, ts, DX = Ȳ)
+nn_problem = ContinuousDataDrivenProblem(X̂, ts, DX = Ŷ)
 # Test on ideal derivative data for unknown function ( not available )
-println("SINDy on partial ideal, unavailable data")
-Ψ = SINDy(X̂, Ȳ, basis, λ, opt, g = g, maxiter = 10000) # Succeed
-println(Ψ)
-print_equations(Ψ)
-
-# Test on uode derivative data
-println("SINDy on learned, partial, available data")
-Ψ = SINDy(X̂, Ŷ, basis, λ,  opt, g = g, maxiter = 50000, normalize = true, denoise = true, convergence_error = Float32(1e-10)) # Succeed
-println(Ψ)
-print_equations(Ψ)
-
-# Extract the parameter
-p̂ = parameters(Ψ)
-println("First parameter guess : $(p̂)")
-
-# Just the equations
-b = Basis((u, p, t)->Ψ(u, [1f0; 1f0], t), u)
-
-# Retune for better parameters -> we could also use DiffEqFlux or other parameter estimation tools here.
-Ψf = SINDy(X̂, Ŷ, b, STRRidge(0.01f0), maxiter = 100, convergence_error = Float32(1e-18)) # Succeed
-println(Ψf)
-p̂ = parameters(Ψf)
-println("Second parameter guess : $(abs.(p̂))")
-println("True parameter : $(p_[2:3])")
+println("Sparse regression")
+full_res = solve(full_problem, basis, opt, maxiter = 10000, progress = true)
+ideal_res = solve(ideal_problem, basis, opt, maxiter = 10000, progress = true)
+nn_res = solve(nn_problem, basis, opt, maxiter = 10000, progress = true)
+# Store the results
+results = [full_res; ideal_res; nn_res]
+# Show the results
+map(println, results)
+# Show the results
+map(println ∘ result, results)
+# Show the identified parameters
+map(println ∘ parameter_map, results)
 
 # Define the recovered, hyrid model
-function recovered_dynamics!(du,u, p, t, p_true)
-    û = Ψf(u, p) # Network prediction
-    du[1] = p_true[1]*u[1] + û[1]
-    du[2] = -p_true[4]*u[2] + û[2]
+function recovered_dynamics!(du,u, p, t)
+    û = nn_res(u, p) # Network prediction
+    du[1] = p_[1]*u[1] + û[1]
+    du[2] = -p_[4]*u[2] + û[2]
 end
 
-# Closure with the known parameter
-estimated_dynamics!(du,u,p,t) = recovered_dynamics!(du,u,p,t,p_)
 
-estimation_prob = ODEProblem(estimated_dynamics!, u0, tspan, p̂)
-estimate = solve(estimation_prob, Tsit5(), saveat = t)
+estimation_prob = ODEProblem(recovered_dynamics!, u0, tspan, parameters(nn_res))
+estimate = solve(estimation_prob, Tsit5(), saveat = solution.t)
 
 # Plot
 plot(solution)
@@ -208,8 +200,8 @@ plot!(true_solution_long)
 
 ## Save the results
 save(joinpath(pwd(), "results" ,"$(svname)recovery_$(noise_magnitude).jld2"),
-    "solution", solution, "X", Xₙ, "t" , t, "neural_network" , U, "initial_parameters", p, "trained_parameters" , p_trained, # Training
-    "losses", losses, "result", Ψf, "recovered_parameters", p̂, # Recovery
+    "solution", solution, "X", Xₙ, "t" , ts, "neural_network" , U, "initial_parameters", p, "trained_parameters" , p_trained, # Training
+    "losses", losses, "result", nn_res, "recovered_parameters", parameters(nn_res), # Recovery
     "long_solution", true_solution_long, "long_estimate", estimate_long) # Estimation