From d7e9a6cb2c289bab6e75fee27cab894f90ac1b16 Mon Sep 17 00:00:00 2001
From: Sikorski <sikorski@zib.de>
Date: Mon, 8 Jul 2024 15:15:43 +0200
Subject: [PATCH] cleanups and small fixes, dependency reordering

---
 .github/workflows/Documenter.yml |  2 +-
 src/ISOKANN.jl                   | 26 ++++++++++++++++----------
 src/IsoMu/IsoMu.jl               |  5 ++---
 src/bonito.jl                    |  2 +-
 src/data.jl                      |  2 +-
 src/extrapolate.jl               |  6 +++---
 src/iso2.jl                      | 12 ++++--------
 src/makie.jl                     |  2 ++
 src/models.jl                    |  3 ++-
 src/molutils.jl                  |  9 +--------
 src/simulation.jl                | 18 ++++++++++++++----
 src/simulators/langevin.jl       |  5 +----
 src/simulators/openmm.jl         |  2 +-
 src/subsample.jl                 |  1 -
 14 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml
index 9ec3d99..a2b7431 100644
--- a/.github/workflows/Documenter.yml
+++ b/.github/workflows/Documenter.yml
@@ -21,7 +21,7 @@ jobs:
       - name: "Manually add Conda.jl folder due to bug in its build step" # https://github.com/JuliaPy/Conda.jl/issues/251
         run: |
           mkdir -p "/home/runner/.julia/conda/3/x86_64"
-      - uses: julia-actions/cache@v1
+      - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-docdeploy@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/src/ISOKANN.jl b/src/ISOKANN.jl
index 05b389e..7a81d14 100644
--- a/src/ISOKANN.jl
+++ b/src/ISOKANN.jl
@@ -3,10 +3,12 @@
 module ISOKANN
 
 #using Startup           # precompiles most used packages
-
 #include("forced/IsoForce.jl")
 
-import Random
+import StochasticDiffEq, Flux, CUDA, PCCAPlus, Plots
+
+using ProgressMeter
+using Plots
 
 using LinearAlgebra: norm, dot, cross, diag, svd
 using StatsBase: mean, sample, mean_and_std
@@ -18,6 +20,9 @@ using Unitful: @u_str, unit
 using SpecialFunctions: erf
 using Plots: plot, plot!, scatter, scatter!
 using MLUtils: numobs, getobs, shuffleobs, unsqueeze
+using StaticArrays: @SVector
+using StochasticDiffEq: StochasticDiffEq
+using LinearAlgebra: pinv, norm, I, schur
 
 import Chemfiles
 import ProgressMeter
@@ -34,6 +39,13 @@ import OrdinaryDiffEq
 import Graphs
 import Optimisers
 import Optim
+import PyCall
+import Random
+import KernelDensity
+import ForwardDiff
+import StatsBase
+import Flux
+import PCCAPlus
 
 import MLUtils: numobs
 import Flux: cpu, gpu
@@ -57,10 +69,6 @@ export getxs, getys
 
 export reactionpath_minimum, reactionpath_ode, writechemfile
 
-using ProgressMeter
-
-
-
 include("subsample.jl")  # adaptive sampling
 include("pairdists.jl")       # pair distances
 include("simulation.jl")      # Interface for simulations
@@ -72,7 +80,6 @@ include("data.jl")            # tools for handling the data (sampling, slicing,
 #include("loggers.jl")     # performance metric loggers
 #include("benchmarks.jl")      # benchmark runs, deprecated by scripts/*
 
-
 include("simulators/langevin.jl")  # for the simulators
 
 #include("isosimple.jl")
@@ -85,7 +92,6 @@ include("simulators/openmm.jl")
 import .OpenMM.OpenMMSimulation
 export OpenMMSimulation
 
-
 #include("dataloader.jl")
 
 #include("precompile.jl") # precompile for faster ttx
@@ -95,8 +101,8 @@ include("extrapolate.jl")
 include("reactionpath.jl")
 include("reactionpath2.jl")
 
-include("IsoMu/IsoMu.jl")
-include("vgv/vgv.jl")
+#include("IsoMu/IsoMu.jl")
+#include("vgv/vgv.jl")
 
 include("makie.jl")
 include("bonito.jl")
diff --git a/src/IsoMu/IsoMu.jl b/src/IsoMu/IsoMu.jl
index 7b69184..2069e6c 100644
--- a/src/IsoMu/IsoMu.jl
+++ b/src/IsoMu/IsoMu.jl
@@ -4,11 +4,10 @@ using LinearAlgebra
 using Plots
 
 using DataFrames: DataFrame
-using ISOKANN
-using ISOKANN: plot_reactive_path, writechemfile, aligntrajectory
+using ..ISOKANN: plot_reactive_path, writechemfile, aligntrajectory, ISOKANN
 using Distances: pairwise, Euclidean
 
-import ISOKANN: reactive_path, save_reactive_path
+import ..ISOKANN: reactive_path, save_reactive_path
 
 #using FileIO
 import BioStructures
diff --git a/src/bonito.jl b/src/bonito.jl
index 4a7d521..7ffd755 100644
--- a/src/bonito.jl
+++ b/src/bonito.jl
@@ -1,7 +1,7 @@
 using Bonito
 using WGLMakie
 
-USEGPU = true
+USEGPU = CUDA.functional()
 ISO = nothing
 ISRUNNING = false
 
diff --git a/src/data.jl b/src/data.jl
index 57f0565..4ba13d9 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -86,11 +86,11 @@ function adddata(data, model, sim, ny)
     return joindata(data, (xs, ys))
 end
 
-@deprecate joindata (x, y) -> lastcat.(x, y)
 
 lastcat(x::T, y::T) where {N,T<:AbstractArray{<:Any,N}} = cat(x, y, dims=N)
 lastcat(x::T, y) where {T} = lastcat(x, convert(T, y))
 
+@deprecate joindata (x, y) -> lastcat.(x, y)
 #=
 function datastats(data)
     xs, ys = data
diff --git a/src/extrapolate.jl b/src/extrapolate.jl
index 4789d4e..d0418c3 100644
--- a/src/extrapolate.jl
+++ b/src/extrapolate.jl
@@ -44,11 +44,13 @@ function extrapolate(iso, n::Integer, stepsize=0.1, steps=1, minimize=true, maxs
             try
                 x = extrapolate(iso, coords[:, i], dir * stepsize, steps)
                 minimize && (x = energyminimization_chilevel(iso, x))
-                if data.sim.momenta
+                #=
+                if hasfield(typeof(data.sim), :momenta) && data.sim.momenta
                     x = reshape(x, :, 2)
                     #x[:, 2] .= 0
                     x = vec(x)
                 end
+                =#
                 #&& ISOKANN.OpenMM.set_random_velocities!(data.sim, x)
                 push!(xs, x)
             catch e
@@ -90,7 +92,6 @@ function energyminimization_chilevel(iso, x0; f_tol=1e-3, alphaguess=1e-5, itera
     global trace = [x0]
     U(x) = begin
         push!(trace, x)
-        @show OpenMM.potential(sim, x)
     end
     dU(x) = begin
         push!(trace, x)
@@ -105,7 +106,6 @@ function energyminimization_chilevel(iso, x0; f_tol=1e-3, alphaguess=1e-5, itera
 
 
     o = Optim.optimize(U, dU, x, alg, Optim.Options(; iterations, f_tol, show_trace,); inplace=false)
-    return o
     return o.minimizer
 end
 
diff --git a/src/iso2.jl b/src/iso2.jl
index 8e7f944..e33278b 100644
--- a/src/iso2.jl
+++ b/src/iso2.jl
@@ -1,9 +1,4 @@
-import StatsBase
-import Flux
-import PCCAPlus
-import ISOKANN
-using LinearAlgebra: pinv, norm, I, schur
-using Plots
+
 
 @kwdef mutable struct Iso2
     model
@@ -108,8 +103,9 @@ chis(iso::Iso2) = iso.model(getxs(iso.data))
 chicoords(iso::Iso2, xs) = iso.model(features(iso.data, iscuda(iso.model) ? gpu(xs) : xs))
 isotarget(iso::Iso2) = isotarget(iso.model, getobs(iso.data)..., iso.transform)
 
-Optimisers.adjust!(iso::Iso2; kwargs...) = Optimisers.adjust!(iso.opt; kwargs...)
-Optimisers.setup(iso::Iso2) = (iso.opt = Optimisers.setup(iso.opt, iso.model))
+#Optimisers.adjust!(iso::Iso2; kwargs...) = Optimisers.adjust!(iso.opt; kwargs...)
+#Optimisers.setup(iso::Iso2) = (iso.opt = Optimisers.setup(iso.opt, iso.model))
+
 gpu(iso::Iso2) = Iso2(Flux.gpu(iso.model), Flux.gpu(iso.opt), Flux.gpu(iso.data), iso.transform, iso.losses, iso.loggers, iso.minibatch)
 cpu(iso::Iso2) = Iso2(Flux.cpu(iso.model), Flux.cpu(iso.opt), Flux.cpu(iso.data), iso.transform, iso.losses, iso.loggers, iso.minibatch)
 
diff --git a/src/makie.jl b/src/makie.jl
index dbc897b..1731b85 100644
--- a/src/makie.jl
+++ b/src/makie.jl
@@ -257,6 +257,7 @@ function livevis(iso::Iso2)
         notify(col)
     end
 
+    #=
     on(events(fig).mousebutton) do event
 
         a, i = pick(fig)
@@ -265,6 +266,7 @@ function livevis(iso::Iso2)
         x = iso.data.coords[1][:, i] # |> align_to_prev
         o[] = x
     end
+    =#
 
     #allcoords = Observable(reshape(iso.data.coords[1], 3, :))
     #colors = Observable(repeat(iso.model(iso.data.features[1]) |> vec, inner=22))
diff --git a/src/models.jl b/src/models.jl
index 5f0baec..09cc7cd 100644
--- a/src/models.jl
+++ b/src/models.jl
@@ -19,7 +19,8 @@ inputdim(model::Flux.Dense) = size(model.weight, 2)
 outputdim(model::Flux.Chain) = outputdim(model.layers[end])
 outputdim(model::Flux.Dense) = size(model.weight, 1)
 
-iscuda(m::Flux.Chain) = m[2].weight isa CuArray
+#iscuda(m::Flux.Chain) = m[2].weight isa CuArray
+iscuda(m::Flux.Chain) = first(Flux.trainables(m)) isa CuArray
 
 
 
diff --git a/src/molutils.jl b/src/molutils.jl
index 144baa0..d77a094 100644
--- a/src/molutils.jl
+++ b/src/molutils.jl
@@ -139,14 +139,7 @@ end
 
 ### switch between flattened an blown up representation of 3d vectors
 function as3dmatrix(f, x...)
-    merge_first_dimensions(f(split_first_dimension.(x, 3)...))
-end
-
-@deprecate merge_first_dimensions flattenfirst
-
-function merge_first_dimensions(A)
-    new_shape = (prod(size(A)[1:2]), size(A)[3:end]...)
-    return reshape(A, new_shape)
+    flattenfirst(f(split_first_dimension.(x, 3)...))
 end
 
 function split_first_dimension(A, d)
diff --git a/src/simulation.jl b/src/simulation.jl
index df1c1fb..8060881 100644
--- a/src/simulation.jl
+++ b/src/simulation.jl
@@ -1,5 +1,3 @@
-import PyCall
-
 export getcoords
 ## Interface for simulations
 
@@ -110,6 +108,7 @@ nk(d::SimulationData) = size(d.features[2], 2)
 Base.length(d::SimulationData) = size(d.features[1], 2)
 Base.lastindex(d::SimulationData) = length(d)
 
+# Facilitates easy indexing into the data, returning a new SimulationData object.
 Base.getindex(d::SimulationData, i) = SimulationData(d.sim, getobs(d.features, i), getobs(d.coords, i), d.featurizer)
 
 MLUtils.getobs(d::SimulationData) = d.features
@@ -195,13 +194,24 @@ function datasize((xs, ys)::Tuple)
     return size(xs), size(ys)
 end
 
+"""
+    trajectorydata_linear(sim::IsoSimulation, steps; reverse=false, kwargs...)
+
+Simulate a single long trajectory of `steps` times the lagtime and generate the corresponding ISOKANN data.
+If `reverse` is true, the time-reversed transitions are added as well.
+"""
 function trajectorydata_linear(sim::IsoSimulation, steps; reverse=false, kwargs...)
     xs = laggedtrajectory(sim, steps)
     SimulationData(sim, data_from_trajectory(xs; reverse), kwargs...)
 end
 
-function trajectorydata_bursts(sim, steps, nk; kwargs)
+"""
+    trajectorydata_bursts(sim::IsoSimulation, steps, nk; kwargs...)
+
+Simulate a single long trajectory of `steps` times the lagtime and start `nk` burst trajectories at each step for the Koopman samples. Keyword arguments are forwarded to `SimulationData`.
+"""
+function trajectorydata_bursts(sim::IsoSimulation, steps, nk; kwargs...)
     xs = laggedtrajectory(sim, steps)
     ys = propagate(sim, xs, nk)
-    SimulationData(sim, ys, kwargs...)
+    SimulationData(sim, (xs, ys); kwargs...)
 end
\ No newline at end of file
diff --git a/src/simulators/langevin.jl b/src/simulators/langevin.jl
index 00780e3..f22e731 100644
--- a/src/simulators/langevin.jl
+++ b/src/simulators/langevin.jl
@@ -1,7 +1,4 @@
 #using Parameters
-using StaticArrays: @SVector
-using StochasticDiffEq: StochasticDiffEq
-import ForwardDiff
 
 # Abstract type defining the Overdamped Langevin dynamics
 # mandatory interface methods: potential, sigma, dim, lagtime, dt
@@ -25,7 +22,7 @@ function propagate(l::AbstractLangevin, x0::AbstractMatrix, ny)
     dim, nx = size(x0)
     ys = zeros(dim, ny, nx)
     Threads.@threads for (i, j) in [(i, j) for j in 1:ny, i in 1:nx]
-        ys[:, j, i] = trajectory(l; x0, saveat=lagtime(l))[:, end]
+        ys[:, j, i] = trajectory(l; x0=x0[:, i], saveat=lagtime(l))[:, end]
     end
     return ys
 end
diff --git a/src/simulators/openmm.jl b/src/simulators/openmm.jl
index 644dc38..1d1d21e 100644
--- a/src/simulators/openmm.jl
+++ b/src/simulators/openmm.jl
@@ -4,7 +4,7 @@ using PyCall, CUDA
 using LinearAlgebra: norm
 
 import JLD2
-import ISOKANN: ISOKANN, IsoSimulation,
+import ..ISOKANN: ISOKANN, IsoSimulation,
     propagate, dim, randx0,
     featurizer, defaultmodel,
     savecoords, getcoords, force, pdb,
diff --git a/src/subsample.jl b/src/subsample.jl
index cdf3b8b..723997e 100644
--- a/src/subsample.jl
+++ b/src/subsample.jl
@@ -85,7 +85,6 @@ function pickclosest_test(hs, ns)
 end
 
 
-import KernelDensity
 
 function kde_needles(chis, n=10; padding=0.0, bandwidth)
     needles = []