CliMA · szy21 · Mar 5, 2024 · Apr 1, 2024 · Apr 1, 2024 · Apr 23, 2024
diff --git a/Manifest.toml b/Manifest.toml
diff --git a/examples/Manifest.toml b/examples/Manifest.toml
diff --git a/examples/Project.toml b/examples/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+Bootstrap = "e28b5b4c-05e8-5b66-bc03-6f0c0a0a06e0"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 CliMADatasets = "49523d62-8978-4391-abdd-b8467d4505ae"
@@ -11,7 +12,9 @@ FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
+Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"

diff --git a/examples/conus404/Experiment_standard_scaling.toml b/examples/conus404/Experiment_standard_scaling.toml
@@ -0,0 +1,51 @@
+[experiment]
+project           = "CliMAgen.jl"
+name              = "conus404_test"
+savedir           = "output_standard_scaling" # sampling scripts load checkpoint.bson from here and write their output under it
+rngseed           = 123 # the sampling scripts call Random.seed! with this value when it is > 0
+logging           = true
+nogpu             = false # false: the downscaling script samples on the GPU when CUDA.has_cuda() is true
+
+[data]
+batchsize         = 64 # NOTE(review): generate_samples_downscaling.jl uses a literal 64 instead of this key
+standard_scaling  = true # selects the preprocessing_standard_scaling_<value>_{train,test}.jld2 files
+low_pass          = false
+low_pass_k        = 8 # NOTE(review): generate_samples_downscaling.jl passes a literal 8 to lowpass_filter
+n_pixels          = 128 # side length of the square sample arrays allocated by the sampling scripts
+
+[model]
+inchannels        = 1 # channel dimension of the sample arrays allocated by the sampling scripts
+dropout_p         = 0.0
+sigma_max         = 260.0
+sigma_min         = 1e-2
+mean_bypass       = true
+shift_input       = true
+shift_output      = true
+scale_mean_bypass = true
+gnorm             = true
+proj_kernelsize   = 3
+outer_kernelsize  = 3
+middle_kernelsize = 3
+inner_kernelsize  = 3
+periodic          = false
+
+[optimizer]
+learning_rate     = 2e-4
+epsilon           = 1e-8
+beta_1            = 0.9
+beta_2            = 0.999
+gradnorm          = 1.0
+ema_rate          = 0.999
+nwarmup           = 5000
+
+[training]
+nepochs           = 150
+freq_chckpt       = 50
+
+[sampling]
+nsteps            = 250 # passed as num_steps to the sampler setup functions
+nsamples_analysis = 100
+nsamples_generate = 500
+nimages           = 25
+sampler           = "euler"
+samples_file      = "samples.hdf5" # NOTE(review): generate_samples.jl no longer reads this key; it writes "samples_smooth.hdf5"
diff --git a/examples/conus404/Experiment.toml → examples/conus404/Experiment_zmuv.toml b/examples/conus404/Experiment.toml → examples/conus404/Experiment_zmuv.toml
@@ -1,20 +1,22 @@
 [experiment]
 project           = "CliMAgen.jl"
 name              = "conus404_test"
-savedir           = "output"
+savedir           = "output_zmuv"
 rngseed           = 123
 logging           = true
 nogpu             = false
 
 [data]
 batchsize         = 64
 standard_scaling  = false
+low_pass          = false
+low_pass_k        = 8
 n_pixels		  = 128
 
 [model]
 inchannels        = 1
 dropout_p         = 0.0
-sigma_max         = 90.0
+sigma_max         = 580.0
 sigma_min         = 1e-2
 mean_bypass       = true
 shift_input       = true
@@ -41,9 +43,9 @@ nepochs           = 100
 freq_chckpt       = 50
 
 [sampling]
-nsteps            = 1000
+nsteps            = 250
 nsamples_analysis = 100
 nsamples_generate = 100
 nimages           = 25
 sampler           = "euler"
-samples_file      = "samples.hdf5"
+samples_file      = "samples.hdf5"
diff --git a/examples/conus404/analysis.jl b/examples/conus404/analysis.jl
@@ -11,6 +11,7 @@ using TOML
 
 using CliMAgen
 package_dir = pkgdir(CliMAgen)
+include(joinpath(package_dir,"examples/conus404/preprocessing_utils.jl"))
 include(joinpath(package_dir,"examples/utils_data.jl"))
 include(joinpath(package_dir,"examples/utils_analysis.jl"))
 
@@ -21,7 +22,9 @@ function run_analysis(params; FT=Float32)
     nogpu = params.experiment.nogpu
     batchsize = params.data.batchsize
     standard_scaling = params.data.standard_scaling
-    preprocess_params_file = joinpath(savedir, "preprocessing_standard_scaling_$standard_scaling.jld2")
+    # always use the preprocessing parameters derived 
+    # from the training data for this step
+    preprocess_params_file = joinpath(savedir, "preprocessing_standard_scaling_$(standard_scaling)_train.jld2")
     inchannels = params.model.inchannels
     nsamples = params.sampling.nsamples_analysis
     nimages = params.sampling.nimages
@@ -43,9 +46,7 @@ function run_analysis(params; FT=Float32)
     # set up dataset
     dl, _ = get_data_conus404(
         batchsize;
-        standard_scaling=standard_scaling,
         FT=FT,
-        read=true,
         preprocess_params_file=preprocess_params_file
     )
     xtrain = cat([x for x in dl]..., dims=4)
@@ -61,7 +62,7 @@ function run_analysis(params; FT=Float32)
     # set up model
     checkpoint_path = joinpath(savedir, "checkpoint.bson")
     BSON.@load checkpoint_path model model_smooth opt opt_smooth
-    model = device(model)
+    model = device(model_smooth)
 
     # sample from the trained model
     time_steps, Δt, init_x = setup_sampler(

diff --git a/examples/conus404/generate_samples.jl b/examples/conus404/generate_samples.jl
@@ -12,21 +12,18 @@ using TOML
 using CliMAgen
 package_dir = pkgdir(CliMAgen)
 
-function generate_samples(params; FT=Float32, real_space = true)
+function generate_samples(params; FT=Float32)
     # unpack params, including preprocessing numbers
     savedir = params.experiment.savedir
     rngseed = params.experiment.rngseed
     nogpu = params.experiment.nogpu
     batchsize = params.data.batchsize
     n_pixels = params.data.n_pixels
-    standard_scaling = params.data.standard_scaling
-    preprocess_params_file = joinpath(savedir, "preprocessing_standard_scaling_$standard_scaling.jld2")
-    scaling = JLD2.load_object(preprocess_params_file)
     inchannels = params.model.inchannels
     nsamples = params.sampling.nsamples_generate
     nsteps = params.sampling.nsteps
     sampler = params.sampling.sampler
-    samples_file = params.sampling.samples_file
+    samples_file = "samples.hdf5"
 
     # set up rng
     rngseed > 0 && Random.seed!(rngseed)
@@ -43,7 +40,7 @@ function generate_samples(params; FT=Float32, real_space = true)
     # set up model
     checkpoint_path = joinpath(savedir, "checkpoint.bson")
     BSON.@load checkpoint_path model model_smooth opt opt_smooth
-    model = device(model)
+    model = device(model_smooth)
 
     # sample from the trained model
     # first allocate memory to hold the samples
@@ -57,35 +54,31 @@ function generate_samples(params; FT=Float32, real_space = true)
             device,
             n_pixels,
             inchannels;
-            num_images=nsamples,
+            num_images=samples_per_batch,
             num_steps=nsteps,
         )
         batch .= Euler_Maruyama_ld_sampler(model, init_x, time_steps, Δt, rng = MersenneTwister(b))
-        if real_space
-            samples[:,:,:,(b-1)*samples_per_batch+1:b*samples_per_batch] .= cpu(invert_preprocessing(batch, scaling))
-        else
-            samples[:,:,:,(b-1)*samples_per_batch+1:b*samples_per_batch] .= cpu(batch)
-        end
-
+        samples[:,:,:,(b-1)*samples_per_batch+1:b*samples_per_batch] .= cpu(batch)
     end
     samplesdir = savedir
+    samples_file = "samples_smooth.hdf5"
     !ispath(samplesdir) && mkpath(samplesdir)
     hdf5_path=joinpath(samplesdir, samples_file)
     fid = HDF5.h5open(hdf5_path, "w")
     fid["generated_samples"] = samples
     close(fid)
 end
 
-function main(; experiment_toml="Experiment.toml", real_space = true)
+function main(; experiment_toml="Experiment.toml")
     FT = Float32
 
     # read experiment parameters from file
     params = TOML.parsefile(experiment_toml)
     params = CliMAgen.dict2nt(params)
 
-    generate_samples(params; FT=FT, real_space = real_space)
+    generate_samples(params; FT=FT)
 end
 
 if abspath(PROGRAM_FILE) == @__FILE__
-    main(;experiment_toml=ARGS[1], real_space = ARGS[2] == "true")
+    main(;experiment_toml=ARGS[1])
 end
diff --git a/examples/conus404/generate_samples_downscaling.jl b/examples/conus404/generate_samples_downscaling.jl
@@ -0,0 +1,146 @@
+using BSON
+using Flux
+using CUDA
+using cuDNN
+using HDF5
+using JLD2
+using ProgressMeter
+using Random
+using Statistics
+using TOML
+
+using CliMAgen
+package_dir = pkgdir(CliMAgen)
+include(joinpath(package_dir,"examples/conus404/preprocessing_utils.jl"))
+include(joinpath(package_dir,"examples/utils_data.jl")) # for data loading
+
+"""
+    setup_sampler_downscaling(x0, tf, model, device, tilesize, noised_channels; kwargs...)
+
+Return `(time_steps, Δt, init_x)`: `init_x` is `x0` plus noise scaled by `σ` from
+`CliMAgen.marginal_prob` at `tf`. Throws `ArgumentError` if `ceil(num_steps * tf) < 2`.
+"""
+function setup_sampler_downscaling(x0, tf, model::CliMAgen.AbstractDiffusionModel, device,
+        tilesize, noised_channels; num_images=5, num_steps=500, ϵ=1.0f-3, FT=Float32)
+    nsteps = ceil(Int, num_steps * tf)  # the step budget is scaled by the start time
+    nsteps >= 2 || throw(ArgumentError("need ceil(num_steps * tf) >= 2, got $nsteps"))
+    t = tf .* ones(FT, num_images) |> device
+    init_z = randn(FT, (tilesize, tilesize, noised_channels, num_images)) |> device
+    _, σ_T = CliMAgen.marginal_prob(model, zero(init_z), t)
+    init_noise = σ_T .* init_z
+    time_steps = LinRange(tf, ϵ, nsteps)  # time grid from tf to ϵ
+    Δt = time_steps[1] - time_steps[2]  # needs two grid points, hence the check above
+    return time_steps, Δt, init_noise .+ x0
+end
+
+"""
+    generate_samples_downscaling(params; FT=Float32)
+
+Downscale low-pass-filtered fields from `get_raw_data_conus404` with the `model_smooth`
+network stored in `<savedir>/checkpoint.bson` and write the results to
+`<savedir>/downscaling/samples_downscaled_smooth.hdf5`.
+
+One random batch from each of the train and test splits is low-pass filtered,
+preprocessed, noised to time `tf`, and integrated back with `Euler_Maruyama_sampler`;
+unconditional samples are drawn for comparison. Samples are stored with the
+preprocessing inverted, next to the selected raw fields and their low-pass versions.
+
+# Arguments
+- `params`: experiment parameters; reads `experiment.savedir`, `experiment.rngseed`,
+  `experiment.nogpu`, `data.standard_scaling`, `data.n_pixels`, `model.inchannels`,
+  and `sampling.nsteps`.
+- `FT`: element type of the loaded data and of the stored samples.
+"""
+function generate_samples_downscaling(params; FT=Float32)
+    savedir = params.experiment.savedir
+    rngseed = params.experiment.rngseed
+    nogpu = params.experiment.nogpu
+    standard_scaling = params.data.standard_scaling
+    n_pixels = params.data.n_pixels
+    inchannels = params.model.inchannels
+    nsteps = params.sampling.nsteps
+    batchsize = 64 # fixed in this script; `params.data.batchsize` is not consulted
+    lowpass_k = 8  # guess; second argument of `lowpass_filter`
+    tf = 0.6f0     # guess; diffusion time the low-resolution fields are noised to
+
+    # each split is (de)normalized with the preprocessing parameters saved for it
+    scaling_prefix = joinpath(savedir, "preprocessing_standard_scaling_$(standard_scaling)")
+
+    # set up rng
+    rngseed > 0 && Random.seed!(rngseed)
+
+    # set up device
+    if !nogpu && CUDA.has_cuda()
+        device = Flux.gpu
+        @info "Sampling on GPU"
+    else
+        device = Flux.cpu
+        @info "Sampling on CPU"
+    end
+
+    # set up model; sampling uses `model_smooth` from the checkpoint
+    checkpoint_path = joinpath(savedir, "checkpoint.bson")
+    BSON.@load checkpoint_path model model_smooth opt opt_smooth
+    model = device(model_smooth)
+
+    # load the raw data and build the preprocessed low-resolution inputs
+    (xtrain, xtest) = get_raw_data_conus404(; FT=FT)
+    xtrain_lores = lowpass_filter(xtrain, lowpass_k)
+    xtest_lores = lowpass_filter(xtest, lowpass_k)
+    scaling_train = JLD2.load_object(scaling_prefix * "_train.jld2")
+    scaling_test = JLD2.load_object(scaling_prefix * "_test.jld2")
+    xtrain_pp_lores = apply_preprocessing(xtrain_lores, scaling_train)
+    xtest_pp_lores = apply_preprocessing(xtest_lores, scaling_test)
+
+    # Draw indices per split: the splits may differ in size, so indices drawn for one can
+    # be out of bounds for the other; `rand(1:n, k)` also never yields index 0.
+    idx_train = rand(1:size(xtrain)[end], batchsize)
+    idx_test = rand(1:size(xtest)[end], batchsize)
+
+    # downscale: noise the low-resolution fields to time `tf`, then integrate back
+    time_steps, Δt, init_x = setup_sampler_downscaling(
+        device(xtrain_pp_lores[:, :, :, idx_train]), tf, model, device, n_pixels,
+        inchannels; num_images=batchsize, num_steps=nsteps)
+    samples_train = Array{FT}(cpu(Euler_Maruyama_sampler(
+        model, init_x, time_steps, Δt; rng=MersenneTwister(13))))
+    time_steps, Δt, init_x = setup_sampler_downscaling(
+        device(xtest_pp_lores[:, :, :, idx_test]), tf, model, device, n_pixels,
+        inchannels; num_images=batchsize, num_steps=nsteps)
+    samples_test = Array{FT}(cpu(Euler_Maruyama_sampler(
+        model, init_x, time_steps, Δt; rng=MersenneTwister(123))))
+
+    # unconditional samples for comparison; split in half between the two scalings below
+    time_steps, Δt, init_x = setup_sampler(
+        model, device, n_pixels, inchannels; num_images=2 * batchsize, num_steps=nsteps)
+    random_samples = Array{FT}(cpu(Euler_Maruyama_sampler(
+        model, init_x, time_steps, Δt; rng=MersenneTwister(2))))
+
+    samplesdir = joinpath(savedir, "downscaling")
+    mkpath(samplesdir)
+    hdf5_path = joinpath(samplesdir, "samples_downscaled_smooth.hdf5")
+    HDF5.h5open(hdf5_path, "w") do fid # do-block: closed even if a write throws
+        fid["downscaled_samples_train"] = invert_preprocessing(samples_train, scaling_train)
+        fid["downscaled_samples_test"] = invert_preprocessing(samples_test, scaling_test)
+        fid["random_samples_train"] = invert_preprocessing(random_samples[:, :, :, 1:batchsize], scaling_train)
+        fid["random_samples_test"] = invert_preprocessing(random_samples[:, :, :, (batchsize + 1):end], scaling_test)
+        fid["data_train"] = xtrain[:, :, :, idx_train]
+        fid["data_test"] = xtest[:, :, :, idx_test]
+        fid["data_train_lores"] = xtrain_lores[:, :, :, idx_train]
+        fid["data_test_lores"] = xtest_lores[:, :, :, idx_test]
+    end
+    return nothing
+end
+
+"""
+    main(; experiment_toml="Experiment.toml")
+
+Parse `experiment_toml` and run `generate_samples_downscaling` on it with `Float32`.
+"""
+function main(; experiment_toml="Experiment.toml")
+    settings = CliMAgen.dict2nt(TOML.parsefile(experiment_toml))
+    return generate_samples_downscaling(settings; FT=Float32)
+end
+
+if abspath(PROGRAM_FILE) == @__FILE__  # true only when this file is the script being run
+    main(; experiment_toml=get(ARGS, 1, "Experiment.toml"))  # no argument: use main's default
+end