From 3b80abf6f14d53ac4b316a6213ee9b3164e3bf9a Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Tue, 17 Sep 2024 11:52:59 -0400 Subject: [PATCH] min functionality for canbon example --- Project.toml | 1 + src/BiodiversityObservationNetworks.jl | 3 +- src/balancedacceptance.jl | 42 ++++----- src/fractaltriad.jl | 1 - src/refine.jl | 35 +------- src/simplerandom.jl | 38 +++----- src/spatialstratified.jl | 120 +++++++++++++------------ src/types.jl | 28 +++++- src/uniqueness.jl | 5 -- src/weightedbas.jl | 26 +----- 10 files changed, 128 insertions(+), 171 deletions(-) diff --git a/Project.toml b/Project.toml index 49b3957..a53faaf 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["michael catchen "] version = "0.2.1" [deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" HaltonSequences = "13907d55-377f-55d6-a9d6-25ac19e11b95" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" diff --git a/src/BiodiversityObservationNetworks.jl b/src/BiodiversityObservationNetworks.jl index 78f4e34..ff633f0 100644 --- a/src/BiodiversityObservationNetworks.jl +++ b/src/BiodiversityObservationNetworks.jl @@ -1,5 +1,6 @@ module BiodiversityObservationNetworks +using SimpleSDMLayers using Distributions using Random using HaltonSequences @@ -38,7 +39,7 @@ include("weightedbas.jl") export WeightedBalancedAcceptance include("adaptivehotspot.jl") -export AdaptiveHotspotDetection +export AdaptiveHotspot include("cubesampling.jl") export CubeSampling diff --git a/src/balancedacceptance.jl b/src/balancedacceptance.jl index 295683a..4f5d03d 100644 --- a/src/balancedacceptance.jl +++ b/src/balancedacceptance.jl @@ -4,20 +4,20 @@ A `BONSeeder` that uses Balanced-Acceptance Sampling (Van-dem-Bates et al. 2017 https://doi.org/10.1111/2041-210X.13003) """ -Base.@kwdef struct BalancedAcceptance{I <: Integer} <: BONSampler +Base.@kwdef struct BalancedAcceptance{I<:Integer} <: BONSampler numsites::I = 30 - mask::BitMatrix = rand(50, 50) .< 0.5 - function BalancedAcceptance(numsites::Integer, mask::BitMatrix) - bas = new{typeof(numsites)}(numsites, mask) + dims::Tuple{I,I} = (50,50) + function BalancedAcceptance(numsites::I, dims::Tuple{I,I}) where I<:Integer + bas = new{I}(numsites, dims) check_arguments(bas) return bas end end BalancedAcceptance(M::Matrix{T}; numsites = 30) where T = BalancedAcceptance(numsites, size(M)) -BalancedAcceptance(l::Layer; numsites = 30) = BalancedAcceptance(numsites, l.layer.indices) +BalancedAcceptance(l::Layer; numsites = 30) = BalancedAcceptance(numsites, size(l)) -maxsites(bas::BalancedAcceptance) = prod(size(bas.mask)) +maxsites(bas::BalancedAcceptance) = prod(bas.dims) function check_arguments(bas::BalancedAcceptance) check(TooFewSites, bas) @@ -26,16 +26,20 @@ function check_arguments(bas::BalancedAcceptance) end function _sample!( - coords::Sites, - ba::BalancedAcceptance, -) + selected::S, + candidates::C, + ba::BalancedAcceptance +) where {S<:Sites,C<:Sites} seed = rand(Int32.(1e0:1e7), 2) n = numsites(ba) - x,y = size(ba.mask) + x,y = ba.dims + + candidate_mask = zeros(Bool, x, y) + candidate_mask[candidates.coordinates] .= 1 # This is sequentially adding points, needs to check if that value is masked # at each step and skip if so - exp_needed = 10 * Int(ceil(sum(ba.mask) / prod(size(ba.mask)) .* n)) + exp_needed = 10 * Int(ceil((length(candidates)/(x*y)) .* n)) ct = 1 for ptct in 1:exp_needed @@ -44,13 +48,12 @@ function _sample!( if ct > n break end - if ba.mask[proposal] - coords[ct] = proposal + if candidate_mask[proposal] + selected[ct] = proposal ct += 1 end end - coords.coordinates = coords.coordinates[1:ct-1] - return coords + return selected end # ==================================================== @@ -78,19 +81,12 @@ end @testitem "BalancedAcceptance can generate points" begin bas = BalancedAcceptance() - coords = seed(bas) + coords = sample(bas) @test typeof(coords) <: Vector{CartesianIndex} @test length(coords) == bas.numsites end -@testitem "BalancedAcceptance can generate a custom number of points as positional argument" begin - numpts = 77 - sz = (50, 50) - bas = BalancedAcceptance(numpts, sz) - coords = seed(bas) - @test numpts == length(coords) -end @testitem "BalancedAcceptance can take number of points as keyword argument" begin N = 40 diff --git a/src/fractaltriad.jl b/src/fractaltriad.jl index 6e753f8..0693c35 100644 --- a/src/fractaltriad.jl +++ b/src/fractaltriad.jl @@ -157,7 +157,6 @@ end # ===================================================== @testitem "FractalTriad is correct subtype" begin - @test FractalTriad <: BONSeeder @test FractalTriad <: BONSampler end diff --git a/src/refine.jl b/src/refine.jl index 231da54..cb23c96 100644 --- a/src/refine.jl +++ b/src/refine.jl @@ -1,3 +1,6 @@ +# No longer used + + """ refine!(cooords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST) @@ -67,35 +70,3 @@ function refine(sampler::ST) where {ST <: BONRefiner} _inner(p) = refine!(coords, first(p), sampler) return _inner end - -""" - seed!(coords::Vector{CartesianIndex}, sampler::ST, uncertainty::Matrix{T}) - -Puts a set of candidate sampling locations in the preallocated vector `coords` -from a raster `uncertainty` using `sampler`, where `sampler` is a [`BONSeeder`](@ref). - - - Seeder's work on rasters, refiners work on set of coordinates. -""" -function seed!( - coords::Vector{CartesianIndex}, - sampler::ST, -) where {ST <: BONSeeder} - length(coords) != sampler.numsites && - throw( - DimensionMismatch( - "The length of the coordinate vector must match the `numsites` fiel s of the sampler", - ), - ) - return _generate!(coords, sampler) -end - -""" - seed(sampler::ST) - -Produces a set of candidate sampling locations in a vector `coords` of length numsites -from a raster using `sampler`, where `sampler` is a [`BONSeeder`](@ref). -""" -function seed(sampler::ST) where {ST <: BONSeeder} - coords = Vector{CartesianIndex}(undef, sampler.numsites) - return seed!(coords, sampler) -end diff --git a/src/simplerandom.jl b/src/simplerandom.jl index b7e8ca0..7f10c82 100644 --- a/src/simplerandom.jl +++ b/src/simplerandom.jl @@ -3,41 +3,32 @@ Implements Simple Random spatial sampling (each location has equal probability of selection) """ -Base.@kwdef struct SimpleRandom{I<:Integer,S<:Sites} <: BONSampler +Base.@kwdef struct SimpleRandom{I<:Integer} <: BONSampler numsites::I = 30 - pool::S = Sites(vec(collect(CartesianIndices((1:50, 1:50))))) - function SimpleRandom(numsites::I, pool::J) where{I<:Integer,J<:Sites} - srs = new{typeof(numsites), typeof(pool)}(numsites, pool) + function SimpleRandom(numsites::I) where{I<:Integer} + srs = new{typeof(numsites)}(numsites) check_arguments(srs) return srs end end -maxsites(srs::SimpleRandom) = length(pool(srs)) - -function SimpleRandom(layer::Layer, numsites = 50) - candidates = pool(layer) - srs = SimpleRandom(numsites, candidates) - check_arguments(srs) - return srs -end - function check_arguments(srs::SRS) where {SRS <: SimpleRandom} check(TooFewSites, srs) - check(TooManySites, srs) return nothing end +_default_pool(::SimpleRandom) = pool((50,50)) + function _sample!( - sites::Sites{T}, + selections::S, + candidates::C, sampler::SimpleRandom{I}, -) where {T,I} - candidates = coordinates(pool(sampler)) - _coords = Distributions.sample(candidates, sampler.numsites; replace = false) +) where {S<:Sites,C<:Sites,I} + _coords = Distributions.sample(candidates.coordinates, sampler.numsites; replace = false) for (i,c) in enumerate(_coords) - sites[i] = c + selections[i] = c end - return sites + return selections end # ==================================================== @@ -62,10 +53,3 @@ end srs = SimpleRandom(; numsites = N) @test srs.numsites == N end - -@testitem "SimpleRandom throws exception if there are more sites than candidates" begin - numpts, numcandidates = 26, 25 - dims = Int.(floor.((sqrt(numcandidates), sqrt(numcandidates)))) - srs = @test prod(dims) < numpts - @test_throws TooManySites SimpleRandom(; numsites = numpts, dims = dims) -end diff --git a/src/spatialstratified.jl b/src/spatialstratified.jl index e05fe85..e702978 100644 --- a/src/spatialstratified.jl +++ b/src/spatialstratified.jl @@ -1,38 +1,84 @@ """ SpatiallyStratified """ -@kwdef struct SpatiallyStratified{I <: Integer, F <: AbstractFloat} <: BONSampler +@kwdef struct SpatiallyStratified{I<:Integer,L<:Layer,F<:AbstractFloat,N} <: BONSampler numsites::I = 50 - strata::Matrix{I} = _default_strata((50, 50)) - inclusion_probability_by_stratum::Vector{F} = ones(3) ./ 3 - function SpatiallyStratified(numsites, strata, inclusion_probability_by_stratum) - ss = new{typeof(numsites), typeof(inclusion_probability_by_stratum[begin])}( + layer::L = _default_strata((50, 50)) + category_dict::Dict{I,N} = _default_categories() + inclusion_dict::Dict{N,F} = _default_inclusion() + function SpatiallyStratified( + numsites::I, + layer::L, + category_dict::Dict{I,N}, + inclusion_dict::Dict{N,F} + ) where {I<:Integer,L<:Layer,F<:AbstractFloat,N} + ss = new{I,L,F,N}( numsites, - strata, - inclusion_probability_by_stratum, + layer, + category_dict, + inclusion_dict ) check_arguments(ss) return ss end end -maxsites(ss::SpatiallyStratified) = prod(size(ss.strata)) function check_arguments(ss::SpatiallyStratified) check(TooFewSites, ss) - check(TooManySites, ss) - length(unique(ss.strata)) == length(ss.inclusion_probability_by_stratum) || throw( + length(ss.category_dict) == length(ss.inclusion_dict) || throw( ArgumentError( "Inclusion probability vector does not have the same number of strata as there are unique values in the strata matrix", ), ) - sum(ss.inclusion_probability_by_stratum) ≈ 1.0 || - throw(ArgumentError("Inclusion probabilities for each strata do not sum to 1.")) - return nothing + return sum(values(ss.inclusion_dict)) ≈ 1.0 || + throw(ArgumentError("Inclusion probabilities across all strata do not sum to 1.")) end + +function _sample!( + coords::S, + candidates::C, + sampler::SpatiallyStratified{I,L,F,N}, +) where {S<:Sites,C<:Sites,I,L,F,N} + n = sampler.numsites + strata = sampler.layer + categories = sampler.category_dict + + category_ids = sort(collect(keys(categories))) + candidate_ids = [strata.layer[x] for x in coordinates(candidates)] + + cat_idx = Dict() + inclusion_probs = F[] + for k in category_ids + cat_idx[k] = findall(isequal(k), candidate_ids) + if length(cat_idx[k]) > 0 + push!(inclusion_probs, sampler.inclusion_dict[categories[k]]) + else + push!(inclusion_probs, 0) + end + end + + inclusion_probs ./= sum(inclusion_probs) + + # check if there are empty categories, if so set incl prob to 0 and + # renormalize? + selected_cats = rand(Categorical(inclusion_probs), n) + for (i,c) in enumerate(selected_cats) + if length(cat_idx[c]) > 0 + coords[i] = candidates[rand(cat_idx[c])] + end + end + return coords +end + +# Utils +_default_pool(::SpatiallyStratified) = pool(_default_strata((50,50))) +_default_categories() = Dict{Int,String}(1=>"A", 2=>"B", 3=>"C") +_default_inclusion() = Dict{String,Float64}("A"=>0.5, "B"=>0.3, "C"=>0.2) + function _default_strata(sz) mat = zeros(typeof(sz[1]), sz...) @@ -43,26 +89,7 @@ function _default_strata(sz) mat[(x + 1):end, begin:y] .= 2 mat[(x + 1):end, (y + 1):end] .= 3 - return mat -end - -function _generate!( - coords::Vector{CartesianIndex}, - sampler::SpatiallyStratified, -) - strata = sampler.strata - idx_per_strata = [ - findall(i -> strata[i] == x, CartesianIndices(strata)) for - x in unique(sampler.strata) - ] - πᵢ = sampler.inclusion_probability_by_stratum - - strata_per_sample = rand(Categorical(πᵢ), sampler.numsites) - for (i, s) in enumerate(strata_per_sample) - coords[i] = rand(idx_per_strata[s]) - end - - return coords + return Layer(mat) end # ==================================================== @@ -77,8 +104,8 @@ end @testitem "SpatiallyStratified with default arguments can generate points" begin ss = SpatiallyStratified() - coords = seed(ss) - @test typeof(coords) <: Vector{CartesianIndex} + coords = sample(ss) + @test typeof(coords) <: Sites end @testitem "SpatiallyStratified throws error when number of sites is below 2" begin @@ -92,26 +119,3 @@ end ss = SpatiallyStratified(; numsites = NUM_POINTS) @test ss.numsites == NUM_POINTS end - -@testitem "SpatiallyStratified can use custom strata as keyword argument" begin - dims = (42, 30) - strata = rand(1:10, dims...) - inclusion_probability = [0.1 for i in 1:10] - ss = SpatiallyStratified(; - strata = strata, - inclusion_probability_by_stratum = inclusion_probability, - ) - coords = seed(ss) - @test typeof(coords) <: Vector{CartesianIndex} -end - -@testitem "SpatiallyStratified throws error if the number of inclusion probabilities are different than the number of unique strata" begin - dims = (42, 42) - inclusion_probability = [0.5, 0.5] - strata = rand(1:5, dims...) - - @test_throws ArgumentError SpatiallyStratified(; - strata = strata, - inclusion_probability_by_stratum = inclusion_probability, - ) -end diff --git a/src/types.jl b/src/types.jl index 0788d3c..7c3212b 100644 --- a/src/types.jl +++ b/src/types.jl @@ -6,7 +6,6 @@ A `BONSampler` is any algorithm for proposing a set of sampling locations. abstract type BONSampler end numsites(s::BONSampler) = s.numsites -pool(s::BONSampler) = s.pool """ mutable struct Sites{T} @@ -23,15 +22,40 @@ Base.length(s::Sites) = length(coordinates(s)) Base.eachindex(s::Sites) = eachindex(s.coordinates) abstract type LayerType end + abstract type DataLayer <: LayerType end +struct CategoricalData <: DataLayer end +struct ContinousData <: DataLayer end + abstract type InclusionProbability <: LayerType end +abstract type ResultLayer <: LayerType end + + +# Distribution over categorical values at each pixel +# Scalar at each pixel +struct CategoricalResult <: LayerType end +struct ContinuousResult <: LayerType end +# Layer struct Layer{T<:LayerType,L} layer::L end -pool(l::Layer) = Sites(vec(findall(l.layer.indices))) Base.size(l::Layer) = size(l.layer) +function Layer(l::S; categorical = false) where S<:SimpleSDMLayers.SDMLayer + T = categorical ? CategoricalData : ContinousData + Layer{T,S}(l) +end +Layer(m::Matrix{I}) where I<:Integer = Layer{CategoricalData,typeof(m)}(m) + +_indices(m::M) where M<:Matrix = findall(i->!isnothing(m[i]) && !isnan(m[i]) && !ismissing(m[i]), CartesianIndices(m)) +_indices(l::SDMLayer) = findall(l.indices) + +# Pool method +pool(l::Layer) = Sites(vec(_indices(l.layer))) +pool(dims::Tuple{I,I}) where I<:Integer = Sites(vec(collect(CartesianIndices(dims)))) + +# Layer set w/ names struct Stack{T<:LayerType,N,L} layers::Dict{N,Layer{T,L}} end diff --git a/src/uniqueness.jl b/src/uniqueness.jl index 60d856f..9b5033d 100644 --- a/src/uniqueness.jl +++ b/src/uniqueness.jl @@ -78,11 +78,6 @@ end @test_throws TooManySites Uniqueness(numsites = 26, layers = rand(5, 5, 2)) end -@testitem "Uniqueness has correct subtypes" begin - @test Uniqueness <: BONRefiner - @test Uniqueness <: BONSampler -end - @testitem "Uniqueness works with positional constructor" begin @test typeof(Uniqueness(2, rand(5, 5, 5))) <: Uniqueness end diff --git a/src/weightedbas.jl b/src/weightedbas.jl index 4f8e761..60e4d1e 100644 --- a/src/weightedbas.jl +++ b/src/weightedbas.jl @@ -3,12 +3,12 @@ A `BONSeeder` that uses Balanced-Acceptance Sampling [@cite] combined with rejection sampling to create a set of sampling sites that is weighted toward values with higher uncertainty as a function of the parameter α. """ -Base.@kwdef struct WeightedBalancedAcceptance{I <: Integer, F <: Real} <: BONSampler +Base.@kwdef struct WeightedBalancedAcceptance{I <: Integer,L<:Layer, F <: Real} <: BONSampler numsites::I = 30 - uncertainty::Matrix{F} = rand(50, 50) + uncertainty::L = Layer(rand(50, 50)) α::F = 1.0 - function WeightedBalancedAcceptance(numsites, uncertainty, α) - wbas = new{typeof(numsites), typeof(α)}(numsites, uncertainty, α) + function WeightedBalancedAcceptance(numsites, uncertainty::L, α) where L + wbas = new{typeof(numsites), L, typeof(α)}(numsites, uncertainty, α) check_arguments(wbas) return wbas end @@ -103,24 +103,6 @@ end ) end -@testitem "WeightedBalancedAcceptance can generate points" begin - wbas = WeightedBalancedAcceptance() - coords = seed(wbas) - - @test typeof(coords) <: Vector{CartesianIndex} - @test length(coords) == wbas.numsites -end - -@testitem "WeightedBalancedAcceptance can generate a custom number of points with positional arguments" begin - numpts = 77 - sz = (50, 50) - α = 1.0 - uncert = rand(sz...) - wbas = WeightedBalancedAcceptance(numpts, uncert, α) - coords = seed(wbas) - @test numpts == length(coords) -end - @testitem "WeightedBalancedAcceptance can take bias parameter α as keyword argument" begin α = 3.14159 wbas = WeightedBalancedAcceptance(; α = α)