From 78f006b9a2caef679501170681c9742a654554c8 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 20 Dec 2024 14:05:30 -0500 Subject: [PATCH 01/10] API Redesign --- Project.toml | 17 +++- _OLD/src/BiodiversityObservationNetworks.jl | 68 +++++++++++++ {src => _OLD/src}/adaptivehotspot.jl | 0 {src => _OLD/src}/balancedacceptance.jl | 0 {src => _OLD/src}/cubesampling.jl | 0 {src => _OLD/src}/entropize.jl | 0 {src => _OLD/src}/exceptions.jl | 0 {src => _OLD/src}/fractaltriad.jl | 0 {src => _OLD/src}/grts.jl | 0 _OLD/src/new_api.md | 32 ++++++ {src => _OLD/src}/refine.jl | 0 _OLD/src/sample.jl | 26 +++++ {src => _OLD/src}/simplerandom.jl | 0 {src => _OLD/src}/spatialstratified.jl | 1 - {src => _OLD/src}/types.jl | 0 {src => _OLD/src}/uniqueness.jl | 0 {src => _OLD/src}/utils.jl | 0 {src => _OLD/src}/weightedbas.jl | 0 docs/make.jl | 2 +- docs/src/index.md | 44 +------- ext/BONsMakieExt/BONsMakieExt.jl | 11 ++ ext/SDTExt.jl | 4 - src/BiodiversityObservationNetworks.jl | 107 ++++++++------------ src/geometry/bon.jl | 24 +++++ src/geometry/polygon.jl | 46 +++++++++ src/geometry/raster.jl | 46 +++++++++ src/geometry/stack.jl | 26 +++++ src/sample.jl | 30 ++---- src/samplers/grid.jl | 21 ++++ src/samplers/kmeans.jl | 35 +++++++ src/samplers/simplerandom.jl | 44 ++++++++ src/samplers/spatiallystratified.jl | 64 ++++++++++++ test/adaptivespatial.jl | 41 -------- test/balancedacceptance.jl | 38 ------- test/cubesampling.jl | 39 ------- test/entropize.jl | 18 ---- test/optimize.jl | 25 ----- test/squish.jl | 1 - test/stack.jl | 21 ---- test/uniqueness.jl | 43 -------- 40 files changed, 510 insertions(+), 364 deletions(-) create mode 100644 _OLD/src/BiodiversityObservationNetworks.jl rename {src => _OLD/src}/adaptivehotspot.jl (100%) rename {src => _OLD/src}/balancedacceptance.jl (100%) rename {src => _OLD/src}/cubesampling.jl (100%) rename {src => _OLD/src}/entropize.jl (100%) rename {src => _OLD/src}/exceptions.jl (100%) rename {src => _OLD/src}/fractaltriad.jl (100%) rename {src => _OLD/src}/grts.jl (100%) create mode 100644 _OLD/src/new_api.md rename {src => _OLD/src}/refine.jl (100%) create mode 100644 _OLD/src/sample.jl rename {src => _OLD/src}/simplerandom.jl (100%) rename {src => _OLD/src}/spatialstratified.jl (99%) rename {src => _OLD/src}/types.jl (100%) rename {src => _OLD/src}/uniqueness.jl (100%) rename {src => _OLD/src}/utils.jl (100%) rename {src => _OLD/src}/weightedbas.jl (100%) create mode 100644 ext/BONsMakieExt/BONsMakieExt.jl delete mode 100644 ext/SDTExt.jl create mode 100644 src/geometry/bon.jl create mode 100644 src/geometry/polygon.jl create mode 100644 src/geometry/raster.jl create mode 100644 src/geometry/stack.jl create mode 100644 src/samplers/grid.jl create mode 100644 src/samplers/kmeans.jl create mode 100644 src/samplers/simplerandom.jl create mode 100644 src/samplers/spatiallystratified.jl delete mode 100644 test/adaptivespatial.jl delete mode 100644 test/balancedacceptance.jl delete mode 100644 test/cubesampling.jl delete mode 100644 test/entropize.jl delete mode 100644 test/optimize.jl delete mode 100644 test/squish.jl delete mode 100644 test/stack.jl delete mode 100644 test/uniqueness.jl diff --git a/Project.toml b/Project.toml index a53faaf..4ea10cf 100644 --- a/Project.toml +++ b/Project.toml @@ -4,18 +4,22 @@ authors = ["michael catchen "] version = "0.2.1" [deps] -CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +DelaunayTriangulation = "927a84f5-c5f4-47a5-9785-b46e178433df" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +GeometryOps = "3251bfac-6a57-4b6d-aa61-ac1fef2975ab" HaltonSequences = "13907d55-377f-55d6-a9d6-25ac19e11b95" HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Requires = "ae029012-a4dd-5104-9daa-d747884805df" SimpleSDMLayers = "2c645270-77db-11e9-22c3-0f302a89c64c" SliceMap = "82cb661a-3f19-5665-9e27-df437c7e54c8" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" +SpeciesDistributionToolkit = "72b53823-5c0b-4575-ad0e-8e97227ad13b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Term = "22787eb5-b846-44ae-b979-8e399b8463ab" @@ -23,17 +27,24 @@ TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" [weakdeps] NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" [extensions] +BONsMakieExt = "Makie" [compat] +Clustering = "0.15.7" +DelaunayTriangulation = "1.6.3" Distributions = "0.25" +GeoInterface = "1.4.0" +GeometryOps = "0.1.13" HaltonSequences = "0.2" HiGHS = "1.5" JuMP = "1.11" +MultivariateStats = "0.10.3" ProgressMeter = "1.7.2" -Requires = "1.3" SliceMap = "0.2.7" SpecialFunctions = "2.1" +SpeciesDistributionToolkit = "1.2.0" StatsBase = "0.34" julia = "1.9" diff --git a/_OLD/src/BiodiversityObservationNetworks.jl b/_OLD/src/BiodiversityObservationNetworks.jl new file mode 100644 index 0000000..ff633f0 --- /dev/null +++ b/_OLD/src/BiodiversityObservationNetworks.jl @@ -0,0 +1,68 @@ +module BiodiversityObservationNetworks + +using SimpleSDMLayers +using Distributions +using Random +using HaltonSequences +using StatsBase +using SpecialFunctions +using ProgressMeter +using SliceMap +using JuMP +using HiGHS +using LinearAlgebra +using Term +using TestItems + +include("types.jl") +export BONSampler +export Sites, numsites, pool +export LayerType, DataLayer, InclusionProbability +export Layer, Stack + +include("sample.jl") +export sample + +include("exceptions.jl") +export BONException, TooFewSites, TooManySites + +include("simplerandom.jl") +export SimpleRandom + +include("spatialstratified.jl") +export SpatiallyStratified + +include("balancedacceptance.jl") +export BalancedAcceptance + +include("weightedbas.jl") +export WeightedBalancedAcceptance + +include("adaptivehotspot.jl") +export AdaptiveHotspot + +include("cubesampling.jl") +export CubeSampling + +include("fractaltriad.jl") +export FractalTriad + +include("grts.jl") +export GeneralizedRandomTessellatedStratified + +include("uniqueness.jl") +export Uniqueness + +#include("seed.jl") +#export seed, seed! + +#include("refine.jl") +#export refine, refine! + +include("entropize.jl") +export entropize, entropize! + +include("utils.jl") +export stack + +end diff --git a/src/adaptivehotspot.jl b/_OLD/src/adaptivehotspot.jl similarity index 100% rename from src/adaptivehotspot.jl rename to _OLD/src/adaptivehotspot.jl diff --git a/src/balancedacceptance.jl b/_OLD/src/balancedacceptance.jl similarity index 100% rename from src/balancedacceptance.jl rename to _OLD/src/balancedacceptance.jl diff --git a/src/cubesampling.jl b/_OLD/src/cubesampling.jl similarity index 100% rename from src/cubesampling.jl rename to _OLD/src/cubesampling.jl diff --git a/src/entropize.jl b/_OLD/src/entropize.jl similarity index 100% rename from src/entropize.jl rename to _OLD/src/entropize.jl diff --git a/src/exceptions.jl b/_OLD/src/exceptions.jl similarity index 100% rename from src/exceptions.jl rename to _OLD/src/exceptions.jl diff --git a/src/fractaltriad.jl b/_OLD/src/fractaltriad.jl similarity index 100% rename from src/fractaltriad.jl rename to _OLD/src/fractaltriad.jl diff --git a/src/grts.jl b/_OLD/src/grts.jl similarity index 100% rename from src/grts.jl rename to _OLD/src/grts.jl diff --git a/_OLD/src/new_api.md b/_OLD/src/new_api.md new file mode 100644 index 0000000..a03e2ae --- /dev/null +++ b/_OLD/src/new_api.md @@ -0,0 +1,32 @@ + + + + + +SDMLayer +Matrix + +struct Layer{T} + layer::T +end + +struct Stack{T} + stack::Vector{Layer{T}} +end + + + +abstract type InclusionType end +struct CategoricalInclusion <: InclusionType + layer::Layer + inclusion_probability::Dict +end +struct ContinuousInclusion <: InclusionType + layer::Layer +end + + + +struct InclusionProbability{<:InclusionType} + +end diff --git a/src/refine.jl b/_OLD/src/refine.jl similarity index 100% rename from src/refine.jl rename to _OLD/src/refine.jl diff --git a/_OLD/src/sample.jl b/_OLD/src/sample.jl new file mode 100644 index 0000000..61c046e --- /dev/null +++ b/_OLD/src/sample.jl @@ -0,0 +1,26 @@ + +function sample(alg::BONSampler) + _sample!( + _allocate_sites(numsites(alg)), + _default_pool(alg), + alg + ) +end + + +function sample(alg::BONSampler, l::L) where L<:Layer + _sample!( + _allocate_sites(numsites(alg)), + l, + alg + ) +end + + +function sample(alg::BONSampler, candidates::C) where C<:Sites + _sample!( + _allocate_sites(numsites(alg)), + candidates, + alg + ) +end diff --git a/src/simplerandom.jl b/_OLD/src/simplerandom.jl similarity index 100% rename from src/simplerandom.jl rename to _OLD/src/simplerandom.jl diff --git a/src/spatialstratified.jl b/_OLD/src/spatialstratified.jl similarity index 99% rename from src/spatialstratified.jl rename to _OLD/src/spatialstratified.jl index e702978..d3c6044 100644 --- a/src/spatialstratified.jl +++ b/_OLD/src/spatialstratified.jl @@ -88,7 +88,6 @@ function _default_strata(sz) mat[begin:x, :] .= 1 mat[(x + 1):end, begin:y] .= 2 mat[(x + 1):end, (y + 1):end] .= 3 - return Layer(mat) end diff --git a/src/types.jl b/_OLD/src/types.jl similarity index 100% rename from src/types.jl rename to _OLD/src/types.jl diff --git a/src/uniqueness.jl b/_OLD/src/uniqueness.jl similarity index 100% rename from src/uniqueness.jl rename to _OLD/src/uniqueness.jl diff --git a/src/utils.jl b/_OLD/src/utils.jl similarity index 100% rename from src/utils.jl rename to _OLD/src/utils.jl diff --git a/src/weightedbas.jl b/_OLD/src/weightedbas.jl similarity index 100% rename from src/weightedbas.jl rename to _OLD/src/weightedbas.jl diff --git a/docs/make.jl b/docs/make.jl index 5609ac0..2419ecf 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,7 +8,7 @@ using BiodiversityObservationNetworks bib = CitationBibliography(joinpath(@__DIR__, "BONs.bib")) makedocs( - sitename = "BiodiversityObservationNetwork.jl", + sitename = "BiodiversityObservationNetworks.jl", authors = "Michael D. Catchen, Timothée Poisot, Kari Norman, Hana Mayall, Tom Malpas", modules = [BiodiversityObservationNetworks], format = DocumenterVitepress.MarkdownVitepress( diff --git a/docs/src/index.md b/docs/src/index.md index b481d11..381dd7b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,49 +4,7 @@ The purpose of this package is to provide a high-level, extensible, modular interface to the selection of sampling point for biodiversity processes in space. It is based around a collection of types representing point selection algorithms, used to select the most informative sampling points based on raster -data. Specifically, many algorithms work from a layer indicating *entropy* of a -model based prediction at each location. +data. !!! warning "This package is in development" The `BiodiversityObservationNetworks.jl` package is currently under development. The API is not expected to change a lot, but it may change in order to facilitate the integration of new features. - -## High-level types - -```@docs -BONSampler -BONSeeder -BONRefiner -``` - -## Seeder and refiner functions - -```@docs -seed -seed! -``` - -```@docs -refine -refine! -``` - -## Seeder algorithms - -```@docs -BalancedAcceptance -``` - -## Refiner algorithms - -```@docs -AdaptiveSpatial -Uniqueness -``` - -## Helper functions - -```@docs -squish -entropize! -entropize -``` \ No newline at end of file diff --git a/ext/BONsMakieExt/BONsMakieExt.jl b/ext/BONsMakieExt/BONsMakieExt.jl new file mode 100644 index 0000000..fae1a83 --- /dev/null +++ b/ext/BONsMakieExt/BONsMakieExt.jl @@ -0,0 +1,11 @@ +module BONsMakieExt + +@info "Loading Makie Extension for BiodiversityObservationNetworks.jl..." + +@static if isdefined(Base, :get_extension) + using Makie, BiodiversityObservationNetworks +else + using ..Makie, ..BiodiversityObservationNetworks +end + +end \ No newline at end of file diff --git a/ext/SDTExt.jl b/ext/SDTExt.jl deleted file mode 100644 index d1ce98f..0000000 --- a/ext/SDTExt.jl +++ /dev/null @@ -1,4 +0,0 @@ -module SDTExt - - -end \ No newline at end of file diff --git a/src/BiodiversityObservationNetworks.jl b/src/BiodiversityObservationNetworks.jl index ff633f0..118f9fe 100644 --- a/src/BiodiversityObservationNetworks.jl +++ b/src/BiodiversityObservationNetworks.jl @@ -1,68 +1,41 @@ module BiodiversityObservationNetworks - -using SimpleSDMLayers -using Distributions -using Random -using HaltonSequences -using StatsBase -using SpecialFunctions -using ProgressMeter -using SliceMap -using JuMP -using HiGHS -using LinearAlgebra -using Term -using TestItems - -include("types.jl") -export BONSampler -export Sites, numsites, pool -export LayerType, DataLayer, InclusionProbability -export Layer, Stack - -include("sample.jl") -export sample - -include("exceptions.jl") -export BONException, TooFewSites, TooManySites - -include("simplerandom.jl") -export SimpleRandom - -include("spatialstratified.jl") -export SpatiallyStratified - -include("balancedacceptance.jl") -export BalancedAcceptance - -include("weightedbas.jl") -export WeightedBalancedAcceptance - -include("adaptivehotspot.jl") -export AdaptiveHotspot - -include("cubesampling.jl") -export CubeSampling - -include("fractaltriad.jl") -export FractalTriad - -include("grts.jl") -export GeneralizedRandomTessellatedStratified - -include("uniqueness.jl") -export Uniqueness - -#include("seed.jl") -#export seed, seed! - -#include("refine.jl") -#export refine, refine! - -include("entropize.jl") -export entropize, entropize! - -include("utils.jl") -export stack - -end + using Clustering + using DelaunayTriangulation + using Distributions + using GeometryOps + using GeoInterface + using MultivariateStats + using SpeciesDistributionToolkit + using Random + + import GeoInterface as GI + import GeometryOps as GO + import SpeciesDistributionToolkit as SDT + import SpeciesDistributionToolkit.GeoJSON as GJSON + + export BiodiversityObservationNetwork + export Node + export Polygon + export Raster + export RasterStack + + export BONSampler + export SimpleRandom, Grid, KMeans, SpatiallyStratified + export sample + export datatype + export nonempty + export is_polygonizable, is_rasterizable, is_bonifyable + + include(joinpath("geometry", "bon.jl")) + include(joinpath("geometry", "polygon.jl")) + include(joinpath("geometry", "raster.jl")) + include(joinpath("geometry", "stack.jl")) + + include("sample.jl") + include(joinpath("samplers", "simplerandom.jl")) + include(joinpath("samplers", "grid.jl")) + include(joinpath("samplers", "kmeans.jl")) + include(joinpath("samplers", "spatiallystratified.jl")) + + +end \ No newline at end of file diff --git a/src/geometry/bon.jl b/src/geometry/bon.jl new file mode 100644 index 0000000..02dada0 --- /dev/null +++ b/src/geometry/bon.jl @@ -0,0 +1,24 @@ +const __COORDINATE_TYPES = Union{Tuple{<:Real, <:Real}, CartesianIndex} + +# TODO: extent this to be GeoInterface.PointLike compat +struct Node{C <: __COORDINATE_TYPES} + coordinate::C +end +Base.show(io::IO, node::Node) = print(io, "Node at $(node.coordinate)") +Base.getindex(node::Node, i) = getindex(node.coordinate, i) + + +is_bonifyable(::T) where T = T <: Union{<:__COORDINATE_TYPES,Vector{<:__COORDINATE_TYPES}} + + +struct BiodiversityObservationNetwork{N <: Node} + nodes::Vector{N} +end +Base.show(io::IO, bon::BiodiversityObservationNetwork) = + print(io, "BiodiversityObservationNetwork with $(length(bon)) nodes") +Base.getindex(bon::BiodiversityObservationNetwork, i::Integer) = bon.nodes[i] +Base.length(bon::BiodiversityObservationNetwork) = length(bon.nodes) +Base.iterate(bon::BiodiversityObservationNetwork, i) = iterate(bon.nodes, i) +Base.iterate(bon::BiodiversityObservationNetwork) = iterate(bon.nodes) + +Base.vcat(bons::Vector{<:BiodiversityObservationNetwork}) = BiodiversityObservationNetwork(vcat([b.nodes for b in bons]...)) diff --git a/src/geometry/polygon.jl b/src/geometry/polygon.jl new file mode 100644 index 0000000..ee32953 --- /dev/null +++ b/src/geometry/polygon.jl @@ -0,0 +1,46 @@ +struct Polygon{T, G} + geometry::G + function Polygon(::T, geom::G) where {T <: GI.AbstractTrait, G} + return new{T, G}(geom) + end +end +Base.show(io, ::Polygon) = print(io, "Polygon") +Base.show(io, vec::Vector{<:Polygon}) = print(io, "Vector of $(length(vec)) Polygons") + + +GI.isgeometry(::Polygon)::Bool = true +GI.geomtrait(::Polygon)::DataType = GI.MultiPolygonTrait +GI.ngeom(::GI.MultiPolygonTrait, geom::Polygon)::Integer = ngeom(geom.geometry) +GI.getgeom(::GI.MultiPolygonTrait, geom::Polygon, i) = getgeom(GI.MultiPolygonTrait, geom.geometry, i) +GI.extent(geom::Polygon) = GI.extent(geom.geometry) + +GO.contains(geom::Polygon, coord) = GO.contains(geom.geometry, coord) +GO.area(geom::Polygon) = GO.area(geom.geometry) + +Base.convert(::Type{Polygon}, foo) = _convert_to_bons_polygon(foo) + +const __POLYGONIZABLE_TYPES = Union{<:GJSON.FeatureCollection,<:GJSON.MultiPolygon,Vector{<:GJSON.MultiPolygon}} + +is_polygonizable(::T) where T = T <: __POLYGONIZABLE_TYPES + + +# ================================================================= +# GeoJSON handlers +# +function _convert_to_bons_polygon(geom::SDT.GeoJSON.MultiPolygon) + Polygon(GI.trait(geom), geom) +end +function _convert_to_bons_polygon(vec::Vector{<:SDT.GeoJSON.MultiPolygon}) + if length(vec) > 1 + return [_convert_to_bons_polygon(vec[i]) for i in eachindex(vec)] + else + return _convert_to_bons_polygon(first(vec)) + end +end +function _convert_to_bons_polygon(fc::SDT.GeoJSON.FeatureCollection) + try + return _convert_to_bons_polygon(fc.geometry) + catch e + @error e + end +end diff --git a/src/geometry/raster.jl b/src/geometry/raster.jl new file mode 100644 index 0000000..118f692 --- /dev/null +++ b/src/geometry/raster.jl @@ -0,0 +1,46 @@ +const __RASTER_TYPES = Union{SDMLayer, Matrix{<:Real}} + +struct Raster{R <: __RASTER_TYPES} + raster::R +end +Base.show(io::IO, r::Raster) = print(io, "Raster with dimensions $(size(r))") +Base.size(r::Raster) = size(r.raster) + +Raster(sdmlayer::SDMLayer) = Raster{typeof(sdmlayer)}(sdmlayer) + +datatype(::Raster{T}) where T = T.parameters[begin] + +Base.convert(::Type{Raster}, sdmlayer::SDMLayer) = Raster(SDMLayer) +Base.convert(::Type{Raster}, m::Matrix) = Raster(m) + +is_rasterizable(::T) where T = T <: __RASTER_TYPES + + +nonempty(r::Raster{<:SDMLayer}) = findall(r.raster.indices); +nonempty(r::Raster{<:Matrix}) = findall(x-> !isnothing(x) && !isnan(x) && !ismissing(x), r.raster) + +_get_raster_extent(raster::Raster{<:SDMLayer}) = begin + bbox = SDT.boundingbox(raster.raster) + GI.Extent(X=(bbox.left, bbox.right), Y=(bbox.bottom, bbox.top)) +end +_get_raster_extent(::Raster{<:Matrix}) = GI.Extent(X=(0,1), Y=(0,1)) # rent on the unit square is out of control + +_get_raster_crs(raster::Raster{<:SDMLayer}) = GI.crs(raster.raster) +_get_raster_crs(::Raster{<:Matrix}) = nothing + + +SDT.eastings(r::Raster{<:Matrix}) = 0:(1/size(r)[2]):1 +SDT.northings(r::Raster{<:Matrix}) = 0:(1/size(r)[1]):1 + +SDT.eastings(r::Raster{<:SDMLayer}) = eastings(r.raster) +SDT.northings(r::Raster{<:SDMLayer}) = northings(r.raster) + + +GI.isgeometry(::Raster)::Bool = true +GI.geomtrait(::Raster)::DataType = GI.RasterTrait() +GI.israster(::Raster)::Bool = true +GI.trait(::Raster) = RasterTrait() + + +GI.extent(::RasterTrait, raster::Raster)::GI.Extents.Extent = _get_raster_extent(raster) +GI.crs(::RasterTrait, raster::Raster)::GeoFormatTypes.CoordinateReferenceSystem = _get_raster_crs(raster) \ No newline at end of file diff --git a/src/geometry/stack.jl b/src/geometry/stack.jl new file mode 100644 index 0000000..521ae73 --- /dev/null +++ b/src/geometry/stack.jl @@ -0,0 +1,26 @@ +struct RasterStack{R<:Raster} + stack::Vector{R} +end +Base.show(io::IO, ls::RasterStack) = print(io, "RasterStack with $(length(ls.stack)) layers") +Base.getindex(ls::RasterStack, i::Integer)= ls.stack[i] +Base.length(layers::RasterStack) = length(layers.stack) +Base.iterate(layers::RasterStack, i) = iterate(layers.stack, i) +Base.iterate(layers::RasterStack) = iterate(layers.stack) + +function _common_mask!(sdmlayers::Vector{S}) where S<:SDMLayer + mask_grid = reduce(.&, [l.indices for l in sdmlayers]) + for layer in sdmlayers + layer.indices .= mask_grid + end +end + +function RasterStack(sdmlayers::Vector{S}) where S<:SDMLayer + _common_mask!(sdmlayers) + RasterStack(Raster.(sdmlayers)) +end + +SDT.eastings(layers::RasterStack) = eastings(first(layers)) +SDT.northings(layers::RasterStack) = northings(first(layers)) + + +features(layers::RasterStack) = findall(layers[1].raster.indices), hcat([layer.raster.grid[layer.raster.indices] for layer in layers]...)' diff --git a/src/sample.jl b/src/sample.jl index 61c046e..ff20b41 100644 --- a/src/sample.jl +++ b/src/sample.jl @@ -1,26 +1,18 @@ +abstract type BONSampler end -function sample(alg::BONSampler) - _sample!( - _allocate_sites(numsites(alg)), - _default_pool(alg), - alg - ) +function _what_did_you_pass(geom) + is_polygonizable(geom) && return Polygon + is_rasterizable(geom) && return Raster + is_bonifyable(geom) && return BiodiversityObservationNetwork end +const __BON_DOMAINS = Union{Raster, RasterStack, Polygon, Vector{<:Polygon}, BiodiversityObservationNetwork} -function sample(alg::BONSampler, l::L) where L<:Layer - _sample!( - _allocate_sites(numsites(alg)), - l, - alg - ) +function sample(sampler::BONSampler, geom::Any) + T = _what_did_you_pass(geom) + sample(sampler, Base.convert(T, geom)) end - -function sample(alg::BONSampler, candidates::C) where C<:Sites - _sample!( - _allocate_sites(numsites(alg)), - candidates, - alg - ) +function sample(sampler::BONSampler, geom::__BON_DOMAINS) + _sample(sampler, geom) end diff --git a/src/samplers/grid.jl b/src/samplers/grid.jl new file mode 100644 index 0000000..ce5bc51 --- /dev/null +++ b/src/samplers/grid.jl @@ -0,0 +1,21 @@ +Base.@kwdef struct Grid{F<:Real} <: BONSampler + longitude_spacing::F = 1. # in wgs84 coordinates + latitude_spacing::F = 1. +end + + +function _generate_grid(sampler::Grid, domain) + x, y = GeoInterface.extent(domain) + x_step, y_step = sampler.longitude_spacing, sampler.latitude_spacing + BiodiversityObservationNetwork([Node((i,j)) for i in x[1]:x_step:x[2], j in y[1]:y_step:y[2] if GeometryOps.contains(domain, (i,j))]) +end + + +function _sample(sampler::Grid, domain::T) where T + if GeoInterface.isgeometry(domain) + return _generate_grid(sampler, domain) + elseif GeoInterface.israster(domain) + return + end + @warn "Can't use Grid on a $T" +end diff --git a/src/samplers/kmeans.jl b/src/samplers/kmeans.jl new file mode 100644 index 0000000..28ad88b --- /dev/null +++ b/src/samplers/kmeans.jl @@ -0,0 +1,35 @@ +struct KMeans{I<:Integer} <: BONSampler + k::I +end + + +function _sample(::KMeans, ::T) where T + @error "Can't use KMeans on a $T" +end + +function _sample(sampler::KMeans, layers::RasterStack) + cartesian_idx_pool, X = features(layers) + clusters = Clustering.kmeans(X, sampler.k) + + min_dists = [Inf for _ in 1:sampler.k] + min_dist_idxs = [0 for _ in 1:sampler.k] + + group_id = clusters.assignments + μ = clusters.centers + for i in eachindex(group_id) + xᵢ = X[:,i] + gᵢ = group_id[i] + μᵢ = μ[:,gᵢ] + + dᵢ = sqrt(sum((xᵢ .- μᵢ).^2)) + if dᵢ < min_dists[gᵢ] + min_dist_idxs[gᵢ] = i + min_dists[gᵢ] = dᵢ + end + end + + Es, Ns = eastings(layers), northings(layers) + + coords = [(Es[I[2]], Ns[I[1]]) for I in cartesian_idx_pool[min_dist_idxs]] # Cartesian Indices are always in the order (Lat, Long) + BiodiversityObservationNetwork(Node.(coords)) +end diff --git a/src/samplers/simplerandom.jl b/src/samplers/simplerandom.jl new file mode 100644 index 0000000..cdf8cdc --- /dev/null +++ b/src/samplers/simplerandom.jl @@ -0,0 +1,44 @@ +struct SimpleRandom{I<:Integer} <: BONSampler + number_of_nodes::I +end + +function _sample(sampler::SimpleRandom, polygon::Polygon) + x, y = GI.extent(polygon) + N = sampler.number_of_nodes + + selected_points = Node[] + ct = 0 + while ct < N + candidate = (rand(Uniform(x...)), rand(Uniform(y...))) + if GeometryOps.contains(polygon, candidate) + push!(selected_points, Node(candidate)) + ct += 1 + end + end + return BiodiversityObservationNetwork(selected_points) +end + +# Sample w/o replacement from non-empty CIs, then return the long/lat associated +# with it +function _sample(sampler::SimpleRandom, raster::Raster) + cart_idxs = nonempty(raster) + + node_cidxs = Distributions.sample(cart_idxs, sampler.number_of_nodes, replace=false) + + E,N = SDT.eastings(raster), SDT.northings(raster) + + BiodiversityObservationNetwork([Node((E[I[2]], N[I[1]])) for I in node_cidxs]) +end + +_sample(sampler::SimpleRandom, rasters::RasterStack) = _sample(sampler, first(rasters)) + +function _sample(sampler::SimpleRandom, domain::Vector{<:Polygon}) + @info "You passed a Vector of Polygons." + @info "Note by default SimpleRandom applies to each Polygon separately" + @info "To use SimpleRandom on the whole extent, merge the polygons." + + vcat([_sample(sampler, p) for p in domain]) +end + + +_sample(::SimpleRandom, ::T) where T = throw(ArgumentError("Can't use SimpleRandom on a $T")) diff --git a/src/samplers/spatiallystratified.jl b/src/samplers/spatiallystratified.jl new file mode 100644 index 0000000..e7b469f --- /dev/null +++ b/src/samplers/spatiallystratified.jl @@ -0,0 +1,64 @@ +struct SpatiallyStratified{I<:Integer} <: BONSampler + number_of_nodes::I +end + +function _sample( + sampler::SpatiallyStratified, + raster::Raster; + kwargs... +) + datatype(raster) <: Integer || throw(ArgumentError("Raster containing spatial strata must be discrete (integer-valued)")) + +end + + + +function _sample( + sampler::SpatiallyStratified, + domain::Vector{<:Polygon}; + kwargs... +) + @info "By default, the number of points within each polygon stratum is proportional to the stratum's area" + + areas = GO.area.(domain) + areas ./= sum(areas) + + _sample(sampler, domain, areas; kwargs...) +end + +function _assign_fixed_inclusions(num_nodes, weights) + pts_per_statum = Int.(floor.(weights .* num_nodes)) + leftover = num_nodes - sum(pts_per_statum) + added_idx = rand(1:length(pts_per_statum), leftover) + for i in added_idx + pts_per_statum[i] += 1 + end + pts_per_statum +end + +function _sample_inclusions(num_nodes, weights) + stratum_id = rand(Categorical(weights), num_nodes) + pts_per_statum = zeros(Int, length(weights)) + for i in stratum_id + pts_per_statum[i] += 1 + end + pts_per_statum +end + +function _sample( + sampler::SpatiallyStratified, + polygons::Vector{<:Polygon}, + weights::Vector{<:Real}; + fixed_weights = true, +) + length(polygons) == length(weights) || throw(ArgumentError("Number of provided Polygons is not the same as number of provided weights")) + + N = sampler.number_of_nodes + + nodes_per_stratum = fixed_weights ? _assign_fixed_inclusions(N, weights) : _sample_inclusions(N, weights) + + vcat([_sample(SimpleRandom(nodes_per_stratum[i]), polygons[i]) for i in eachindex(polygons) if nodes_per_stratum[i] > 0]) +end + + + diff --git a/test/adaptivespatial.jl b/test/adaptivespatial.jl deleted file mode 100644 index 945ea18..0000000 --- a/test/adaptivespatial.jl +++ /dev/null @@ -1,41 +0,0 @@ -module BONTestAdaptiveSpatial - -using BiodiversityObservationNetworks -using Test - -# Must have one point or more -@test_throws ArgumentError AdaptiveSpatial(0) - -# Correct subtype -@test typeof(AdaptiveSpatial(10)) <: BONRefiner -@test typeof(AdaptiveSpatial(10)) <: BONSampler - -# Test with a random uncertainty matrix -U = rand(20, 20) -pack = seed(BalancedAcceptance(; numsites = 30), U) -c = Vector{CartesianIndex}(undef, 15) -smpl = AdaptiveSpatial(length(c)) - -# Length and element type -@test length(first(refine(pack, smpl))) == smpl.numsites -@test eltype(first(refine(first(pack), smpl, U))) == CartesianIndex - -# Test with an existing coordinates vector -@test_throws DimensionMismatch refine!( - c, - first(pack), - AdaptiveSpatial(; numsites = length(c) - 1), - last(pack), -) - -# Test the curried version -@test length(first(refine(AdaptiveSpatial(; numsites = 12))(pack...))) == 12 - -# Test the curried allocating version -@test length(first(refine!(c, AdaptiveSpatial(; numsites = length(c)))(pack...))) == - length(c) -@test_throws DimensionMismatch refine!(c, AdaptiveSpatial(; numsites = length(c) - 1))( - pack..., -) - -end \ No newline at end of file diff --git a/test/balancedacceptance.jl b/test/balancedacceptance.jl deleted file mode 100644 index e2590c4..0000000 --- a/test/balancedacceptance.jl +++ /dev/null @@ -1,38 +0,0 @@ -module BONTestBalancedAcceptance - -using BiodiversityObservationNetworks -using Test - -# Must have one point or more -@test_throws TooFewSites BalancedAcceptance(0, 1.0) - -@test_throws TooManySites seed(BalancedAcceptance(; numsites = 26), rand(5, 5)) - -# Must have a positive alpha -@test_throws ArgumentError BalancedAcceptance(1, -0.01) - -# Parametric constructor -@test typeof(BalancedAcceptance(2, 0.2f0)) == BalancedAcceptance{typeof(2), Float32} - -# Correct subtype -@test typeof(BalancedAcceptance(2, 0.2)) <: BONSeeder -@test typeof(BalancedAcceptance(2, 0.2f0)) <: BONSampler - -# Test with a random uncertainty matrix -U = rand(20, 20) -@test length(first(seed(BalancedAcceptance(; numsites = 10), U))) == 10 -@test length(first(seed(BalancedAcceptance(; numsites = 20), U))) == 20 -@test eltype(first(seed(BalancedAcceptance(; numsites = 10), U))) == CartesianIndex - -# Test with an existing coordinates vector -c = Vector{CartesianIndex}(undef, 20) -@test_throws DimensionMismatch seed!(c, BalancedAcceptance(; numsites = length(c) - 1), U) - -# Test the curried version -@test length(first(seed(BalancedAcceptance(; numsites = 12))(U))) == 12 - -# Test the curried allocating version -@test length(first(seed!(c, BalancedAcceptance(; numsites = length(c)))(U))) == length(c) -@test_throws DimensionMismatch seed!(c, BalancedAcceptance(; numsites = length(c) - 1))(U) - -end diff --git a/test/cubesampling.jl b/test/cubesampling.jl deleted file mode 100644 index a29df73..0000000 --- a/test/cubesampling.jl +++ /dev/null @@ -1,39 +0,0 @@ -module BONTestCubeSampling - -using BiodiversityObservationNetworks -using Test - -# Must have one point or more -@test_throws ArgumentError CubeSampling(numsites = 0) - -# Must select fewer points than number of candidate points -@test_throws ArgumentError CubeSampling(numsites = 20, πₖ = zeros(10), x = rand(0:4, 2, 10)) - -# Must have the same number of inclusion probabilites as auxillary variables -@test_throws DimensionMismatch CubeSampling(numsites = 5, πₖ = zeros(15), x = rand(0:4, 2, 10)) - -# Correct subtype -@test typeof(CubeSampling(numsites = 5, x = rand(0:4, 2, 10))) <: BONRefiner -@test typeof(CubeSampling(numsites = 5, x = rand(0:4, 2, 10)))<: BONSampler - -# Test with a random uncertainty matrix -N = 100 -U = rand(20, 20) -pack = seed(BalancedAcceptance(; numsites = N), U) -c = Vector{CartesianIndex}(undef, 15) -x_mat = rand(0:4, 2, N) -smpl = CubeSampling(numsites = length(c), x = x_mat) - -# Length and element type -@test length(first(refine(pack, smpl))) == smpl.numsites -@test eltype(first(refine(first(pack), smpl, U))) == CartesianIndex - -# Test with an existing coordinates vector -@test_throws DimensionMismatch refine!( - c, - first(pack), - CubeSampling(; numsites = length(c) - 1, x = x_mat), - last(pack), -) - -end \ No newline at end of file diff --git a/test/entropize.jl b/test/entropize.jl deleted file mode 100644 index 79733e0..0000000 --- a/test/entropize.jl +++ /dev/null @@ -1,18 +0,0 @@ -module BONTestEntropize - -using BiodiversityObservationNetworks -using Test - -# Test with a random uncertainty matrix -U = rand(20, 20) -E = entropize(U) - -@test minimum(E) == zero(eltype(E)) -@test maximum(E) == one(eltype(E)) - -entropize!(E, U) - -@test minimum(E) == zero(eltype(E)) -@test maximum(E) == one(eltype(E)) - -end \ No newline at end of file diff --git a/test/optimize.jl b/test/optimize.jl deleted file mode 100644 index 4501c0d..0000000 --- a/test/optimize.jl +++ /dev/null @@ -1,25 +0,0 @@ -module BONTestOptimize - using BiodiversityObservationNetworks - using Test - using Zygote - using StatsBase - - #= # empty method returns missing - @test ismissing(optimize()) - - layers = rand(50,50,5) - loss = (layers, W, α) -> StatsBase.entropy(squish(layers, W, α))/prod(size(layers[:,:,1])) - - W,α,lossvals = optimize(layers, loss) - @test typeof(W) <: Matrix - @test typeof(α) <: Vector - @test typeof(lossvals) <: Vector - - @test_throws ArgumentError optimize(layers, loss; learningrate = -0.1) - @test_throws ArgumentError optimize(layers, loss; numsteps = 0) - @test_throws ArgumentError optimize(zeros(50,50), loss; numsteps = 0) - @test_throws ArgumentError optimize(layers, 5; numsteps = 0)=# - - - -end \ No newline at end of file diff --git a/test/squish.jl b/test/squish.jl deleted file mode 100644 index 8b13789..0000000 --- a/test/squish.jl +++ /dev/null @@ -1 +0,0 @@ - diff --git a/test/stack.jl b/test/stack.jl deleted file mode 100644 index b3cae6e..0000000 --- a/test/stack.jl +++ /dev/null @@ -1,21 +0,0 @@ -module BONTestStack -using BiodiversityObservationNetworks -# using SpeciesDistributionToolkit -using Test - -#=nl = 10 -layers = [rand(50,50) for i in 1:nl] - -@test typeof(BiodiversityObservationNetworks.stack(layers)) <: Array{T,3} where T -@test size(BiodiversityObservationNetworks.stack(layers),3) == nl - -bbox = (left=-83.0, bottom=46.4, right=-55.2, top=63.7) -temp, precip, elevation = - SimpleSDMPredictor(rand(50,50); bbox...), - SimpleSDMPredictor(rand(50,50); bbox...), - SimpleSDMPredictor(rand(50,50); bbox...) - -layers = [temp,precip,elevation] -@test typeof(BiodiversityObservationNetworks.stack(layers)) <: Array{T,3} where T -=# -end diff --git a/test/uniqueness.jl b/test/uniqueness.jl deleted file mode 100644 index 4f4a0a3..0000000 --- a/test/uniqueness.jl +++ /dev/null @@ -1,43 +0,0 @@ -module BONTestUniqueness - -using BiodiversityObservationNetworks -using Test - -# Must have one point or more -@test_throws ArgumentError Uniqueness(0, zeros(5, 5, 5)) -@test_throws ArgumentError Uniqueness(0, zeros(5, 5)) - -# Parametric constructor -@test typeof(Uniqueness(Int32(2), zeros(Float32, 5, 5, 5))) == Uniqueness{Int32,Float32} - -# Correct subtype -@test typeof(Uniqueness(2, zeros(5, 5, 5))) <: BONRefiner -@test typeof(Uniqueness(2, zeros(5, 5, 5))) <: BONSampler - -# Test with a random uncertainty matrix -layers = rand(20, 20, 5) -np = 25 -pack = seed(BalancedAcceptance(; numsites=np), rand(20, 20)) - -@test length(first(refine(pack, Uniqueness(; numsites=10, layers=layers)))) == 10 -@test length(first(refine(pack, Uniqueness(; numsites=20, layers=layers)))) == 20 -@test eltype(first(refine(pack, Uniqueness(; numsites=10, layers=layers)))) == CartesianIndex - -# Test with an existing coordinates vector -c = Vector{CartesianIndex}(undef, np) -@test_throws DimensionMismatch refine!( - c, - first(pack), - Uniqueness(; numsites=length(c) - 1, layers=layers), - last(pack), -) -# Test the curried allocating version -@test length(first(refine(Uniqueness(; numsites=12, layers=layers))(pack...))) == 12 - -# Test the curried allocating version -@test length(first(refine!(c, Uniqueness(; numsites=length(c), layers=layers))(pack...))) == - length(c) -@test_throws DimensionMismatch refine!(c, Uniqueness(; numsites=length(c) - 1, layers=layers))( - pack..., -) -end From dc2c4414d966b47c53fbed3112fed76de78f6455 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 20 Dec 2024 14:19:22 -0500 Subject: [PATCH 02/10] methods to add --- _OLD/src/BiodiversityObservationNetworks.jl | 68 ---------- _OLD/src/entropize.jl | 38 ------ _OLD/src/exceptions.jl | 29 ----- _OLD/src/new_api.md | 32 ----- _OLD/src/refine.jl | 72 ----------- _OLD/src/sample.jl | 26 ---- _OLD/src/simplerandom.jl | 66 ---------- _OLD/src/spatialstratified.jl | 120 ------------------ _OLD/src/types.jl | 64 ---------- _OLD/src/uniqueness.jl | 83 ------------ _OLD/src/utils.jl | 13 -- _OLD/src/weightedbas.jl | 101 --------------- {_OLD/src => src/samplers}/adaptivehotspot.jl | 2 + .../samplers}/balancedacceptance.jl | 3 + .../cubesampling.jl => src/samplers/cube.jl | 3 + {_OLD/src => src/samplers}/fractaltriad.jl | 3 + {_OLD/src => src/samplers}/grts.jl | 3 + 17 files changed, 14 insertions(+), 712 deletions(-) delete mode 100644 _OLD/src/BiodiversityObservationNetworks.jl delete mode 100644 _OLD/src/entropize.jl delete mode 100644 _OLD/src/exceptions.jl delete mode 100644 _OLD/src/new_api.md delete mode 100644 _OLD/src/refine.jl delete mode 100644 _OLD/src/sample.jl delete mode 100644 _OLD/src/simplerandom.jl delete mode 100644 _OLD/src/spatialstratified.jl delete mode 100644 _OLD/src/types.jl delete mode 100644 _OLD/src/uniqueness.jl delete mode 100644 _OLD/src/utils.jl delete mode 100644 _OLD/src/weightedbas.jl rename {_OLD/src => src/samplers}/adaptivehotspot.jl (99%) rename {_OLD/src => src/samplers}/balancedacceptance.jl (99%) rename _OLD/src/cubesampling.jl => src/samplers/cube.jl (99%) rename {_OLD/src => src/samplers}/fractaltriad.jl (99%) rename {_OLD/src => src/samplers}/grts.jl (99%) diff --git a/_OLD/src/BiodiversityObservationNetworks.jl b/_OLD/src/BiodiversityObservationNetworks.jl deleted file mode 100644 index ff633f0..0000000 --- a/_OLD/src/BiodiversityObservationNetworks.jl +++ /dev/null @@ -1,68 +0,0 @@ -module BiodiversityObservationNetworks - -using SimpleSDMLayers -using Distributions -using Random -using HaltonSequences -using StatsBase -using SpecialFunctions -using ProgressMeter -using SliceMap -using JuMP -using HiGHS -using LinearAlgebra -using Term -using TestItems - -include("types.jl") -export BONSampler -export Sites, numsites, pool -export LayerType, DataLayer, InclusionProbability -export Layer, Stack - -include("sample.jl") -export sample - -include("exceptions.jl") -export BONException, TooFewSites, TooManySites - -include("simplerandom.jl") -export SimpleRandom - -include("spatialstratified.jl") -export SpatiallyStratified - -include("balancedacceptance.jl") -export BalancedAcceptance - -include("weightedbas.jl") -export WeightedBalancedAcceptance - -include("adaptivehotspot.jl") -export AdaptiveHotspot - -include("cubesampling.jl") -export CubeSampling - -include("fractaltriad.jl") -export FractalTriad - -include("grts.jl") -export GeneralizedRandomTessellatedStratified - -include("uniqueness.jl") -export Uniqueness - -#include("seed.jl") -#export seed, seed! - -#include("refine.jl") -#export refine, refine! - -include("entropize.jl") -export entropize, entropize! - -include("utils.jl") -export stack - -end diff --git a/_OLD/src/entropize.jl b/_OLD/src/entropize.jl deleted file mode 100644 index 413f24f..0000000 --- a/_OLD/src/entropize.jl +++ /dev/null @@ -1,38 +0,0 @@ -""" - entropize!(U::Matrix{AbstractFloat}, A::Matrix{Number}) - -This function turns a matrix `A` (storing measurement values) into pixel-wise -entropy values, stored in a matrix `U` (that is previously allocated). - -Pixel-wise entropy is determined by measuring the empirical probability of -randomly picking a value in the matrix that is either lower or higher than the -pixel value. The entropy of both these probabilities are calculated using the --p×log(2,p) formula. The entropy of the pixel is the *sum* of the two entropies, -so that it is close to 1 for values close to the median, and close to 0 for -values close to the extreme of the distribution. -""" -function entropize!(U::Matrix{RT}, A::Matrix{T}) where {RT <: AbstractFloat, T <: Number} - for i in findall(!isnan, A) - p_high = mean(skipmissing(A .< A[i])) - p_low = mean(skipmissing(A .>= A[i])) - e_high = p_high .* log2(p_high) - e_low = p_low .* log2(p_low) - U[i] = -e_high .- e_low - end - U[findall(isnan, U)] .= zero(eltype(U)) - m = maximum(U) - for i in eachindex(U) - U[i] /= m - end - return U -end - -""" - entropize(A::Matrix{Number}) - -Allocation version of `entropize!`. -""" -function entropize(A::Matrix{T}) where {T <: Number} - U = zeros(size(A)) - return entropize!(U, A) -end \ No newline at end of file diff --git a/_OLD/src/exceptions.jl b/_OLD/src/exceptions.jl deleted file mode 100644 index 5894a0f..0000000 --- a/_OLD/src/exceptions.jl +++ /dev/null @@ -1,29 +0,0 @@ -abstract type BONException <: Base.Exception end -abstract type SeederException <: BONException end - -Base.showerror(io::IO, e::E) where {E <: BONException} = - tprint( - io, - "{bold red}$(supertype(E)){/bold red} | {bold yellow}$(e.message){/bold yellow}\n", - ) - -function _check_arguments(sampler::BONSampler) - sampler.numsites > 1 || throw(TooFewSites()) - return nothing -end - -@kwdef struct TooFewSites <: BONException - message = "Number of sites to select must be at least two." -end -function check(::Type{TooFewSites}, sampler) - sampler.numsites > 1 || throw(TooFewSites()) - return nothing -end - -@kwdef struct TooManySites <: BONException - message = "Cannot select more sites than there are candidates." -end -function check(::Type{TooManySites}, sampler) - sampler.numsites <= maxsites(sampler) || throw(TooManySites()) - return nothing -end diff --git a/_OLD/src/new_api.md b/_OLD/src/new_api.md deleted file mode 100644 index a03e2ae..0000000 --- a/_OLD/src/new_api.md +++ /dev/null @@ -1,32 +0,0 @@ - - - - - -SDMLayer -Matrix - -struct Layer{T} - layer::T -end - -struct Stack{T} - stack::Vector{Layer{T}} -end - - - -abstract type InclusionType end -struct CategoricalInclusion <: InclusionType - layer::Layer - inclusion_probability::Dict -end -struct ContinuousInclusion <: InclusionType - layer::Layer -end - - - -struct InclusionProbability{<:InclusionType} - -end diff --git a/_OLD/src/refine.jl b/_OLD/src/refine.jl deleted file mode 100644 index cb23c96..0000000 --- a/_OLD/src/refine.jl +++ /dev/null @@ -1,72 +0,0 @@ -# No longer used - - -""" - refine!(cooords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST) - -Refines a set of candidate sampling locations in the preallocated vector `coords` -from a vector of coordinates `pool` using `sampler`, where `sampler` is a [`BONRefiner`](@ref). -""" -function refine!( - coords::Vector{CartesianIndex}, - pool::Vector{CartesianIndex}, - sampler::ST, -) where {ST <: BONRefiner} - if length(coords) != sampler.numsites - throw( - DimensionMismatch( - "The length of the coordinate vector must match the `numsites` fields of the sampler", - ), - ) - end - if length(coords) > length(pool) - throw( - DimensionMismatch( - "The number of refined points must be at least the number of seeded points", - ), - ) - end - return _generate!(coords, copy(pool), sampler) -end - -""" - refine!(cooords::Vector{CartesianIndex}, pool::Vector{CartesianIndex}, sampler::ST) - -The curried version of `refine!`, which returns a function that acts on the input -coordinate pool passed to the curried function (`p` below). -""" -function refine!(coords::Vector{CartesianIndex}, sampler::ST) where {ST <: BONRefiner} - if length(coords) != sampler.numsites - throw( - DimensionMismatch( - "The length of the coordinate vector must match the `numsites` fields of the sampler", - ), - ) - end - return (p) -> refine!(coords, copy(p), sampler) -end - -""" - refine(pool::Vector{CartesianIndex}, sampler::ST) - -Refines a set of candidate sampling locations and returns a vector `coords` of length numsites -from a vector of coordinates `pool` using `sampler`, where `sampler` is a [`BONRefiner`](@ref). -""" -function refine( - pool::Vector{CartesianIndex}, - sampler::ST, -) where {ST <: BONRefiner} - coords = Vector{CartesianIndex}(undef, sampler.numsites) - return refine!(coords, copy(pool), sampler) -end - -""" - refine(sampler::BONRefiner) - -Returns a curried function of `refine` -""" -function refine(sampler::ST) where {ST <: BONRefiner} - coords = Vector{CartesianIndex}(undef, sampler.numsites) - _inner(p) = refine!(coords, first(p), sampler) - return _inner -end diff --git a/_OLD/src/sample.jl b/_OLD/src/sample.jl deleted file mode 100644 index 61c046e..0000000 --- a/_OLD/src/sample.jl +++ /dev/null @@ -1,26 +0,0 @@ - -function sample(alg::BONSampler) - _sample!( - _allocate_sites(numsites(alg)), - _default_pool(alg), - alg - ) -end - - -function sample(alg::BONSampler, l::L) where L<:Layer - _sample!( - _allocate_sites(numsites(alg)), - l, - alg - ) -end - - -function sample(alg::BONSampler, candidates::C) where C<:Sites - _sample!( - _allocate_sites(numsites(alg)), - candidates, - alg - ) -end diff --git a/_OLD/src/simplerandom.jl b/_OLD/src/simplerandom.jl deleted file mode 100644 index ce853db..0000000 --- a/_OLD/src/simplerandom.jl +++ /dev/null @@ -1,66 +0,0 @@ -""" - SimpleRandom - -Implements Simple Random spatial sampling (each location has equal probability of selection) -""" -Base.@kwdef struct SimpleRandom{I<:Integer} <: BONSampler - numsites::I = 30 - function SimpleRandom(numsites::I) where{I<:Integer} - srs = new{typeof(numsites)}(numsites) - check_arguments(srs) - return srs - end -end - -function check_arguments(srs::SRS) where {SRS <: SimpleRandom} - check(TooFewSites, srs) - return nothing -end - -_default_pool(::SimpleRandom) = pool((50,50)) - -function sample(::SimpleRandom, ::Layer) -end -function sample(::SimpleRandom, ::Matrix) -end -function sample(sr::SimpleRandom, T::Tuple{I,I}) where I <: Integer - -end - - - - -function _sample!( - selections::S, - candidates::C, - sampler::SimpleRandom{I}, -) where {S<:Sites,C<:Sites,I} - _coords = Distributions.sample(candidates.coordinates, sampler.numsites; replace = false) - for (i,c) in enumerate(_coords) - selections[i] = c - end - return selections -end - -# ==================================================== -# -# Tests -# -# ===================================================== - -@testitem "SimpleRandom constructor with no arguments works" begin - sr = SimpleRandom() - @test typeof(sr) <: SimpleRandom -end - -@testitem "SimpleRandom must have more than one point" begin - @test_throws TooFewSites SimpleRandom(numsites = -1) - @test_throws TooFewSites SimpleRandom(numsites = 0) - @test_throws TooFewSites SimpleRandom(numsites = 1) -end - -@testitem "SimpleRandom allows keyword arguments for number of points" begin - N = 314 - srs = SimpleRandom(; numsites = N) - @test srs.numsites == N -end diff --git a/_OLD/src/spatialstratified.jl b/_OLD/src/spatialstratified.jl deleted file mode 100644 index d3c6044..0000000 --- a/_OLD/src/spatialstratified.jl +++ /dev/null @@ -1,120 +0,0 @@ -""" - SpatiallyStratified -""" -@kwdef struct SpatiallyStratified{I<:Integer,L<:Layer,F<:AbstractFloat,N} <: BONSampler - numsites::I = 50 - layer::L = _default_strata((50, 50)) - category_dict::Dict{I,N} = _default_categories() - inclusion_dict::Dict{N,F} = _default_inclusion() - function SpatiallyStratified( - numsites::I, - layer::L, - category_dict::Dict{I,N}, - inclusion_dict::Dict{N,F} - ) where {I<:Integer,L<:Layer,F<:AbstractFloat,N} - ss = new{I,L,F,N}( - numsites, - layer, - category_dict, - inclusion_dict - ) - check_arguments(ss) - return ss - end -end - - -function check_arguments(ss::SpatiallyStratified) - check(TooFewSites, ss) - - length(ss.category_dict) == length(ss.inclusion_dict) || throw( - ArgumentError( - "Inclusion probability vector does not have the same number of strata as there are unique values in the strata matrix", - ), - ) - - return sum(values(ss.inclusion_dict)) ≈ 1.0 || - throw(ArgumentError("Inclusion probabilities across all strata do not sum to 1.")) -end - - -function _sample!( - coords::S, - candidates::C, - sampler::SpatiallyStratified{I,L,F,N}, -) where {S<:Sites,C<:Sites,I,L,F,N} - n = sampler.numsites - strata = sampler.layer - categories = sampler.category_dict - - category_ids = sort(collect(keys(categories))) - candidate_ids = [strata.layer[x] for x in coordinates(candidates)] - - cat_idx = Dict() - inclusion_probs = F[] - for k in category_ids - cat_idx[k] = findall(isequal(k), candidate_ids) - if length(cat_idx[k]) > 0 - push!(inclusion_probs, sampler.inclusion_dict[categories[k]]) - else - push!(inclusion_probs, 0) - end - end - - inclusion_probs ./= sum(inclusion_probs) - - # check if there are empty categories, if so set incl prob to 0 and - # renormalize? - selected_cats = rand(Categorical(inclusion_probs), n) - for (i,c) in enumerate(selected_cats) - if length(cat_idx[c]) > 0 - coords[i] = candidates[rand(cat_idx[c])] - end - end - return coords -end - -# Utils -_default_pool(::SpatiallyStratified) = pool(_default_strata((50,50))) -_default_categories() = Dict{Int,String}(1=>"A", 2=>"B", 3=>"C") -_default_inclusion() = Dict{String,Float64}("A"=>0.5, "B"=>0.3, "C"=>0.2) - -function _default_strata(sz) - mat = zeros(typeof(sz[1]), sz...) - - x = sz[1] ÷ 2 - y = sz[2] ÷ 3 - - mat[begin:x, :] .= 1 - mat[(x + 1):end, begin:y] .= 2 - mat[(x + 1):end, (y + 1):end] .= 3 - return Layer(mat) -end - -# ==================================================== -# -# Tests -# -# ===================================================== - -@testitem "SpatiallyStratified default constructor works" begin - @test typeof(SpatiallyStratified()) <: SpatiallyStratified -end - -@testitem "SpatiallyStratified with default arguments can generate points" begin - ss = SpatiallyStratified() - coords = sample(ss) - @test typeof(coords) <: Sites -end - -@testitem "SpatiallyStratified throws error when number of sites is below 2" begin - @test_throws TooFewSites SpatiallyStratified(numsites = -1) - @test_throws TooFewSites SpatiallyStratified(numsites = 0) - @test_throws TooFewSites SpatiallyStratified(numsites = 1) -end - -@testitem "SpatiallyStratified can use custom number of points as keyword argument" begin - NUM_POINTS = 42 - ss = SpatiallyStratified(; numsites = NUM_POINTS) - @test ss.numsites == NUM_POINTS -end diff --git a/_OLD/src/types.jl b/_OLD/src/types.jl deleted file mode 100644 index 358594a..0000000 --- a/_OLD/src/types.jl +++ /dev/null @@ -1,64 +0,0 @@ -""" - abstract type BONSampler end - -A `BONSampler` is any algorithm for proposing a set of sampling locations. -""" -abstract type BONSampler end - -numsites(s::BONSampler) = s.numsites - -""" - mutable struct Sites{T} - -""" -mutable struct Sites{T} - coordinates::Vector{T} -end -_allocate_sites(n) = Sites(Array{CartesianIndex}(undef, n)) -coordinates(s::Sites) = s.coordinates -Base.getindex(s::Sites, i::Integer) = getindex(coordinates(s), i) -Base.getindex(s::Sites, i::UnitRange) = getindex(coordinates(s), i) - -Base.setindex!(s::Sites, v, i::Integer) = setindex!(coordinates(s), v,i) -Base.length(s::Sites) = length(coordinates(s)) -Base.eachindex(s::Sites) = eachindex(s.coordinates) - -abstract type LayerType end - -abstract type DataLayer <: LayerType end -struct CategoricalData <: DataLayer end -struct ContinousData <: DataLayer end - -abstract type InclusionProbability <: LayerType end -abstract type ResultLayer <: LayerType end - - -# Distribution over categorical values at each pixel -# Scalar at each pixel -struct CategoricalResult <: LayerType end -struct ContinuousResult <: LayerType end - -# Layer -struct Layer{T<:LayerType,L} - layer::L -end -Base.size(l::Layer) = size(l.layer) -function Layer(l::S; categorical = false) where S<:SimpleSDMLayers.SDMLayer - T = categorical ? CategoricalData : ContinousData - Layer{T,S}(l) -end -Layer(m::Matrix{I}) where I<:Integer = Layer{CategoricalData,typeof(m)}(m) -Layer(m::Matrix{R}) where R<:Real = Layer{ContinousData,typeof(m)}(m) - -_indices(m::M) where M<:Matrix = findall(i->!isnothing(m[i]) && !isnan(m[i]) && !ismissing(m[i]), CartesianIndices(m)) -_indices(l::SDMLayer) = findall(l.indices) - -# Pool method -pool(l::Layer) = Sites(vec(_indices(l.layer))) -pool(dims::Tuple{I,I}) where I<:Integer = Sites(vec(collect(CartesianIndices(dims)))) - - -# Layer set w/ names -struct Stack{T<:LayerType,N,L} - layers::Dict{N,Layer{T,L}} -end diff --git a/_OLD/src/uniqueness.jl b/_OLD/src/uniqueness.jl deleted file mode 100644 index 9b5033d..0000000 --- a/_OLD/src/uniqueness.jl +++ /dev/null @@ -1,83 +0,0 @@ -""" - Uniqueness - -A `BONSampler` -""" -Base.@kwdef struct Uniqueness{I <: Integer, T <: AbstractFloat} <: BONSampler - numsites::I = 30 - layers::Array{T, 3} = rand(50, 50, 3) - function Uniqueness(numsites, layers) - uniq = new{typeof(numsites), typeof(layers[begin])}(numsites, layers) - check_arguments(uniq) - return uniq - end -end - -maxsites(uniq::Uniqueness) = prod(size(uniq.layers)[1:2]) - -function check_arguments(uniq::Uniqueness) - check(TooFewSites, uniq) - check(TooManySites, uniq) - - length(size(uniq.layers)) == 3 || - throw( - ArgumentError( - "You cannot have a Uniqueness sampler without layers passed as a 3-dimenisional array, where the first two axes are latitude and longitude.", - ), - ) - - return nothing -end - -function _generate!( - coords::Vector{CartesianIndex}, - pool::Vector{CartesianIndex}, - sampler::Uniqueness, -) - layers = sampler.layers - ndims(layers) <= 2 && - throw(ArgumentError("Uniqueness needs more than one layer to work.")) - size(uncertainty) != (size(layers, 1), size(layers, 2)) && - throw(DimensionMismatch("Layers are not the same dimension as uncertainty")) - - total_covariance = zeros(length(pool)) - for (i, p1) in enumerate(pool) - v1 = layers[p1[1], p1[2], :] - for (j, p2) in enumerate(pool) - v2 = layers[p2[1], p2[2], :] - if p1 != p2 - total_covariance[i] += abs(cov(v1, v2)) - end - end - end - - np = sampler.numsites - sortedvals = sortperm(vec(total_covariance)) - - coords[:] .= pool[sortedvals[1:np]] - return coords -end - -# ==================================================== -# -# Tests -# -# ===================================================== - -@testitem "Uniqueness default constructor works" begin - @test typeof(Uniqueness()) <: Uniqueness -end - -@testitem "Uniqueness requires more than one point" begin - @test_throws TooFewSites Uniqueness(numsites = -1) - @test_throws TooFewSites Uniqueness(numsites = 0) - @test_throws TooFewSites Uniqueness(numsites = 1) -end - -@testitem "Uniqueness throws error if more points are requested than are possible" begin - @test_throws TooManySites Uniqueness(numsites = 26, layers = rand(5, 5, 2)) -end - -@testitem "Uniqueness works with positional constructor" begin - @test typeof(Uniqueness(2, rand(5, 5, 5))) <: Uniqueness -end diff --git a/_OLD/src/utils.jl b/_OLD/src/utils.jl deleted file mode 100644 index d856673..0000000 --- a/_OLD/src/utils.jl +++ /dev/null @@ -1,13 +0,0 @@ -function stack(layers::Vector{<:AbstractMatrix}) - # assert all layers are the same size if we add this function to BONs.jl - - mat = zeros(size(first(layers))..., length(layers)) - for (l, layer) in enumerate(layers) - nonnanvals = vec(layer[findall(!isnan, layer)]) - thismin, thismax = findmin(nonnanvals)[1], findmax(nonnanvals)[1] - - mat[:, :, l] .= - broadcast(x -> isnan(x) ? NaN : (x - thismin) / (thismax - thismin), layer) - end - return mat -end diff --git a/_OLD/src/weightedbas.jl b/_OLD/src/weightedbas.jl deleted file mode 100644 index 8c33fe4..0000000 --- a/_OLD/src/weightedbas.jl +++ /dev/null @@ -1,101 +0,0 @@ -""" - WeightedBalancedAcceptance - -A `BONSeeder` that uses Balanced-Acceptance Sampling [@cite] combined with rejection sampling to create a set of sampling sites that is weighted toward values with higher uncertainty as a function of the parameter α. -""" -Base.@kwdef struct WeightedBalancedAcceptance{I <: Integer, F <: Real} <: BONSampler - numsites::I = 30 - α::F = 1.0 - function WeightedBalancedAcceptance(numsites, α) - wbas = new{typeof(numsites), typeof(α)}(numsites, α) - check_arguments(wbas) - return wbas - end -end - - -function check_arguments(wbas::WeightedBalancedAcceptance) - check(TooFewSites, wbas) - wbas.α > 0 || - throw( - ArgumentError("WeightedBalancedAcceptance requires α to be greater than 0 "), - ) - return nothing -end - -function _sample!( - coords::S, - candidates::C, - sampler::WeightedBalancedAcceptance{I, T}, - weights::L -) where {S<:Sites,C<:Sites,I <: Integer, T <: AbstractFloat,L<:Layer} - seed = rand(I.(1e0:1e7), 2) - α = sampler.α - x, y = size(weights) - - nonnan_indices = findall(!isnan, weights) - stduncert = similar(weights) - - uncert_values = weights[nonnan_indices] - stduncert_values = similar(uncert_values) - zfit = nothing - if var(uncert_values) > 0 - zfit = StatsBase.fit(ZScoreTransform, uncert_values) - stduncert_values = StatsBase.transform(zfit, uncert_values) - end - - nonnan_counter = 1 - for i in eachindex(weights) - if isnan(weights[i]) - stduncert[i] = NaN - elseif !isnothing(zfit) - stduncert[i] = stduncert_values[nonnan_counter] - nonnan_counter += 1 - else - stduncert[i] = 1.0 - end - end - - reluncert = broadcast(x -> isnan(x) ? NaN : exp(α * x) / (1 + exp(α * x)), stduncert) - ptct = 1 - addedpts = 1 - while addedpts <= length(coords) - i, j = haltonvalue(seed[1] + ptct, 2), haltonvalue(seed[2] + ptct, 3) - candcoord = CartesianIndex(convert.(Int32, [ceil(x * i), ceil(y * j)])...) - prob = reluncert[candcoord] - if !isnan(prob) && rand() < prob - coords[addedpts] = candcoord - addedpts += 1 - end - ptct += 1 - end - - return coords -end - -# ==================================================== -# -# Tests -# -# ===================================================== - -@testitem "WeightedBalancedAcceptance default constructor works" begin - @test typeof(WeightedBalancedAcceptance()) <: WeightedBalancedAcceptance -end - -@testitem "WeightedBalancedAcceptance requires positive number of sites" begin - @test_throws TooFewSites WeightedBalancedAcceptance(numsites = 0) - @test_throws TooFewSites WeightedBalancedAcceptance(numsites = 1) -end - -@testitem "WeightedBalancedAcceptance can take bias parameter α as keyword argument" begin - α = 3.14159 - wbas = WeightedBalancedAcceptance(; α = α) - @test wbas.α == α -end - -@testitem "WeightedBalancedAcceptance can take number of points as keyword argument" begin - N = 40 - wbas = WeightedBalancedAcceptance(; numsites = N) - @test wbas.numsites == N -end diff --git a/_OLD/src/adaptivehotspot.jl b/src/samplers/adaptivehotspot.jl similarity index 99% rename from _OLD/src/adaptivehotspot.jl rename to src/samplers/adaptivehotspot.jl index c3c9afb..0e067cd 100644 --- a/_OLD/src/adaptivehotspot.jl +++ b/src/samplers/adaptivehotspot.jl @@ -1,3 +1,4 @@ +#= """ AdaptiveHotspot @@ -96,3 +97,4 @@ end @test_throws TooFewSites AdaptiveHotspot(numsites = 0) @test_throws TooFewSites AdaptiveHotspot(numsites = -1) end +=# \ No newline at end of file diff --git a/_OLD/src/balancedacceptance.jl b/src/samplers/balancedacceptance.jl similarity index 99% rename from _OLD/src/balancedacceptance.jl rename to src/samplers/balancedacceptance.jl index 42386f6..706e277 100644 --- a/_OLD/src/balancedacceptance.jl +++ b/src/samplers/balancedacceptance.jl @@ -1,3 +1,4 @@ +#= """ BalancedAcceptance @@ -93,3 +94,5 @@ end bas = BalancedAcceptance(; numsites = N) @test bas.numsites == N end + +=# \ No newline at end of file diff --git a/_OLD/src/cubesampling.jl b/src/samplers/cube.jl similarity index 99% rename from _OLD/src/cubesampling.jl rename to src/samplers/cube.jl index 91a591a..a284092 100644 --- a/_OLD/src/cubesampling.jl +++ b/src/samplers/cube.jl @@ -1,3 +1,4 @@ +#= """ CubeSampling @@ -451,3 +452,5 @@ end @test_throws TooFewSites CubeSampling(numsites = 0) @test_throws TooFewSites CubeSampling(numsites = 1) end + +=# \ No newline at end of file diff --git a/_OLD/src/fractaltriad.jl b/src/samplers/fractaltriad.jl similarity index 99% rename from _OLD/src/fractaltriad.jl rename to src/samplers/fractaltriad.jl index abc8172..792e1b7 100644 --- a/_OLD/src/fractaltriad.jl +++ b/src/samplers/fractaltriad.jl @@ -1,3 +1,4 @@ +#= """ FractalTriad @@ -187,3 +188,5 @@ end @test_throws TooFewSites FractalTriad(; numsites = -1) @test_throws TooFewSites FractalTriad(; numsites = 0) end + +=# \ No newline at end of file diff --git a/_OLD/src/grts.jl b/src/samplers/grts.jl similarity index 99% rename from _OLD/src/grts.jl rename to src/samplers/grts.jl index 9551aa9..dcac9ae 100644 --- a/_OLD/src/grts.jl +++ b/src/samplers/grts.jl @@ -1,3 +1,4 @@ +#= """ GeneralizedRandomTessellatedStratified @@ -70,3 +71,5 @@ function _generate!( CartesianIndices(code_numbers)[sort_idx], )[1:(grts.numsites)] end + +=# \ No newline at end of file From 1ff6e763b51feb670db8bc4d8f2fa294c2a17bd3 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 20 Dec 2024 14:20:24 -0500 Subject: [PATCH 03/10] scratch dir for testing --- _SCRATCH/Project.toml | 4 ++++ _SCRATCH/scratchpad.jl | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 _SCRATCH/Project.toml create mode 100644 _SCRATCH/scratchpad.jl diff --git a/_SCRATCH/Project.toml b/_SCRATCH/Project.toml new file mode 100644 index 0000000..36411d2 --- /dev/null +++ b/_SCRATCH/Project.toml @@ -0,0 +1,4 @@ +[deps] +BiodiversityObservationNetworks = "a5b868d3-191d-4bba-a38a-ad28190da010" +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" diff --git a/_SCRATCH/scratchpad.jl b/_SCRATCH/scratchpad.jl new file mode 100644 index 0000000..f7e9b93 --- /dev/null +++ b/_SCRATCH/scratchpad.jl @@ -0,0 +1,33 @@ +using Pkg +Pkg.activate(@__DIR__) + +using BiodiversityObservationNetworks +using CairoMakie, GeoMakie + +import BiodiversityObservationNetworks as BONs +import BiodiversityObservationNetworks.SpeciesDistributionToolkit as SDT +import BiodiversityObservationNetworks.GeoInterface as GI +import BiodiversityObservationNetworks.GeometryOps as GO + + +col = SDT.gadm("COL") +col_states = SDT.gadm("COL", 1) + +bioclim = RasterStack([SDT.mask!(SDT.SDMLayer(SDT.RasterData(SDT.WorldClim2, SDT.BioClim); layer=i, SDT.boundingbox(col)...), col) for i in 1:19]) + +bon = sample(SimpleRandom(50), bioclim) +bon = sample(SimpleRandom(10), col_states) +bon = sample(SpatiallyStratified(100), col_states) +bon = BONs.sample(Grid(), col) +bon = BONs.sample(KMeans(75), bioclim) + +begin + f = Figure(size=(900,900)) + ga = GeoAxis(f[1,1]) + for (i,st) in enumerate(convert(Polygon, col_states)) + poly!(ga, st.geometry, strokewidth=2, color=Symbol("grey", string(20+2*i))) + scatter!(ga, [node[1] for node in bon], [node[2] for node in bon], color=(:red)) + end + f +end + From a888d027f0130595e5a1f16e5788d766b5566997 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 20 Dec 2024 15:13:55 -0500 Subject: [PATCH 04/10] :pencil: type docs --- docs/src/index.md | 5 ++ docs/src/vignettes/entropize.md | 34 ------------- docs/src/vignettes/overview.md | 79 ----------------------------- docs/src/vignettes/uniqueness.md | 54 -------------------- src/geometry/bon.jl | 28 +++++++++- src/geometry/polygon.jl | 6 +++ src/geometry/raster.jl | 16 ++++-- src/geometry/stack.jl | 6 +++ src/sample.jl | 6 +++ src/samplers/grid.jl | 13 ++++- src/samplers/kmeans.jl | 12 ++++- src/samplers/simplerandom.jl | 7 +++ src/samplers/spatiallystratified.jl | 5 ++ 13 files changed, 95 insertions(+), 176 deletions(-) delete mode 100644 docs/src/vignettes/entropize.md delete mode 100644 docs/src/vignettes/overview.md delete mode 100644 docs/src/vignettes/uniqueness.md diff --git a/docs/src/index.md b/docs/src/index.md index 381dd7b..7b04028 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -8,3 +8,8 @@ data. !!! warning "This package is in development" The `BiodiversityObservationNetworks.jl` package is currently under development. The API is not expected to change a lot, but it may change in order to facilitate the integration of new features. + +```@autodocs +Modules = [BiodiversityObservationNetworks] +Order = [:type, :function] +``` \ No newline at end of file diff --git a/docs/src/vignettes/entropize.md b/docs/src/vignettes/entropize.md deleted file mode 100644 index 6734c88..0000000 --- a/docs/src/vignettes/entropize.md +++ /dev/null @@ -1,34 +0,0 @@ -# Getting the entropy matrix - -For some applications, we want to place points to capture the maximum amount of -information, which is to say that we want to sample a balance of *entropy* -values, as opposed to *absolute* values. In this vignette, we will walk through -an example using the `entropize` function to convert raw data to entropy values. - - -```@example 1 -using BiodiversityObservationNetworks -using NeutralLandscapes -using CairoMakie -``` - -!!! warning "Entropy is problem-specific" - The solution presented in this vignette is a least-assumption solution based - on the empirical values given in a matrix of measurements. In a lot of - situations, this is not the entropy that you want. For example, if your pixels are storing probabilities of Bernoulli events, you can directly use the entropy of the events in the entropy matrix. - -We start by generating a random matrix of measurements: - -```@example 1 -measurements = rand(MidpointDisplacement(), (200, 200)) .* 100 -heatmap(measurements) -``` - -Using the `entropize` function will convert these values into entropy at the -pixel scale: - -```@example 1 -U = entropize(measurements) -locations = - seed(BalancedAcceptance(; numsites = 100, uncertainty=U)) -``` diff --git a/docs/src/vignettes/overview.md b/docs/src/vignettes/overview.md deleted file mode 100644 index 8183204..0000000 --- a/docs/src/vignettes/overview.md +++ /dev/null @@ -1,79 +0,0 @@ -# An introduction to BiodiversityObservationNetworks - -In this vignette, we will walk through the basic functionalities of the package, -by generating a random uncertainty matrix, and then using a *seeder* and a -*refiner* to decide which locations should be sampled in order to gain more -insights about the process generating this entropy. - -```@example 1 -using BiodiversityObservationNetworks -using NeutralLandscapes -using CairoMakie -``` - -In order to simplify the process, we will use the *NeutralLandscapes* package to -generate a 100×100 pixels landscape, where each cell represents the entropy (or -information content) in a unit we can sample: - -```@example 1 -U = rand(MidpointDisplacement(0.5), (100, 100)) -heatmap(U) -``` - -In practice, this uncertainty matrix is likely to be derived from an application of the hyper-parameters optimization step, which is detailed in other vignettes. - -The first step of defining a series of locations to sample is to use a -`BONSeeder`, which will generate a number of relatively coarse proposals that -cover the entire landscape, and have a balanced distribution in space. We do so -using the `BalancedAcceptance` sampler, which can be tweaked to capture more (or -less) uncertainty. To start with, we will extract 200 candidate points, *i.e.* -200 possible locations which will then be refined. - - -```@example 1 -candidates = seed(BalancedAcceptance(; numsites = 200)); -``` - -We can have a look at the first five points: - -```@example 1 -candidates[1:5] -``` - -The positions of locations to sample are given as a vector of `CartesianIndex`, -which are coordinates in the uncertainty matrix. Once we have generated a -candidate proposal, we can further refine it using a `BONRefiner` -- in this -case, `AdaptiveSpatial`, which performs adaptive spatial sampling (maximizing -the distribution of entropy while minimizing spatial auto-correlation). - -```@example 1 -locations = refine(candidates, AdaptiveSpatial(; numsites = 50, uncertainty=U)) -locations[1:5] -``` - - -The reason we start from a candidate set of points is that some algorithms -struggle with full landscapes, and work much better with a sub-sample of them. -There is no hard rule (or no heuristic) to get a sense for how many points should be generated at the seeding step, and so experimentation is a must! - -The previous code examples used a version of the `seed` and `refine` functions -that is very useful if you want to change arguments between steps, or examine -the content of the candidate pool of points. In addition to this syntax, both -functions have a curried version that allows chaining them together using pipes -(`|>`): - -```@example 1 -locations = - seed(BalancedAcceptance(; numsites = 200)) |> - refine(AdaptiveSpatial(; numsites = 50, uncertainty=U)) -``` - -This works because `seed` and `refine` have curried versions that can be used -directly in a pipeline. Proposed sampling locations can then be overlayed onto -the original uncertainty matrix: - -```@example 1 -plt = heatmap(U) -scatter!(plt, [x[1] for x in locations], [x[2] for x in locations]) -current_figure() -``` diff --git a/docs/src/vignettes/uniqueness.md b/docs/src/vignettes/uniqueness.md deleted file mode 100644 index 756f11f..0000000 --- a/docs/src/vignettes/uniqueness.md +++ /dev/null @@ -1,54 +0,0 @@ -# Selecting environmentally unique locations - -For some applications, we want to sample a set of locations that cover a broad -range of values in environment space. Another way to rephrase this problem is to -say we want to find the set of points with the _least_ covariance in their -environmental values. - -To do this, we use a `BONRefiner` called `Uniqueness`. We'll start by loading the required packages. - -```@example 1 -using BiodiversityObservationNetworks -using SpeciesDistributionToolkit -using StatsBase -using NeutralLandscapes -using CairoMakie -``` - -!!! warning "Consider setting your SDMLAYERS_PATH" - When accessing data using `SimpleSDMDatasets.jl`, it is best to set the `SDM_LAYERSPATH` environmental variable to tell `SimpleSDMDatasets.jl` where to download data. This can be done by setting `ENV["SDMLAYERS_PATH"] = "/home/user/Data/"` or similar in the `~/.julia/etc/julia/startup.jl` file. (Note this will be different depending on where `julia` is installed.) - -```@example 1 -bbox = (left=-83.0, bottom=46.4, right=-55.2, top=63.7); -temp, precip, elevation = - convert(Float32, SimpleSDMPredictor(RasterData(WorldClim2, AverageTemperature); bbox...)), - convert(Float32, SimpleSDMPredictor(RasterData(WorldClim2, Precipitation); bbox...)), - convert(Float32, SimpleSDMPredictor(RasterData(WorldClim2, Elevation); bbox...)); -``` - -Now we'll use the `stack` function to combine our four environmental layers into a single, 3-dimensional array, which we'll pass to our `Uniqueness` refiner. - -```@example 1 -layers = BiodiversityObservationNetworks.stack([temp,precip,elevation]); -``` - -```@example 1 -uncert = rand(MidpointDisplacement(0.8), size(temp), mask=temp); -heatmap(uncert) -``` - -Now we'll get a set of candidate points from a BalancedAcceptance seeder that has no bias toward higher uncertainty values. - -```@example 1 -candpts = seed(BalancedAcceptance(numsites=100)); -``` - -Now we'll `refine` our `100` candidate points down to the 30 most environmentally unique. - -```@example 1 -finalpts = refine(candpts, Uniqueness(;numsites=30, layers=layers)) -heatmap(uncert) -scatter!([p[1] for p in candpts], [p[2] for p in candpts], color=:white) -scatter!([p[1] for p in finalpts], [p[2] for p in finalpts], color=:dodgerblue, msc=:white) -current_figure() -``` diff --git a/src/geometry/bon.jl b/src/geometry/bon.jl index 02dada0..de0b87e 100644 --- a/src/geometry/bon.jl +++ b/src/geometry/bon.jl @@ -1,24 +1,48 @@ const __COORDINATE_TYPES = Union{Tuple{<:Real, <:Real}, CartesianIndex} -# TODO: extent this to be GeoInterface.PointLike compat +""" + Node + +A single sampling location within a [`BiodiversityObservationNetwork`](@ref), represented as a coordinate. + +A `Node` extends the GeoInterface PointTrait type. +""" struct Node{C <: __COORDINATE_TYPES} coordinate::C end Base.show(io::IO, node::Node) = print(io, "Node at $(node.coordinate)") Base.getindex(node::Node, i) = getindex(node.coordinate, i) +GI.isgeometry(::Node)::Bool = true +GI.geomtrait(::Node)::DataType = PointTrait() +GI.ncoord(::PointTrait, ::Node)::Integer = 1 +GI.getcoord(::PointTrait, node::Node, i)::Real = node[i] is_bonifyable(::T) where T = T <: Union{<:__COORDINATE_TYPES,Vector{<:__COORDINATE_TYPES}} +""" + BiodiversityObservationNetwork + +A set of [`Node`](@ref)s that together create a sampling design for monitoring +biodiversity. + +`BiodiversityObservationNetwork` extends the GeoInterface MultiPointTrait type. +""" struct BiodiversityObservationNetwork{N <: Node} nodes::Vector{N} end + Base.show(io::IO, bon::BiodiversityObservationNetwork) = print(io, "BiodiversityObservationNetwork with $(length(bon)) nodes") Base.getindex(bon::BiodiversityObservationNetwork, i::Integer) = bon.nodes[i] Base.length(bon::BiodiversityObservationNetwork) = length(bon.nodes) Base.iterate(bon::BiodiversityObservationNetwork, i) = iterate(bon.nodes, i) Base.iterate(bon::BiodiversityObservationNetwork) = iterate(bon.nodes) - Base.vcat(bons::Vector{<:BiodiversityObservationNetwork}) = BiodiversityObservationNetwork(vcat([b.nodes for b in bons]...)) + + +GI.isgeometry(::BiodiversityObservationNetwork)::Bool = true +GI.geomtrait(::BiodiversityObservationNetwork)::DataType = MultiPointTrait() +GI.ngeom(::MultiPointTrait, bon::BiodiversityObservationNetwork)::Integer = length(bon) +GI.getgeom(::MultiPointTrait, bon::BiodiversityObservationNetwork, i) = bon[i] \ No newline at end of file diff --git a/src/geometry/polygon.jl b/src/geometry/polygon.jl index ee32953..683f413 100644 --- a/src/geometry/polygon.jl +++ b/src/geometry/polygon.jl @@ -1,3 +1,9 @@ +""" + Polygon + +A Polygon extends the GeoInterface API for both PolygonTrait and +MultiPolygonTraits. +""" struct Polygon{T, G} geometry::G function Polygon(::T, geom::G) where {T <: GI.AbstractTrait, G} diff --git a/src/geometry/raster.jl b/src/geometry/raster.jl index 118f692..45ba0e5 100644 --- a/src/geometry/raster.jl +++ b/src/geometry/raster.jl @@ -1,5 +1,15 @@ const __RASTER_TYPES = Union{SDMLayer, Matrix{<:Real}} +""" + Raster + +A `Raster` stores gridded data. The internal representation of this data can be +an SDMLayer (from +[SimpleSDMLayers.jl](https://github.com/PoisotLab/SpeciesDistributionToolkit.jl)) +or a Matrix. + +`Raster` extends the RasterTrait type from GeoInterface. +""" struct Raster{R <: __RASTER_TYPES} raster::R end @@ -29,18 +39,16 @@ _get_raster_crs(raster::Raster{<:SDMLayer}) = GI.crs(raster.raster) _get_raster_crs(::Raster{<:Matrix}) = nothing +# SpeciesDistributionToolkit Overloads SDT.eastings(r::Raster{<:Matrix}) = 0:(1/size(r)[2]):1 SDT.northings(r::Raster{<:Matrix}) = 0:(1/size(r)[1]):1 - SDT.eastings(r::Raster{<:SDMLayer}) = eastings(r.raster) SDT.northings(r::Raster{<:SDMLayer}) = northings(r.raster) - +# GeoInterface overloads GI.isgeometry(::Raster)::Bool = true GI.geomtrait(::Raster)::DataType = GI.RasterTrait() GI.israster(::Raster)::Bool = true GI.trait(::Raster) = RasterTrait() - - GI.extent(::RasterTrait, raster::Raster)::GI.Extents.Extent = _get_raster_extent(raster) GI.crs(::RasterTrait, raster::Raster)::GeoFormatTypes.CoordinateReferenceSystem = _get_raster_crs(raster) \ No newline at end of file diff --git a/src/geometry/stack.jl b/src/geometry/stack.jl index 521ae73..e5788f6 100644 --- a/src/geometry/stack.jl +++ b/src/geometry/stack.jl @@ -1,3 +1,9 @@ +""" + RasterStack + +A `RasterStack` is a set of [`Raster`](@ref)s, which all have the same +resolution and extent, for storing multiple raster-like environmental variables +""" struct RasterStack{R<:Raster} stack::Vector{R} end diff --git a/src/sample.jl b/src/sample.jl index ff20b41..b62e734 100644 --- a/src/sample.jl +++ b/src/sample.jl @@ -1,3 +1,9 @@ +""" + BONSampler + +An abstract type that is the supertype for all methods for sampling +BiodiversityObservationNetworks. +""" abstract type BONSampler end function _what_did_you_pass(geom) diff --git a/src/samplers/grid.jl b/src/samplers/grid.jl index ce5bc51..1119a05 100644 --- a/src/samplers/grid.jl +++ b/src/samplers/grid.jl @@ -1,16 +1,25 @@ +""" + Grid + +`Grid` is a type of [`BONSampler`](@ref) for generating +[`BiodiversityObservationNetwork`](@ref)s with [`Node`](@ref)s structured as a +systematic grid over the study area. + +*Arguments*: +- `longitude_spacing` +- `latitude_spacing` +""" Base.@kwdef struct Grid{F<:Real} <: BONSampler longitude_spacing::F = 1. # in wgs84 coordinates latitude_spacing::F = 1. end - function _generate_grid(sampler::Grid, domain) x, y = GeoInterface.extent(domain) x_step, y_step = sampler.longitude_spacing, sampler.latitude_spacing BiodiversityObservationNetwork([Node((i,j)) for i in x[1]:x_step:x[2], j in y[1]:y_step:y[2] if GeometryOps.contains(domain, (i,j))]) end - function _sample(sampler::Grid, domain::T) where T if GeoInterface.isgeometry(domain) return _generate_grid(sampler, domain) diff --git a/src/samplers/kmeans.jl b/src/samplers/kmeans.jl index 28ad88b..d3f2ff9 100644 --- a/src/samplers/kmeans.jl +++ b/src/samplers/kmeans.jl @@ -1,8 +1,18 @@ +""" + KMeans + +`KMeans` is a [`BONSampler`](@ref) that generates +[`BiodiversityObservationNetwork`](@ref)s that aim to be representative of the +distribution of environmental variables across an extent represented as a +[`RasterStack`](@ref). + +*Arguments*: +- `k`: the number of sampling sites +""" struct KMeans{I<:Integer} <: BONSampler k::I end - function _sample(::KMeans, ::T) where T @error "Can't use KMeans on a $T" end diff --git a/src/samplers/simplerandom.jl b/src/samplers/simplerandom.jl index cdf8cdc..2c1a0dd 100644 --- a/src/samplers/simplerandom.jl +++ b/src/samplers/simplerandom.jl @@ -1,3 +1,10 @@ +""" + SimpleRandom + +`SimpleRandom` is a [`BONSampler`](@ref) for sampling +[`BiodiversityObservationNetwork`](@ref)s where each location in the spatial +extent has the same probability of inclusion. +""" struct SimpleRandom{I<:Integer} <: BONSampler number_of_nodes::I end diff --git a/src/samplers/spatiallystratified.jl b/src/samplers/spatiallystratified.jl index e7b469f..1f84249 100644 --- a/src/samplers/spatiallystratified.jl +++ b/src/samplers/spatiallystratified.jl @@ -1,3 +1,8 @@ +""" + SpatiallyStratified + +`SpatiallyStratified` is a [`BONSampler`](@ref) for choosing sites across a set of different spatial stratum. +""" struct SpatiallyStratified{I<:Integer} <: BONSampler number_of_nodes::I end From 7c80e23e38b5b7c1c3e321feda41daf706b54add Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 20 Dec 2024 15:31:48 -0500 Subject: [PATCH 05/10] sample docstrings --- src/sample.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/sample.jl b/src/sample.jl index b62e734..ad2bcfa 100644 --- a/src/sample.jl +++ b/src/sample.jl @@ -14,11 +14,23 @@ end const __BON_DOMAINS = Union{Raster, RasterStack, Polygon, Vector{<:Polygon}, BiodiversityObservationNetwork} -function sample(sampler::BONSampler, geom::Any) - T = _what_did_you_pass(geom) - sample(sampler, Base.convert(T, geom)) +""" + sample + +Sample from `geom`. This is highest level dispatch which assumes nothing about +the geometry the user is trying to apply [`BONSampler`](@ref) to. +""" +function sample(sampler::BONSampler, geom::T) where T + GEOM_TYPE = _what_did_you_pass(geom) + isnothing(GEOM_TYPE) && throw(ArgumentError("$T cannot be coerced to a valid Geometry")) + sample(sampler, Base.convert(GEOM_TYPE, geom)) end +""" + sample + +Attempt to use `BONSampler` to sample from a valid `geom` +""" function sample(sampler::BONSampler, geom::__BON_DOMAINS) _sample(sampler, geom) end From 25bc0b5250b6f4fb5d2752ff252f7bd85e10ec84 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 20 Dec 2024 17:07:27 -0500 Subject: [PATCH 06/10] cornerplot for rasterstacks --- _SCRATCH/scratchpad.jl | 6 ++++++ ext/BONsMakieExt/BONsMakieExt.jl | 2 ++ src/BiodiversityObservationNetworks.jl | 4 ++++ src/plotting.jl | 3 +++ 4 files changed, 15 insertions(+) create mode 100644 src/plotting.jl diff --git a/_SCRATCH/scratchpad.jl b/_SCRATCH/scratchpad.jl index f7e9b93..7cbcd83 100644 --- a/_SCRATCH/scratchpad.jl +++ b/_SCRATCH/scratchpad.jl @@ -4,6 +4,7 @@ Pkg.activate(@__DIR__) using BiodiversityObservationNetworks using CairoMakie, GeoMakie + import BiodiversityObservationNetworks as BONs import BiodiversityObservationNetworks.SpeciesDistributionToolkit as SDT import BiodiversityObservationNetworks.GeoInterface as GI @@ -21,6 +22,11 @@ bon = sample(SpatiallyStratified(100), col_states) bon = BONs.sample(Grid(), col) bon = BONs.sample(KMeans(75), bioclim) + +cornerplot(bioclim) + + + begin f = Figure(size=(900,900)) ga = GeoAxis(f[1,1]) diff --git a/ext/BONsMakieExt/BONsMakieExt.jl b/ext/BONsMakieExt/BONsMakieExt.jl index fae1a83..72e6f01 100644 --- a/ext/BONsMakieExt/BONsMakieExt.jl +++ b/ext/BONsMakieExt/BONsMakieExt.jl @@ -8,4 +8,6 @@ else using ..Makie, ..BiodiversityObservationNetworks end +include(joinpath(@__DIR__, "recipes.jl")) + end \ No newline at end of file diff --git a/src/BiodiversityObservationNetworks.jl b/src/BiodiversityObservationNetworks.jl index 118f9fe..01231d0 100644 --- a/src/BiodiversityObservationNetworks.jl +++ b/src/BiodiversityObservationNetworks.jl @@ -26,6 +26,8 @@ module BiodiversityObservationNetworks export nonempty export is_polygonizable, is_rasterizable, is_bonifyable + export cornerplot + include(joinpath("geometry", "bon.jl")) include(joinpath("geometry", "polygon.jl")) include(joinpath("geometry", "raster.jl")) @@ -37,5 +39,7 @@ module BiodiversityObservationNetworks include(joinpath("samplers", "kmeans.jl")) include(joinpath("samplers", "spatiallystratified.jl")) + include("plotting.jl") + end \ No newline at end of file diff --git a/src/plotting.jl b/src/plotting.jl new file mode 100644 index 0000000..4d88b76 --- /dev/null +++ b/src/plotting.jl @@ -0,0 +1,3 @@ +function cornerplot() + @error("Please load `Makie.jl` and before using `bonplot`.") +end \ No newline at end of file From 10fe3a816e3073a8a59278384a802fc02a32142c Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Sat, 21 Dec 2024 09:46:11 -0500 Subject: [PATCH 07/10] overloads and cornerplot --- Project.toml | 2 +- src/geometry/raster.jl | 11 +++++++---- src/geometry/stack.jl | 3 +++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 4ea10cf..888cf4e 100644 --- a/Project.toml +++ b/Project.toml @@ -26,8 +26,8 @@ Term = "22787eb5-b846-44ae-b979-8e399b8463ab" TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe" [weakdeps] -NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" +NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" [extensions] BONsMakieExt = "Makie" diff --git a/src/geometry/raster.jl b/src/geometry/raster.jl index 45ba0e5..e4e2c77 100644 --- a/src/geometry/raster.jl +++ b/src/geometry/raster.jl @@ -15,14 +15,17 @@ struct Raster{R <: __RASTER_TYPES} end Base.show(io::IO, r::Raster) = print(io, "Raster with dimensions $(size(r))") Base.size(r::Raster) = size(r.raster) +Base.convert(::Type{Raster}, sdmlayer::SDMLayer) = Raster(SDMLayer) +Base.convert(::Type{Raster}, m::Matrix) = Raster(m) + +Base.getindex(r::Raster, i::Integer) = getindex(r.raster, i) +Base.getindex(r::Raster, i::CartesianIndex) = getindex(r.raster, i) +Base.getindex(r::Raster, node::Node) = r.raster[node.coordinate...] +Base.getindex(r::Raster, bon::BiodiversityObservationNetwork) = [r.raster[node.coordinate...] for node in bon.nodes] Raster(sdmlayer::SDMLayer) = Raster{typeof(sdmlayer)}(sdmlayer) datatype(::Raster{T}) where T = T.parameters[begin] - -Base.convert(::Type{Raster}, sdmlayer::SDMLayer) = Raster(SDMLayer) -Base.convert(::Type{Raster}, m::Matrix) = Raster(m) - is_rasterizable(::T) where T = T <: __RASTER_TYPES diff --git a/src/geometry/stack.jl b/src/geometry/stack.jl index e5788f6..b0ab046 100644 --- a/src/geometry/stack.jl +++ b/src/geometry/stack.jl @@ -8,7 +8,10 @@ struct RasterStack{R<:Raster} stack::Vector{R} end Base.show(io::IO, ls::RasterStack) = print(io, "RasterStack with $(length(ls.stack)) layers") +Base.getindex(ls::RasterStack, i::Vector{<:Integer})= RasterStack(ls.stack[i]) Base.getindex(ls::RasterStack, i::Integer)= ls.stack[i] +Base.getindex(rs::RasterStack, bon::BiodiversityObservationNetwork) = hcat([[r.raster[node.coordinate...] for r in rs.stack] for node in bon.nodes]...) + Base.length(layers::RasterStack) = length(layers.stack) Base.iterate(layers::RasterStack, i) = iterate(layers.stack, i) Base.iterate(layers::RasterStack) = iterate(layers.stack) From d425fd163a18c4f0a7b1054104af82541beba395 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Sat, 21 Dec 2024 09:46:58 -0500 Subject: [PATCH 08/10] makie recipes --- ext/BONsMakieExt/recipes.jl | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 ext/BONsMakieExt/recipes.jl diff --git a/ext/BONsMakieExt/recipes.jl b/ext/BONsMakieExt/recipes.jl new file mode 100644 index 0000000..0d70b8f --- /dev/null +++ b/ext/BONsMakieExt/recipes.jl @@ -0,0 +1,33 @@ +const MAX_CORNERPLOT_DIMS_BEFORE_PCA = 20 + +function BiodiversityObservationNetworks.cornerplot( + layers::RasterStack; + pca_layers = false +) + + _, mat = BiodiversityObservationNetworks.features(layers) + num_layers = length(layers) + if num_layers > MAX_CORNERPLOT_DIMS_BEFORE_PCA || pca_layers + pca = BiodiversityObservationNetworks.MultivariateStats.fit(BiodiversityObservationNetworks.MultivariateStats.PCA, mat) + @info length(pca.prinvars) + num_layers = length(pca.prinvars) + mat = BiodiversityObservationNetworks.MultivariateStats.transform(pca, mat) + end + + f = Figure(size=(900,900)) + for i in 1:num_layers-1, j in 1:num_layers + if j > i + ax = Axis( + f[j-1,i], + xlabel = j == num_layers ? "$i" : "", + ylabel = i == 1 ? "$j" : "", + xticksvisible=false, + yticksvisible=false, + xticklabelsvisible=false, + yticklabelsvisible=false, + ) + hexbin!(ax, mat[i,:], mat[j,:], bins=40) + end + end + f +end \ No newline at end of file From f90421f23b2736cd8fa0d918431df6bcf4ce3ec8 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 3 Jan 2025 10:03:44 -0500 Subject: [PATCH 09/10] draft of comparing jensen-shannon of sampled to total --- _SCRATCH/scratchpad.jl | 68 +++++++++++++++++++++----- ext/BONsMakieExt/recipes.jl | 51 ++++++++++++++++++- src/BiodiversityObservationNetworks.jl | 3 +- src/plotting.jl | 4 ++ 4 files changed, 111 insertions(+), 15 deletions(-) diff --git a/_SCRATCH/scratchpad.jl b/_SCRATCH/scratchpad.jl index 7cbcd83..fa27c0d 100644 --- a/_SCRATCH/scratchpad.jl +++ b/_SCRATCH/scratchpad.jl @@ -11,29 +11,73 @@ import BiodiversityObservationNetworks.GeoInterface as GI import BiodiversityObservationNetworks.GeometryOps as GO +can = SDT.gadm("CAN") col = SDT.gadm("COL") col_states = SDT.gadm("COL", 1) + +bioclim = SDT.SDMLayer[SDT.SDMLayer(SDT.RasterData(SDT.WorldClim2, SDT.BioClim); layer=i, SDT.boundingbox(col)...) for i in 1:19] +bioclim = RasterStack(SDT.SimpleSDMLayers.mask!(bioclim, col)) -bioclim = RasterStack([SDT.mask!(SDT.SDMLayer(SDT.RasterData(SDT.WorldClim2, SDT.BioClim); layer=i, SDT.boundingbox(col)...), col) for i in 1:19]) + +cornerplot(bioclim, ) bon = sample(SimpleRandom(50), bioclim) -bon = sample(SimpleRandom(10), col_states) +bon = sample(SimpleRandom(5), col_states) bon = sample(SpatiallyStratified(100), col_states) -bon = BONs.sample(Grid(), col) -bon = BONs.sample(KMeans(75), bioclim) +bon = sample(Grid(), col) + +findfirst(x->nothing in x, eachcol(bioclim[bon])) -cornerplot(bioclim) +f = Figure(size=(800, 1200)) +bonplot(f[1,1], bon, col, axistype=GeoAxis) +f +# How to measure the distance between a BON and the whole env space in layer +# stack? +# Marginals instead of MvNormal bc everything gets fucky +# But can penalize w/ weighted distance between Covariance matrices -begin - f = Figure(size=(900,900)) - ga = GeoAxis(f[1,1]) - for (i,st) in enumerate(convert(Polygon, col_states)) - poly!(ga, st.geometry, strokewidth=2, color=Symbol("grey", string(20+2*i))) - scatter!(ga, [node[1] for node in bon], [node[2] for node in bon], color=(:red)) +function _js_thing(rs::RasterStack, bon::BiodiversityObservationNetwork) + function _js(P,Q) + M = BONs.Distributions.MixtureModel([P,Q], [0.5,0.5]) + div = 0.5*BONs.Distributions.kldivergence(P,M) + 0.5 * BONs.Distributions.kldivergence(Q,M) + return sqrt(div / log(2)) end - f + _, Xfull = BONs.features(rs) + Xsampled = rs[bon] + + nlayers = length(rs) + + Σ₁, Σ₂ = zeros(nlayers, nlayers), zeros(nlayers, nlayers) + + 𝓛_js = 0. + for i in axes(Xfull,1) + 𝓝₁ = BONs.Distributions.fit(BONs.Distributions.Normal, Xfull[i,:]) + 𝓝₂ = BONs.Distributions.fit(BONs.Distributions.Normal, Xsampled[i,:]) + 𝓛_js += _js(𝓝₁, 𝓝₂) + for j in i+1:nlayers + Σ₁[i,j] = BONs.Distributions.cov(Xfull[:,i],Xfull[:,j]) + Σ₁[j,i] = Σ₁[i,j] + + Σ₂[i,j] = BONs.Distributions.cov(Xsampled[:,i],Xsampled[:,j]) + Σ₂[j,i] = Σ₂[i,j] + end + end + 𝓛_covariance = sqrt(sum((Σ₁ .- Σ₂).^2)) + + 𝓛_js, 𝓛_covariance end + +n_reps = 500 +n_nodes = [2^i for i in 4:9] +begin + f = Figure() + ax = Axis(f[1,1]) + for n in n_nodes + density!(ax, [_js_thing(bioclim, sample(SimpleRandom(n), bioclim))[1] for _ in 1:n_reps]) + end + f +end \ No newline at end of file diff --git a/ext/BONsMakieExt/recipes.jl b/ext/BONsMakieExt/recipes.jl index 0d70b8f..3d68d4f 100644 --- a/ext/BONsMakieExt/recipes.jl +++ b/ext/BONsMakieExt/recipes.jl @@ -1,8 +1,20 @@ const MAX_CORNERPLOT_DIMS_BEFORE_PCA = 20 +# NOTES: +# The easiest way to make this compatable with GeoMakie (if loaded) or +# else default back to normal Axis is to have it work on a ::Makie.GridPosition, +# which + + +# TODO: pass any transform. Annoying because its a kwarg and could be of type +# <:StatsBase.AbstractDataTransform (as are some things provided by StatsBase, +# e.g. ZScoreTransform, and some things provided by MVStats, e.g. Whitening), +# but also of type MVStats.AbstractDimensionalityReductoin (e.g. PCA, PPCA) + function BiodiversityObservationNetworks.cornerplot( layers::RasterStack; - pca_layers = false + pca_layers = false, + sz = (1600,1600) ) _, mat = BiodiversityObservationNetworks.features(layers) @@ -14,7 +26,7 @@ function BiodiversityObservationNetworks.cornerplot( mat = BiodiversityObservationNetworks.MultivariateStats.transform(pca, mat) end - f = Figure(size=(900,900)) + f = Figure(size=sz) for i in 1:num_layers-1, j in 1:num_layers if j > i ax = Axis( @@ -30,4 +42,39 @@ function BiodiversityObservationNetworks.cornerplot( end end f +end + + +function BiodiversityObservationNetworks.bonplot( + position::GridPosition, + bon::BiodiversityObservationNetwork; + axistype = Makie.Axis +) + ax = axistype(position) + plot = scatter!(ax, [node[1] for node in bon], [node[2] for node in bon], color=(:red)) + Makie.AxisPlot(ax, plot) +end + + +function BiodiversityObservationNetworks.bonplot( + position::GridPosition, + bon::BiodiversityObservationNetwork, + poly::Any; + axistype = Makie.Axis +) + bonspoly = BiodiversityObservationNetworks._convert_to_bons_polygon(poly) + BiodiversityObservationNetworks.bonplot(position, bon, bonspoly; axistype=axistype) +end + + +function BiodiversityObservationNetworks.bonplot( + position::GridPosition, + bon::BiodiversityObservationNetwork, + poly::Polygon; + axistype=Makie.Axis +) + ax = axistype(position) + poly!(ax, poly.geometry, strokewidth=2, color=(:grey, 0.1)) + plot = scatter!(ax, [node[1] for node in bon], [node[2] for node in bon], color=(:red)) + Makie.AxisPlot(ax, plot) end \ No newline at end of file diff --git a/src/BiodiversityObservationNetworks.jl b/src/BiodiversityObservationNetworks.jl index 01231d0..04ac421 100644 --- a/src/BiodiversityObservationNetworks.jl +++ b/src/BiodiversityObservationNetworks.jl @@ -6,6 +6,7 @@ module BiodiversityObservationNetworks using GeoInterface using MultivariateStats using SpeciesDistributionToolkit + using StatsBase using Random import GeoInterface as GI @@ -26,7 +27,7 @@ module BiodiversityObservationNetworks export nonempty export is_polygonizable, is_rasterizable, is_bonifyable - export cornerplot + export cornerplot, bonplot include(joinpath("geometry", "bon.jl")) include(joinpath("geometry", "polygon.jl")) diff --git a/src/plotting.jl b/src/plotting.jl index 4d88b76..98c5bba 100644 --- a/src/plotting.jl +++ b/src/plotting.jl @@ -1,3 +1,7 @@ function cornerplot() + @error("Please load `Makie.jl` and before using `cornerplot`.") +end + +function bonplot() @error("Please load `Makie.jl` and before using `bonplot`.") end \ No newline at end of file From 69b8e169cfdb5551459c2f24b9f0ba147cf50257 Mon Sep 17 00:00:00 2001 From: "michael d. catchen" Date: Fri, 3 Jan 2025 11:21:37 -0500 Subject: [PATCH 10/10] BAS --- _SCRATCH/scratchpad.jl | 3 +- src/BiodiversityObservationNetworks.jl | 5 ++- src/samplers/balancedacceptance.jl | 53 ++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/_SCRATCH/scratchpad.jl b/_SCRATCH/scratchpad.jl index fa27c0d..c7c08f9 100644 --- a/_SCRATCH/scratchpad.jl +++ b/_SCRATCH/scratchpad.jl @@ -26,8 +26,7 @@ bon = sample(SimpleRandom(5), col_states) bon = sample(SpatiallyStratified(100), col_states) bon = sample(Grid(), col) - -findfirst(x->nothing in x, eachcol(bioclim[bon])) +bon = sample(BalancedAcceptance(grid_size=(50,50)), col) f = Figure(size=(800, 1200)) bonplot(f[1,1], bon, col, axistype=GeoAxis) diff --git a/src/BiodiversityObservationNetworks.jl b/src/BiodiversityObservationNetworks.jl index 04ac421..dff1b32 100644 --- a/src/BiodiversityObservationNetworks.jl +++ b/src/BiodiversityObservationNetworks.jl @@ -4,6 +4,7 @@ module BiodiversityObservationNetworks using Distributions using GeometryOps using GeoInterface + using HaltonSequences using MultivariateStats using SpeciesDistributionToolkit using StatsBase @@ -21,7 +22,7 @@ module BiodiversityObservationNetworks export RasterStack export BONSampler - export SimpleRandom, Grid, KMeans, SpatiallyStratified + export SimpleRandom, Grid, KMeans, SpatiallyStratified, BalancedAcceptance export sample export datatype export nonempty @@ -39,6 +40,8 @@ module BiodiversityObservationNetworks include(joinpath("samplers", "grid.jl")) include(joinpath("samplers", "kmeans.jl")) include(joinpath("samplers", "spatiallystratified.jl")) + include(joinpath("samplers", "balancedacceptance.jl")) + include("plotting.jl") diff --git a/src/samplers/balancedacceptance.jl b/src/samplers/balancedacceptance.jl index 706e277..1253120 100644 --- a/src/samplers/balancedacceptance.jl +++ b/src/samplers/balancedacceptance.jl @@ -1,3 +1,56 @@ +""" + BalancedAcceptance + +`BalancedAcceptance` is a type of [`BONSampler`](@ref) for generating +[`BiodiversityObservationNetwork`](@ref)s with spatial spreading. + +*Arguments*: +- `number_of_nodes`: the number of sites to select +- `grid_size`: +""" +Base.@kwdef struct BalancedAcceptance{I<:Integer} <: BONSampler + number_of_nodes::I = 100 + grid_size::Tuple{I,I} = (250, 250) +end + +_sample(::BalancedAcceptance, ::T) where T = throw(ArgumentError("Can't use BalancedAcceptance on a $T")) + +function _sample(sampler::BalancedAcceptance, polygon::Polygon) + x, y = GI.extent(polygon) + grid_size = sampler.grid_size + N = sampler.number_of_nodes + + + Δx = (x[2]-x[1])/grid_size[1] + Δy = (y[2]-y[1])/grid_size[2] + + Es = [x[1] + i*Δx for i in 1:grid_size[1]] + Ns = [y[1] + i*Δy for i in 1:grid_size[2]] + + seed = rand(Int.(1e0:1e7), 2) + + + + selected_points = Node[] + ct = 0 + candct = 0 + while ct < N + i, j = haltonvalue(seed[1] + candct, 2), haltonvalue(seed[2] + candct, 3) + candct += 1 + + # this gets the latlong by gridding the extent with the dims provided in sampler + candx, candy = convert.(Int, [ceil(grid_size[1] * i), ceil(grid_size[2] * j)]) + candidate = (Es[candx], Ns[candy]) + if GeometryOps.contains(polygon, candidate) + push!(selected_points, Node(candidate)) + ct += 1 + end + end + BiodiversityObservationNetwork(selected_points) + +end + + #= """ BalancedAcceptance