Skip to content

Commit

Permalink
Blocking function added
Browse files Browse the repository at this point in the history
  • Loading branch information
8Ginette8 committed Jan 7, 2025
1 parent 412d6c4 commit 421652b
Show file tree
Hide file tree
Showing 22 changed files with 332 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .Rproj.user/DBBE3EC9/pcs/source-pane.pper
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"activeTab": 0
"activeTab": -1
}
2 changes: 1 addition & 1 deletion .Rproj.user/DBBE3EC9/pcs/windowlayoutstate.pper
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"left": {
"splitterpos": 383,
"topwindowstate": "NORMAL",
"topwindowstate": "HIDE",
"panelheight": 958,
"windowheight": 996
},
Expand Down
11 changes: 0 additions & 11 deletions .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -1,12 +1 @@
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/.github/workflows/R-CMD-check-all.yml="1A84B01C"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/.github/workflows/R-CMD-check-month-test.yml="D4BBA76F"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/.gitignore="ECF04EF5"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/NAMESPACE="79356337"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/R/conv_function.R="99CB140E"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/README.md="F2158441"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/inst/examples/get_range_help.R="C765AE8E"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/inst/examples/make_ecoregion_help.R="DBD0841D"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/man/get_gbif.Rd="5FF8208F"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/man/get_range.Rd="D136AD48"
C:/Users/am92guke/Documents/iDiv/code/gbif_range/gbif.range/man/make_ecoregion.Rd="D4D98882"
D:/OneDrive - Eidg. Forschungsanstalt WSL/gitlab/gbif.range/DESCRIPTION="D306FE16"
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: gbif.range
Version: 1.0
Title: A toolbox to generate species range maps based on ecoregions and a user-friendly GBIF wrapper
Depends: R (>= 4.0.0)
Imports: terra, rgbif, CoordinateCleaner, sf, ClusterR, FNN, geometry, cluster, rnaturalearth, mclust, methods, utils, zip
Imports: terra, rgbif, CoordinateCleaner, sf, ClusterR, FNN, geometry, cluster, rnaturalearth, mclust, methods, utils, zip, class, NMOF
Description: This package provides a workflow to generate species range maps from scratch based on ecoregions and a user-friendly GBIF wrapper. It includes functions to create taxa range maps using ecoregions and to interact with the Global Biodiversity Information Facility (GBIF) database (retrieving species records, backbone taxonomy and IUCN conservation status). The package facilitates the process of accessing GBIF data and integrating it with ecoregion information for species distribution analysis.
License: GPL (>= 3) | file LICENCE
BugReports: https://github.com/8Ginette8/gbif.range/issues
Expand Down Expand Up @@ -32,3 +32,4 @@ Collate:
'make_ecoregion.R'
'get_bioreg.R'
'evaluate_range.R'
'make_blocks.R'
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(get_doi)
export(get_gbif)
export(get_range)
export(get_status)
export(make_blocks)
export(make_ecoregion)
export(make_tiles)
export(obs_filter)
Expand All @@ -17,6 +18,8 @@ importFrom(ClusterR,KMeans_rcpp)
importFrom(CoordinateCleaner,cd_ddmm)
importFrom(CoordinateCleaner,cd_round)
importFrom(FNN,knn.dist)
importFrom(NMOF,gridSearch)
importFrom(class,knn)
importFrom(cluster,clara)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
Expand Down
180 changes: 180 additions & 0 deletions R/make_blocks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
### =========================================================================
### cv blocks
### =========================================================================
#' Block-wise split data into training and testing
#'
#' Creates a stratum vector based on a data.frame with n columns. If the data.frame
#' has one column, strata vector is created based on clusters separated by quantiles. If
#' the data.frame has two or more columns, strata vector is created based on 'Clustering
#' Large Applications' (function 'clara' from package cluster). Also, instead of a data.frame
#' the argument 'npoints' can be provided, which create groups by random sampling. An
#' opitimization algorithm (function 'gridSearch' from package NMOF) optimizes for equal stratum sizes.
#'
#' @param nfolds Numeric. Number of approximately equal-sized classes (folds) to separate groups in block-cross validation
#' @param df Object of class 'data.frame' with n columns containing critera for cluster building.
#' Not necessary if argument npoints is supplied
#' @param nblocks Number of clusters (blocks) based on the number of folds that should be built.
#' Minimum is the same number as 'nFolds'. Maximum is nrow(df)/10.
#' @param pres Binary vector. Optional argument. If 'df' is supplied, this argument can be used to
#' save processing time. '1' stands for the points on which CLARA is appplied (most likely the species
#' observations), and '0' stands for the points on which K-nearest neighbors is applied relative to
#' the '1' (most likely the absences, background points...). If 'df' is not supplied, for which points
#' should random sampling be made?
#' @param npoints Optional argument if 'df' is not supplied. For how many points should random sampling be made?
#' @return Object of class 'vector' of length nrow(df) or 'npoints', with integers defining
#' different folds
#' @references
#' Brun, P., Thuiller, W., Chauvier, Y., Pellissier, L., Wüest, R. O., Wang, Z., & Zimmermann, N. E. (2020).
#' Model complexity affects species distribution projections under climate change.
#' Journal of Biogeography, 47(1), 130-142.
#' @author Philipp Brun
#' @example inst/examples/make_blocks_help.R
#' @export
#' @importFrom cluster clara
#' @importFrom class knn
#' @importFrom NMOF gridSearch
make_blocks<-function(nfolds = 4,
df = data.frame(),
nblocks = nfolds*5,
npoints = NA,
pres = numeric()){

### ------------------------
### check input data
### ------------------------

if (nrow(df)==0 & is.na(npoints)){
stop("Please supply number of points if no data.frame is supplied")
}

### ------------------------
### generate clusters
### ------------------------

if (nrow(df)==0){

### do ordinary sampling if no strata are supplied
out.strat=sample(rep(1:nfolds,ceiling(npoints/nfolds)),size=npoints)

} else {

# check for reasonable number of boxes
if (nrow(df)<4*nblocks){
stop("Too many boxes required!")
}

if (ncol(df)==1){

### do quantile-based clustering if df contains only one column
rngi = quantile(df[,1],probs=0:(nblocks)/(nblocks))
rngi[1] = rngi[1]-1
rngi[length(rngi)] = rngi[length(rngi)]+1
clist = as.numeric(cut(df[,1],breaks=rngi,right=TRUE))

} else {

# Scale input data
scd = apply(df,2,scale)

if (length(pres)==0){

# do kmedoid clustering for 2 or more columns in df
kmed = clara(scd,k=nblocks,metric="euclidean")

# get clusters
clist = kmed$clustering

} else {

# do kmedoid clustering for 2 or more columns in df
kmed = clara(scd[which(pres==1),],k=nblocks,metric="manhattan")
knnab = knn(train=scd[which(pres==1),],test=scd[which(pres==0),],cl=kmed$clustering)
clist = kmed$clustering
cliful = rep(NA,nrow(scd))
cliful[which(pres==1)] = kmed$clustering
cliful[which(pres==0)] = as.numeric(knnab)
}
}

# sort obtained clusters
tbl = sort(table(clist),decreasing = TRUE)

### ------------------------
### regularly assign clusters to strata
### ------------------------

if (nblocks != nfolds){

# prepare strata layers
grps = rep(list(numeric()),nfolds)

# for the clusters with many observations
# distribute them regularly among strata but keep
# last six clusters for estimating most
# regular distribution

if (length(tbl)>6){

for (i in 1:(length(tbl)-6)){

# determine to which stratum the cluster should be
# added
fl=(floor((i-1)/nfolds))
if (fl%%2==0){

j = round(1+nfolds*((i-1)/nfolds-(floor((i-1)/nfolds))))

} else {

j = (nfolds+1)-round(1+nfolds*((i-1)/nfolds-(floor((i-1)/nfolds))))
}
# add cluster
grps[[j]] = append(grps[[j]],tbl[i])
}
}

# prepare for optimal distribution of last 6 clusters
vlis = factor(1:nfolds,levels=1:nfolds)
prs = rep(list(vlis),min(length(tbl),6))
sstab = tbl[max(1,(length(tbl)-5)):length(tbl)]

# Run brute-forcing gridSearch obtimization
srch = gridSearch(levels=prs,fun=optme,nms=as.vector(sstab),grps=grps,tot=sum(tbl))

# pull out results
wi = as.numeric(as.character(srch$minlevels))

# combine results with predistributed clusters
for (i in 1:length(grps)){
grps[[i]] = append(grps[[i]],sstab[wi==i])
}

# define vector with output strata
out.strat = rep(NA,nrow(df))
for (i in 1:length(grps)){

if (length(pres)==0){

out.strat[which(as.character(clist)%in%names(grps[[i]]))] = i

} else {

out.strat[which(as.character(cliful)%in%names(grps[[i]]))] = i
}
}

} else {

# if as many strata as clusters are required, simply return clusters
if (length(pres)==0){

out.strat = clist
} else{

out.strat = cliful
}
}
}
# return result
return(out.strat)
}
8 changes: 4 additions & 4 deletions inst/examples/get_doi_help.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ obsam <- get_gbif("Ailuropoda melanoleuca")

\dontrun{
# Retrieve DOI for only one get_gbif() output
get_doi(obspt,title="GBIF_test1",description="A small example 1",
source_url="https://example.com/",user="",pwd="") # Use your own GBIF credentials here
get_doi(obspt, title = "GBIF_test1", description = "A small example 1",
source_url = "https://example.com/", user = "", pwd = "") # Use your own GBIF credentials here

# Retrieve DOIs for several get_gbif() outputs
get_doi(list(obspt,obsam),title="GBIF_test2",description="A small example 2",
source_url="https://example.com/",user="",pwd="") # Use your own GBIF credentials here
get_doi(list(obspt,obsam),title = "GBIF_test2",description = "A small example 2",
source_url = "https://example.com/",user = "",pwd = "") # Use your own GBIF credentials here
}
12 changes: 6 additions & 6 deletions inst/examples/get_gbif_help.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# Downloading worldwide the observations of Panthera tigris
obs.pt <- get_gbif("Panthera tigris",
basis=c("OBSERVATION",
basis = c("OBSERVATION",
"HUMAN_OBSERVATION",
"MACHINE_OBSERVATION"))
countries <- terra::vect(rnaturalearth::ne_countries(type="countries", returnclass = "sf"))
terra::plot(countries,col="#bcbddc")
points(obs.pt[,c("decimalLongitude","decimalLatitude")], pch=20, col="#238b4550", cex=4)
countries <- terra::vect(rnaturalearth::ne_countries(type = "countries", returnclass = "sf"))
terra::plot(countries,col = "#bcbddc")
points(obs.pt[,c("decimalLongitude","decimalLatitude")], pch = 20, col = "#238b4550", cex = 4)

\dontrun{
# Downloading worldwide the observations of Ailuropoda melanoleuca (with a 100km grain, after 1990
# and by keeping duplicates and by adding the name of the person who collected the panda records)
obs.am <- get_gbif("Ailuropoda melanoleuca", grain = 100000 , duplicates = TRUE,
time_period = c(1990,3000), add_infos = c("recordedBy","issue"))
terra::plot(countries,col="#bcbddc")
graphics::points(obs.am[,c("decimalLongitude","decimalLatitude")],pch=20,col="#238b4550",cex=4)
terra::plot(countries, col = "#bcbddc")
graphics::points(obs.am[,c("decimalLongitude","decimalLatitude")], pch = 20, col = "#238b4550", cex = 4)

# Downloading worlwide the observations of Phascolarctos cinereus (with a 1km grain, after 1980,
# and keeping raster centroids)
Expand Down
11 changes: 5 additions & 6 deletions inst/examples/get_range_help.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
\dontrun{
# Load available ecoregions
eco_terra = read_bioreg(bioreg_name = "eco_terra", save_dir = NULL)
eco_terra <- read_bioreg(bioreg_name = "eco_terra", save_dir = NULL)

# First download the worldwide observations of Panthera tigris from GBIF
occ <- get_gbif("Panthera tigris",
basis=c("OBSERVATION",
basis = c("OBSERVATION",
"HUMAN_OBSERVATION",
"MACHINE_OBSERVATION"))

Expand All @@ -14,11 +14,10 @@ occ <- get_gbif("Panthera tigris",
range <- get_range(occ, eco_terra,"ECO_NAME")

# Plot
terra::plot(range, axes = FALSE, box = FALSE, legend=FALSE, col="chartreuse4")
terra::plot(range, axes = FALSE, box = FALSE, legend=FALSE, col = "chartreuse4")

# Plot political world boundaries
terra::plot(rnaturalearth::ne_countries(returnclass = "sf")[1], add=T, col=NA)
terra::plot(rnaturalearth::ne_countries(returnclass = "sf")[1], add = TRUE, col = NA)
# Plot the occurance points
graphics::points(occ[,c("decimalLongitude","decimalLatitude")],pch=20,col="#99340470",cex=1.5)

graphics::points(occ[,c("decimalLongitude","decimalLatitude")],pch = 20,col = "#99340470",cex = 1.5)
}
4 changes: 2 additions & 2 deletions inst/examples/get_status_help.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Get the taxonomy and IUCN status of Panthera tigris (with and without related names)
get_status("Cypripedium calceolus",all=FALSE)
get_status("Cypripedium calceolus",all=TRUE)
get_status("Cypripedium calceolus", all = FALSE)
get_status("Cypripedium calceolus", all = TRUE)
16 changes: 16 additions & 0 deletions inst/examples/make_blocks_help.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
\dontrun{
# Downloading worldwide the observations of Panthera tigris
obs.pt <- get_gbif("Panthera tigris",
basis = c("OBSERVATION",
"HUMAN_OBSERVATION",
"MACHINE_OBSERVATION"))

# Create a vector of folds (n = 5) spatially blocked (n = 10)
block.pt <- make_blocks(nfolds = 5, df = obs.pt[,c("decimalLatitude","decimalLongitude")], nblocks = 10)

# Plot one colour per fold
countries <- terra::vect(rnaturalearth::ne_countries(type = "countries", returnclass = "sf"))
countries.focus <- crop(countries, ext(60,100,0,40))
terra::plot(countries.focus, col = "#bcbddc")
graphic::points(obs.pt[,c("decimalLongitude","decimalLatitude")], pch = 20, col = block.pt, cex = 1)
}
13 changes: 6 additions & 7 deletions inst/examples/make_ecoregion_help.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
#TODO transfer to data()
# Open
rst.path <- paste0(system.file(package = "gbif.range"),"/extdata/rst_enl.tif")
rst <- terra::rast(rst.path)
shp.path <- paste0(system.file(package = "gbif.range"),"/extdata/shp_lonlat.shp")
shp.lonlat <- terra::vect(shp.path)
rst <- terra::crop(rst, shp.lonlat)
#plot(crp)

# Apply the function by infering 50 classes of environments
my.eco <- make_ecoregion(rst,50)
terra::plot(my.eco)

# Downloading in the European Alps the observations of one plant species
obs.arcto <- get_gbif("Arctostaphylos alpinus",geo=shp.lonlat)
obs.arcto <- get_gbif("Arctostaphylos alpinus", geo = shp.lonlat)

# Create the range map based on our custom ecoregion
range.arcto <- get_range(obs.arcto, my.eco, "EcoRegion", res=20)
range.arcto <- get_range(obs.arcto, my.eco, "EcoRegion", res = 20)

# Plot
terra::plot(shp.lonlat, col="grey")
terra::plot(range.arcto,add=TRUE,col=rgb(0.2,1,0.2,0.5,1))
graphics::points(obs.arcto[,c("decimalLongitude","decimalLatitude")],pch=20,col="orange1",cex=1)
terra::plot(shp.lonlat, col = "grey")
terra::plot(range.arcto, add = TRUE, col = rgb(0.2,1,0.2,0.5,1))
graphics::points(obs.arcto[,c("decimalLongitude","decimalLatitude")], pch = 20, col = "orange1", cex = 1)
2 changes: 1 addition & 1 deletion inst/examples/make_tiles_help.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ shp.path <- paste0(system.file(package = "gbif.range"),"/extdata/shp_lonlat.shp"
shp.lonlat <- terra::vect(shp.path)

# Apply the function to divide the extent in ~20 fragments
mt = make_tiles(geo=shp.lonlat,Ntiles=20,sext=TRUE)
mt <- make_tiles(geo = shp.lonlat, Ntiles = 20, sext = TRUE)
Loading

0 comments on commit 421652b

Please sign in to comment.