diff --git a/CITATION.cff b/CITATION.cff index c4b77eb1..3f20f0ff 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,19 +1,22 @@ -# ----------------------------------------------------------- -# CITATION file created with {cffr} R package, v0.5.0 +# -------------------------------------------- +# CITATION file created with {cffr} R package # See also: https://docs.ropensci.org/cffr/ -# ----------------------------------------------------------- +# -------------------------------------------- cff-version: 1.2.0 message: 'To cite package "spatsoc" in publications use:' type: software license: GPL-3.0-only title: 'spatsoc: Group Animal Relocation Data by Spatial and Temporal Relationship' -version: 0.2.2 +version: 0.2.4.9000 +identifiers: +- type: doi + value: 10.32614/CRAN.package.spatsoc abstract: Detects spatial and temporal groups in GPS relocations (Robitaille et al. - (2019) ). It can be used to convert GPS relocations - to gambit-of-the-group format to build proximity-based social networks In addition, - the randomizations function provides data-stream randomization methods suitable - for GPS data. + (2019) ). It can be used to convert GPS + relocations to gambit-of-the-group format to build proximity-based social networks + In addition, the randomizations function provides data-stream randomization methods + suitable for GPS data. authors: - family-names: Robitaille given-names: Alec L. @@ -63,11 +66,10 @@ references: url: https://www.R-project.org/ authors: - name: R Core Team - location: - name: Vienna, Austria - year: '2023' institution: name: R Foundation for Statistical Computing + address: Vienna, Austria + year: '2024' version: '>= 3.4' - type: software title: adehabitatHR @@ -79,7 +81,8 @@ references: given-names: Clement - family-names: Fortmann-Roe given-names: contributions from Scott - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.adehabitatHR version: '>= 0.4.21' - type: software title: data.table @@ -88,13 +91,24 @@ references: url: https://r-datatable.com repository: https://CRAN.R-project.org/package=data.table authors: + - family-names: Barrett + given-names: Tyson + email: t.barrett88@gmail.com - family-names: Dowle given-names: Matt email: mattjdowle@gmail.com - family-names: Srinivasan given-names: Arun email: asrini@pm.me - year: '2023' + - family-names: Gorecki + given-names: Jan + - family-names: Chirico + given-names: Michael + - family-names: Hocking + given-names: Toby + orcid: https://orcid.org/0000-0002-3146-0865 + year: '2024' + doi: 10.32614/CRAN.package.data.table version: '>= 1.10.5' - type: software title: igraph @@ -128,7 +142,8 @@ references: given-names: Kirill email: kirill@cynkra.com orcid: https://orcid.org/0000-0002-1416-3412 - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.igraph - type: software title: sf abstract: 'sf: Simple Features for R' @@ -140,18 +155,18 @@ references: given-names: Edzer email: edzer.pebesma@uni-muenster.de orcid: https://orcid.org/0000-0001-8049-7069 - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.sf - type: software title: stats abstract: 'R: A Language and Environment for Statistical Computing' notes: Imports authors: - name: R Core Team - location: - name: Vienna, Austria - year: '2023' institution: name: R Foundation for Statistical Computing + address: Vienna, Austria + year: '2024' - type: software title: units abstract: 'units: Measurement Units for R Vectors' @@ -172,7 +187,8 @@ references: given-names: IƱaki email: iucar@fedoraproject.org orcid: https://orcid.org/0000-0001-6403-5550 - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.units - type: software title: asnipe abstract: 'asnipe: Animal Social Network Inference and Permutations for Ecologists' @@ -182,7 +198,8 @@ references: - family-names: Farine given-names: Damien R. email: dfarine@ab.mpg.de - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.asnipe - type: software title: knitr abstract: 'knitr: A General-Purpose Package for Dynamic Report Generation in R' @@ -194,7 +211,8 @@ references: given-names: Yihui email: xie@yihui.name orcid: https://orcid.org/0000-0003-0645-5666 - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.knitr - type: software title: markdown abstract: 'markdown: Render Markdown with ''commonmark''' @@ -210,7 +228,8 @@ references: given-names: JJ - family-names: Horner given-names: Jeffrey - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.markdown - type: software title: rmarkdown abstract: 'rmarkdown: Dynamic Documents for R' @@ -253,7 +272,8 @@ references: given-names: Richard email: rich@posit.co orcid: https://orcid.org/0000-0003-3925-190X - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.rmarkdown - type: software title: testthat abstract: 'testthat: Unit Testing for R' @@ -264,5 +284,7 @@ references: - family-names: Wickham given-names: Hadley email: hadley@posit.co - year: '2023' + year: '2024' + doi: 10.32614/CRAN.package.testthat version: '>= 2.1.0' + diff --git a/CRAN-RELEASE b/CRAN-RELEASE deleted file mode 100644 index 4fa820f4..00000000 --- a/CRAN-RELEASE +++ /dev/null @@ -1,2 +0,0 @@ -This package was submitted to CRAN on 2021-02-23. -Once it is accepted, delete this file and tag the release (commit 66b5ba0). diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION deleted file mode 100644 index 93970dd9..00000000 --- a/CRAN-SUBMISSION +++ /dev/null @@ -1,3 +0,0 @@ -Version: 0.2.2 -Date: 2023-09-07 20:22:33 UTC -SHA: c3ec2a5725d30b8576e48cb0a3e2928def2320cf diff --git a/DESCRIPTION b/DESCRIPTION index b1d1f70c..fff9c703 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: spatsoc Title: Group Animal Relocation Data by Spatial and Temporal Relationship -Version: 0.2.3 +Version: 0.2.4.9000 Authors@R: c( person("Alec L.", "Robitaille", , "robit.alec@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4706-1762")), @@ -38,6 +38,6 @@ VignetteBuilder: knitr Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 SystemRequirements: GDAL (>= 2.0.1), GEOS (>= 3.4.0), PROJ (>= 4.8.0), sqlite3 diff --git a/Makefile b/Makefile deleted file mode 100644 index 203e4a2e..00000000 --- a/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -### Makefile for spatsoc -# Alec Robitaille - -# Website -../spatsoc.gitlab.io/public/index.html: DESCRIPTION README.Rmd README.md vignettes/* _pkgdown.yml - Rscript --vanilla -e "pkgdown::build_site(pkg = '../spatsoc', lazy = FALSE)" - diff --git a/NAMESPACE b/NAMESPACE index 1ac2b55a..db671366 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(build_polys) export(dyad_id) export(edge_dist) export(edge_nn) +export(fusion_id) export(get_gbi) export(group_lines) export(group_polys) diff --git a/NEWS.md b/NEWS.md index 8691c0fa..7645d780 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# v 0.2.4.9000 + +* (experimental) `fusion_id` function for flexibly identifying fission-fusion +events [PR 78](https://github.com/ropensci/spatsoc/pull/78) + + # v 0.2.3 * Fix/replace igraph clusters with components [PR 61](https://github.com/ropensci/spatsoc/pull/61) diff --git a/R/fusion_id.R b/R/fusion_id.R new file mode 100644 index 00000000..461a4cbd --- /dev/null +++ b/R/fusion_id.R @@ -0,0 +1,190 @@ +#' Fission-fusion events +#' +#' \code{fusion_id} identifies fusion events in distance based edge lists. +#' The function accepts a distance based edge list generated by +#' \code{edge_dist}, a threshold argument and arguments controlling how fusion +#' events are defined. +#' +#' The \code{edges} must be a \code{data.table} returned by the \code{edge_dist} +#' function. In addition, \code{fusion_id} requires a dyad ID set on the edge +#' list generated by \code{dyad_id}. If your data is a \code{data.frame}, you +#' can convert it by reference using +#' \code{\link[data.table:setDT]{data.table::setDT}}. +#' +#' The \code{threshold} must be provided in the units of the coordinates. The +#' \code{threshold} must be larger than 0. The coordinates must be planar +#' coordinates (e.g.: UTM). In the case of UTM, a \code{threshold} = 50 would +#' indicate a 50 m distance threshold. +#' +#' The \code{n_min_length} argument defines the minimum number of successive +#' fixes that are required to establish a fusion event. The \code{n_max_missing} +#' argument defines the the maximum number of allowable missing observations for +#' either individual in a dyad within a fusion event. The \code{allow_split} +#' argument defines if a single observation can be greater than the threshold +#' distance without initiating fission event. +#' +#' @return \code{fusion_id} returns the input \code{edges} appended with a +#' \code{fusionID} column. +#' +#' This column represents the fusion event id. As with \code{spatsoc}'s +#' grouping functions, the actual value of \code{fusionID} is arbitrary and +#' represents the identity of a given fusion event. If the data was reordered, +#' the \code{fusionID} may change, but the membership of each fusion event +#' would not. +#' +#' A message is returned when a column named \code{fusionID} already exists in +#' the input \code{edges}, because it will be overwritten. +#' +#' +#' @param edges distance based edge list generated by \code{edge_dist} function, +#' with dyad ID generated by \code{dyad_ID} +#' @param threshold spatial distance threshold in the units of the projection +#' @param n_min_length minimum length of fusion events +#' @param n_max_missing maximum number of missing observations within a fusion +#' event +#' @param allow_split boolean defining if a single observation can be greater +#' than the threshold distance without initiating fission event +#' @export +#' @seealso \code{\link{edge_dist}} +#' @references +#' See examples of identifying fission-fusion events with spatiotemporal data: +#' * +#' * +#' * +#' @examples +#' +#' # Load data.table +#' library(data.table) +#' \dontshow{data.table::setDTthreads(1)} +#' +#' # Read example data +#' DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc")) +#' +#' # Cast the character column to POSIXct +#' DT[, datetime := as.POSIXct(datetime, tz = 'UTC')] +#' +#' # Temporal grouping +#' group_times(DT, datetime = 'datetime', threshold = '20 minutes') +#' +#' # Edge list generation +#' edges <- edge_dist( +#' DT, +#' threshold = 100, +#' id = 'ID', +#' coords = c('X', 'Y'), +#' timegroup = 'timegroup', +#' returnDist = TRUE, +#' fillNA = TRUE +#' ) +#' +#' dyad_id(edges, 'ID1', 'ID2') +#' +#' fusion_id( +#' edges = edges, +#' threshold = 100, +#' n_min_length = 1, +#' n_max_missing = 0, +#' allow_split = FALSE +#' ) +fusion_id <- function(edges = NULL, + threshold = 50, + n_min_length = 0, + n_max_missing = 0, + allow_split = FALSE) { + + # due to NSE notes in R CMD check + . <- both_rleid <- distance <- dyadID <- fusionID <- tg_diff <- timegroup <- within_rleid <- NULL + + if (is.null(edges)) { + stop('input edges required') + } + + stopifnot('dyadID' %in% colnames(edges)) + stopifnot('timegroup' %in% colnames(edges)) + stopifnot('distance' %in% colnames(edges)) + + stopifnot(is.numeric(threshold)) + stopifnot(is.numeric(n_min_length)) + stopifnot(is.numeric(n_max_missing)) + stopifnot(is.logical(allow_split)) + + stopifnot(threshold >= 0) + + unique_edges <- unique(edges[, .(dyadID, timegroup, distance)]) + + setorder(unique_edges, 'timegroup') + + # Check if edge distance less than threshold + unique_edges[, within := distance < threshold] + + # If allow split, check if previously within threshold, and + # timegroup difference between before and after is only 1 + if (allow_split) { + unique_edges[, within := data.table::fifelse( + within | timegroup == min(timegroup), + within, + data.table::shift(within, -1) & + data.table::shift(within, 1) & + timegroup - data.table::shift(timegroup, 1) == 1 + ), by = dyadID] + } + + # Runs of within + unique_edges[, within_rleid := data.table::rleid(within), by = dyadID] + unique_edges[!(within), within_rleid := NA_integer_] + + # Check timegroup difference, unless first obs for dyad + unique_edges[, tg_diff := data.table::fifelse( + within, + timegroup - data.table::shift(timegroup, 1) <= 1 | + timegroup == min(timegroup), + NA + ), by = dyadID] + + # If missing obs allowed, adjust timegroup difference to allow as long as + # previously within threshold distance + if (n_max_missing > 0) { + unique_edges[, tg_diff := data.table::fifelse( + tg_diff, + tg_diff, + data.table::shift(within, 1) & + (timegroup - data.table::shift(timegroup, 1)) <= + (1 + n_max_missing) + ), by = dyadID] + } + + # Get runs on within and timegroup difference. Adjust if runs of isolated + # observations together (eg. within T, T but timegroup diff F, F) + unique_edges[(within), both_rleid := data.table::rleid(within_rleid, tg_diff), by = dyadID] + unique_edges[(within) & !(tg_diff), + both_rleid := (both_rleid + seq.int(.N)) * -1, + by = dyadID] + + # Correct if (looking forward) the loc is part of a new fusion run + unique_edges[, both_rleid := data.table::fifelse( + timegroup - data.table::shift(timegroup, -1) == -1 & + within & !(tg_diff), + data.table::shift(both_rleid, -1), + both_rleid + ), by = dyadID] + + # If n minimum length > 0, check nrows and return NA if less than min + if (n_min_length > 0) { + unique_edges[!is.na(both_rleid), both_rleid := data.table::fifelse( + .N >= n_min_length, + both_rleid, + NA_integer_), + by = .(dyadID, both_rleid)] + } + + # Set fusion id on runs and dyad id + unique_edges[!is.na(both_rleid), fusionID := .GRP, by = .(dyadID, both_rleid)] + + # Merge fusion id onto input edges + if ('fusionID' %in% colnames(edges)) { + message('fusionID column will be overwritten by this function') + data.table::set(edges, j = 'fusionID', value = NULL) + } + edges[unique_edges, fusionID := fusionID, on = .(timegroup, dyadID)] + return(edges) +} diff --git a/codemeta.json b/codemeta.json index 574ff201..7b61cb03 100644 --- a/codemeta.json +++ b/codemeta.json @@ -4,17 +4,17 @@ "identifier": "spatsoc", "description": "Detects spatial and temporal groups in GPS relocations (Robitaille et al. (2019) ). It can be used to convert GPS relocations to gambit-of-the-group format to build proximity-based social networks In addition, the randomizations function provides data-stream randomization methods suitable for GPS data.", "name": "spatsoc: Group Animal Relocation Data by Spatial and Temporal Relationship", - "relatedLink": ["https://docs.ropensci.org/spatsoc/", "https://CRAN.R-project.org/package=spatsoc"], + "relatedLink": "https://docs.ropensci.org/spatsoc/", "codeRepository": "https://github.com/ropensci/spatsoc", "issueTracker": "https://github.com/ropensci/spatsoc/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "0.2.2", + "version": "0.2.4.9000", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.3.1 (2023-06-16)", + "runtimePlatform": "R version 4.4.1 (2024-06-14)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -190,7 +190,7 @@ }, "SystemRequirements": "GDAL (>= 2.0.1), GEOS (>= 3.4.0), PROJ (>= 4.8.0),\n sqlite3" }, - "fileSize": "1868.342KB", + "fileSize": "1881.983KB", "citation": [ { "@type": "ScholarlyArticle", @@ -223,15 +223,5 @@ } } } - ], - "releaseNotes": "https://github.com/ropensci/spatsoc/blob/master/NEWS.md", - "readme": "https://github.com/ropensci/spatsoc/blob/main/README.md", - "contIntegration": ["https://app.codecov.io/gh/ropensci/spatsoc", "https://github.com/ropensci/spatsoc/actions/workflows/R-CMD-check.yaml"], - "developmentStatus": "https://www.repostatus.org/", - "review": { - "@type": "Review", - "url": "https://github.com/ropensci/software-review/issues/237", - "provider": "https://ropensci.org" - }, - "keywords": ["r", "spatial", "social", "gps", "animal", "network", "r-package", "rstats"] + ] } diff --git a/man/fusion_id.Rd b/man/fusion_id.Rd new file mode 100644 index 00000000..27641225 --- /dev/null +++ b/man/fusion_id.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fusion_id.R +\name{fusion_id} +\alias{fusion_id} +\title{Fission-fusion events} +\usage{ +fusion_id( + edges = NULL, + threshold = 50, + n_min_length = 0, + n_max_missing = 0, + allow_split = FALSE +) +} +\arguments{ +\item{edges}{distance based edge list generated by \code{edge_dist} function, +with dyad ID generated by \code{dyad_ID}} + +\item{threshold}{spatial distance threshold in the units of the projection} + +\item{n_min_length}{minimum length of fusion events} + +\item{n_max_missing}{maximum number of missing observations within a fusion +event} + +\item{allow_split}{boolean defining if a single observation can be greater +than the threshold distance without initiating fission event} +} +\value{ +\code{fusion_id} returns the input \code{edges} appended with a +\code{fusionID} column. + +This column represents the fusion event id. As with \code{spatsoc}'s +grouping functions, the actual value of \code{fusionID} is arbitrary and +represents the identity of a given fusion event. If the data was reordered, +the \code{fusionID} may change, but the membership of each fusion event +would not. + +A message is returned when a column named \code{fusionID} already exists in +the input \code{edges}, because it will be overwritten. +} +\description{ +\code{fusion_id} identifies fusion events in distance based edge lists. +The function accepts a distance based edge list generated by +\code{edge_dist}, a threshold argument and arguments controlling how fusion +events are defined. +} +\details{ +The \code{edges} must be a \code{data.table} returned by the \code{edge_dist} +function. In addition, \code{fusion_id} requires a dyad ID set on the edge +list generated by \code{dyad_id}. If your data is a \code{data.frame}, you +can convert it by reference using +\code{\link[data.table:setDT]{data.table::setDT}}. + +The \code{threshold} must be provided in the units of the coordinates. The +\code{threshold} must be larger than 0. The coordinates must be planar +coordinates (e.g.: UTM). In the case of UTM, a \code{threshold} = 50 would +indicate a 50 m distance threshold. + +The \code{n_min_length} argument defines the minimum number of successive +fixes that are required to establish a fusion event. The \code{n_max_missing} +argument defines the the maximum number of allowable missing observations for +either individual in a dyad within a fusion event. The \code{allow_split} +argument defines if a single observation can be greater than the threshold +distance without initiating fission event. +} +\examples{ + +# Load data.table +library(data.table) +\dontshow{data.table::setDTthreads(1)} + +# Read example data +DT <- fread(system.file("extdata", "DT.csv", package = "spatsoc")) + +# Cast the character column to POSIXct +DT[, datetime := as.POSIXct(datetime, tz = 'UTC')] + +# Temporal grouping +group_times(DT, datetime = 'datetime', threshold = '20 minutes') + +# Edge list generation +edges <- edge_dist( + DT, + threshold = 100, + id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup', + returnDist = TRUE, + fillNA = TRUE + ) + +dyad_id(edges, 'ID1', 'ID2') + +fusion_id( + edges = edges, + threshold = 100, + n_min_length = 1, + n_max_missing = 0, + allow_split = FALSE + ) +} +\references{ +See examples of identifying fission-fusion events with spatiotemporal data: +\itemize{ +\item \url{doi:10.1111/ele.12457} +\item \url{doi:10.1016/j.anbehav.2018.03.014} +\item \url{doi:10.1890/08-0345.1} +} +} +\seealso{ +\code{\link{edge_dist}} +} diff --git a/man/spatsoc.Rd b/man/spatsoc.Rd index 84fd40e8..31afc909 100644 --- a/man/spatsoc.Rd +++ b/man/spatsoc.Rd @@ -3,7 +3,6 @@ \docType{package} \name{spatsoc} \alias{spatsoc} -\alias{_PACKAGE} \alias{spatsoc-package} \title{spatsoc} \description{ diff --git a/tests/testthat/test-fusion-id.R b/tests/testthat/test-fusion-id.R new file mode 100644 index 00000000..3b4ca4d3 --- /dev/null +++ b/tests/testthat/test-fusion-id.R @@ -0,0 +1,123 @@ +context("test-fusion-id") + +library(spatsoc) + +DT <- fread('../testdata/DT.csv') + +group_times(DT, datetime = 'datetime', threshold = '20 minutes') + +edges <- edge_dist( + DT, + threshold = 100, + id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup', + returnDist = TRUE, + fillNA = TRUE + ) + +dyad_id(edges, 'ID1', 'ID2') + +test_that('edges is required', { + expect_error(fusion_id(), + 'input edges required') +}) + + +test_that('columns are required otherwise error detected', { + expect_error(fusion_id( + edges = edges[, .SD, .SDcols = -'dyadID'] + )) + expect_error(fusion_id( + edges = edges[, .SD, .SDcols = -'timegroup'] + )) + expect_error(fusion_id( + edges = edges[, .SD, .SDcols = -'distance'] + )) +}) + +test_that('arguments are correctly provided or error detected', { + expect_error(fusion_id( + edges = edges, + threshold = NULL + )) + expect_error(fusion_id( + edges = edges, + threshold = -42 + )) + expect_error(fusion_id( + edges = edges, + threshold = "potato" + )) + + expect_error(fusion_id( + edges = edges, + n_min_length = 'potato' + )) + + expect_error(fusion_id( + edges = edges, + n_max_missing = 'potato' + )) + expect_error(fusion_id( + edges = edges, + allow_split = 'potato' + )) + expect_error(fusion_id( + edges = edges, + allow_split = 42 + )) +}) + +test_that('returns a data.table', { + expect_s3_class(fusion_id( + edges = edges, + threshold = 50 + ), 'data.table') +}) + +test_that('returns a numeric fusionID column', { + edges[, fusionID := NULL] + expect_contains(colnames(fusion_id( + edges = edges, + threshold = 50 + )), 'fusionID') + expect_type(edges$fusionID, 'integer') +}) + +test_that('message if fusionID column already present, overwritten', { + fusionID_present <- copy(edges)[, fusionID := 42] + expect_message(fusion_id( + edges = fusionID_present, + threshold = 50 + ), 'fusionID column will be overwritten by this function') +}) + + + +test_that('allow_split TRUE returns less unique fusionID', { + # When splits are allowed, more fusion events are combined + # resulting in less unique fusion events + expect_lt( + fusion_id(edges, allow_split = TRUE)[, uniqueN(fusionID)], + fusion_id(edges, allow_split = FALSE)[, uniqueN(fusionID)] + ) +}) + +test_that('larger n_max_missing returns less unique fusionID', { + # When a larger n max missing is provided, more fusion events are combined + # resulting in less unique fusion events + expect_lt( + fusion_id(edges, n_max_missing = 3)[, uniqueN(fusionID)], + fusion_id(edges, n_max_missing = 0)[, uniqueN(fusionID)] + ) +}) + +test_that('larger n_min_length returns less unique fusionID', { + # When a larger n min length is provided, less fusion events are identified + # resulting in less unique fusion events + expect_lt( + fusion_id(edges, n_min_length = 3)[, uniqueN(fusionID)], + fusion_id(edges, n_min_length = 0)[, uniqueN(fusionID)] + ) +})