diff --git a/.gitignore b/.gitignore index 0f899e5..a9f1c82 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,7 @@ /*.Rcheck/ # RStudio files -.Rproj.user/ +.Rproj.user # produced vignettes vignettes/*.html diff --git a/R/fun_check_data.R b/R/fun_check_data.R index 1388a21..6ad4056 100644 --- a/R/fun_check_data.R +++ b/R/fun_check_data.R @@ -25,10 +25,12 @@ atl_check_data <- function(data, # get the colmumn names data_names <- colnames(data) - invisible(lapply(names_expected, function(nr) { - assertthat::assert_that(nr %in% data_names, - msg = glue::glue("atl_check_data: {nr} is + invisible( + vapply(names_expected, function(nr) { + assertthat::assert_that(nr %in% data_names, + msg = glue::glue("atl_check_data: {nr} is required but missing from data!") - ) - })) + ) + }, FUN.VALUE = TRUE) + ) } diff --git a/R/fun_filter_covariates.R b/R/fun_filter_covariates.R index 52dcff8..36130d0 100644 --- a/R/fun_filter_covariates.R +++ b/R/fun_filter_covariates.R @@ -1,15 +1,15 @@ #' Filter data by position covariates. #' -#' The atlastools function \code{atl_filter_covariates} allows convenient +#' The atlastools function \code{atl_filter_covariates} allows convenient #' filtering of a dataset by any number of logical filters. -#' This function can be used to easily filter timestamps in a range, as well as +#' This function can be used to easily filter timestamps in a range, as well as #' combine simple spatial and temporal filters. -#' It accepts a character vector of \code{R} expressions that each return a +#' It accepts a character vector of \code{R} expressions that each return a #' logical vector (i.e., \code{TRUE} or \code{FALSE}). -#' Each filtering condition is interpreted in the context of the dataset -#' supplied, and used to filter for rows that satisfy each of the filter +#' Each filtering condition is interpreted in the context of the dataset +#' supplied, and used to filter for rows that satisfy each of the filter #' conditions. 
-#' Users must make sure that the filtering variables exist in their dataset in +#' Users must make sure that the filtering variables exist in their dataset in #' order to avoid errors. #' #' @author Pratik R. Gupte diff --git a/R/fun_get_patch_data.R b/R/fun_get_patch_data.R index dc910fc..a0487dd 100644 --- a/R/fun_get_patch_data.R +++ b/R/fun_get_patch_data.R @@ -1,6 +1,6 @@ #' Get residence patch data. #' -#' The function \code{atl_patch_summary} can be used to extract patch-specific +#' The function \code{atl_patch_summary} can be used to extract patch-specific #' summary data such as the median coordinates, the patch duration, the distance #' travelled within the patch, the displacement within the patch, and the patch #' area. diff --git a/R/fun_make_res_patch.R b/R/fun_make_res_patch.R index 97f90ad..036645c 100644 --- a/R/fun_make_res_patch.R +++ b/R/fun_make_res_patch.R @@ -1,24 +1,24 @@ #' Construct residence patches from position data. #' -#' A cleaned movement track can be classified into residence patches using the +#' A cleaned movement track can be classified into residence patches using the #' function \code{atl_res_patch}. #' The function expects a specific organisation of the data: there should be #' at least the following columns, \code{x}, \code{y}, \code{time}, and #' \code{id}, all named in lower case, and corresponding to the coordinates, #' timestamp in the UNIX format (seconds since 1970), and the identity of the #' tracked individual. -#' The result contains only the data that was classified as a residence patch +#' The result contains only the data that was classified as a residence patch #' and removes transit between them. 
-#' \code{atl_res_patch} requires only three parameters: (1) the distance -#' threshold between positions (called \code{buffer_size}), (2) the distance -#' threshold between clusters of positions (called \code{lim_spat_indep}), +#' \code{atl_res_patch} requires only three parameters: (1) the distance +#' threshold between positions (called \code{buffer_size}), (2) the distance +#' threshold between clusters of positions (called \code{lim_spat_indep}), #' and (3) the time interval between clusters (called \code{lim_time_indep}). #' Clusters formed of fewer than a minimum number of positions can be excluded. #' The exclusion of clusters with few positions can help in removing bias due to -#' short stops, but if such short stops are also of interest, they can be +#' short stops, but if such short stops are also of interest, they can be #' included by reducing the \code{min_fixes} argument. -#' Position covariates such as speed may also be summarised patch-wise by -#' passing covariate names and summary functions as character vectors to the +#' Position covariates such as speed may also be summarised patch-wise by +#' passing covariate names and summary functions as character vectors to the #' \code{summary_variables} and \code{summary_functions} arguments, respectively #' . #' diff --git a/R/fun_remove_reflections.R b/R/fun_remove_reflections.R index dd7566c..76cf459 100644 --- a/R/fun_remove_reflections.R +++ b/R/fun_remove_reflections.R @@ -1,16 +1,13 @@ #' Remove reflected positions. #' -#' Remove reflections, or prolonged spikes from a movement track by identifying +#' Remove reflections, or prolonged spikes from a movement track by identifying #' the bounds and removing positions between them. -#' The important function arguments here are \code{point_angle_cutoff} ($A$), -#' \code{reflection_speed_cutoff} ($S$), and \code{est_ref_len}, the maximum -#' number of positions after the anchor point that are candidates for the end of -#' the prolonged spike. 
-#' If the prolonged spike ends after less than N positions, the true end point +#' The important function arguments here are \code{point_angle_cutoff} ($A$) and +#' \code{reflection_speed_cutoff} ($S$). +#' If the prolonged spike ends before the last row of data, the true end point #' is used as the outer bound of the spike. -#' However, the algorithm behind this function fails when the prolonged spike -#' ends after more than N positions. Users are advised to use a liberally large -#' value of N in the \code{est_ref_len} argument; 1,000 may be appropriate. +#' If the prolonged spike does not end before the last row of data, all the +#' data are retained and a message is printed. #' #' @author Pratik R. Gupte #' @@ -22,7 +19,6 @@ #' high instantaneous speeds are considered an anomaly rather than fast transit. #' @param reflection_speed_cutoff The speed (in m/s) above which an anomaly is #' detected when combined with a high turning angle. -#' @param est_ref_len How many positions are expected to be in a reflection. #' #' @return A dataframe with reflections removed.
#' @examples @@ -31,8 +27,7 @@ #' data = track_data, #' x = "x", y = "y", time = "time", #' point_angle_cutoff = A, -#' reflection_speed_cutoff = S, -#' est_ref_len = N +#' reflection_speed_cutoff = S #' ) #' } #' @export @@ -41,9 +36,8 @@ atl_remove_reflections <- function(data, y = "y", time = "time", point_angle_cutoff = 45, - reflection_speed_cutoff = 20, - est_ref_len = 1000) { - speed <- angle <- NULL + reflection_speed_cutoff = 20) { + speed_in <- speed_out <- angle <- NULL # check data atl_check_data(data, names_expected = c(x, y, time)) @@ -51,75 +45,83 @@ atl_remove_reflections <- function(data, data.table::setorderv(data, time) # get speed and angle - speed <- atl_get_speed(data, x = x, y = y, time = time) - angle <- atl_turning_angle(data, x = x, y = y, time = time) - data[, `:=`( - speed = speed, - angle = angle - )] + data$speed_in <- atl_get_speed( + data, + x = x, y = y, time = time, + type = "in" + ) + data$speed_out <- atl_get_speed( + data, + x = x, y = y, time = time, + type = "out" + ) + data$angle <- atl_turning_angle(data, x = x, y = y, time = time) # remove points that cannot be assessed # can't determine whether the last few points are reflections hence remove - data <- data[!is.na(speed) & !is.na(angle), ] + data <- data[ + !is.na(speed_in) & !is.na(speed_out) & !is.na(angle) & !is.nan(angle), + ] # prepare a vector of rows to discard vec_discard <- integer() - # identify the last point before an anomaly - anchor_point <- which(data$speed >= - reflection_speed_cutoff & - data$angle >= point_angle_cutoff)[1] - 1 + # identify the anomaly point + anchor_point <- which( + data$speed_in > reflection_speed_cutoff & + data$angle > point_angle_cutoff + )[1] # message message(glue::glue("first anchor at {anchor_point}")) - while (anchor_point < nrow(data) - 1) { + while (anchor_point < (nrow(data) - 1)) { - # the next est_ref_len subsequent points are suspect - suspect_point <- anchor_point + 1 - # find the max speed after the first anomaly, which 
is the blink away - # the next highest should be the blink back - suspect_speeds <- data[(suspect_point + 1):est_ref_len, speed] + # find next point with speed out > S + est_ref_end <- which(data[ + seq( + anchor_point, + nrow(data) + ), + speed_out + ] > reflection_speed_cutoff) - # drop NA here - suspect_speeds <- suspect_speeds[!is.na(suspect_speeds)] - - # get the next highest speed - nx_high_speed <- which.max(rank(suspect_speeds)) - # this gets the next highest speed, which should be the end of the - # reflection, but may also be the beginning or end of another reflection - if (suspect_speeds[nx_high_speed] < reflection_speed_cutoff) { - reflection_end <- nrow(data) - message(glue::glue("remove_reflections: reflection does not end within \\ - {est_ref_len} positions")) + # handle case where there is no end, conservatively keep all data + if (!any(est_ref_end)) { + message("the reflection does not appear to end: keeping all points") + break() } else { - reflection_end <- suspect_point + nx_high_speed + 1 # one added for safety - # message ref end - message(glue::glue("reflection ends {reflection_end}")) - } + # identify end point + est_ref_end <- anchor_point + est_ref_end - # identify rows to remove - # may be excessive but works - vec_discard <- c(vec_discard, seq(anchor_point, reflection_end)) + # print message + message(sprintf("reflection ends at %i", est_ref_end)) - # set the next anchor - next_anchor <- which(data$speed[reflection_end:nrow(data)] >= - reflection_speed_cutoff & - data$angle[reflection_end:nrow(data)] >= - point_angle_cutoff)[1] + # update discard vector + vec_discard <- c(vec_discard, seq(anchor_point, est_ref_end)) - if (is.na(next_anchor)) { - # break the loop if there's no further anomalies - break() - } else { - anchor_point <- reflection_end + next_anchor - 1 - # check for errors in order - assertthat::assert_that(anchor_point > reflection_end, - msg = glue::glue("anchor point {anchor_point} is \\ + # set the next anchor relative to 
the ref end + next_anchor <- which( + (data$speed_in[seq(est_ref_end, nrow(data))] > reflection_speed_cutoff) & + (data$angle[seq(est_ref_end, nrow(data))] > point_angle_cutoff) + )[1] + + # break if there are no more reflections + # must check for NA because we access the first element of an empty vec + if (any(!any(next_anchor), is.na(next_anchor))) { + message("no more reflections; ending") + break() + } else { + # the absolute next anchor + anchor_point <- est_ref_end + next_anchor + # check for errors in order + assertthat::assert_that(anchor_point > est_ref_end, + msg = glue::glue("anchor point {anchor_point} is \\ before reflection end {reflection_end}") - ) - # message - message(glue::glue("next anchor is {anchor_point}")) + ) + # message + message(glue::glue("next anchor is {anchor_point}")) + } } } diff --git a/R/fun_simple_distance.R b/R/fun_simple_distance.R index a554298..62ea86f 100644 --- a/R/fun_simple_distance.R +++ b/R/fun_simple_distance.R @@ -1,6 +1,6 @@ #' Calculate distances between successive points. #' -#' Gets the euclidean distance between consecutive points in a coordinate +#' Gets the euclidean distance between consecutive points in a coordinate #' reference system in metres, i.e., UTM systems. #' #' @param x A column name in a data.frame object that contains the numeric X or diff --git a/R/fun_smooth_data.R b/R/fun_smooth_data.R index 698fc26..c85fca4 100644 --- a/R/fun_smooth_data.R +++ b/R/fun_smooth_data.R @@ -1,6 +1,6 @@ #' Apply a median smooth to coordinates. #' -#' Applies a median smooth defined by a rolling window to the X and Y +#' Applies a median smooth defined by a rolling window to the X and Y #' coordinates of the data. #' This function \emph{modifies in place}, i.e., \emph{the results #' need not be assigned to a new data.table}. 
diff --git a/R/fun_thin_data.R b/R/fun_thin_data.R index dc4e0b6..a6b265d 100644 --- a/R/fun_thin_data.R +++ b/R/fun_thin_data.R @@ -1,27 +1,27 @@ #' Thin tracking data by resampling or aggregation. #' -#' Uniformly reduce data volumes with either aggregation or resampling -#' (specified by the \code{method} argument) over an interval specified in +#' Uniformly reduce data volumes with either aggregation or resampling +#' (specified by the \code{method} argument) over an interval specified in #' seconds using the \code{interval} argument. -#' Both options make two important assumptions: -#' (1) that timestamps are named `time', and +#' Both options make two important assumptions: +#' (1) that timestamps are named `time', and #' (2) all columns except the identity columns can be averaged in \code{R}. -#' While the `resample' option returns a thinned dataset with all columns from -#' the input data, the `aggregate' option drops the column \code{COVXY}, since +#' While the `resample' option returns a thinned dataset with all columns from +#' the input data, the `aggregate' option drops the column \code{COVXY}, since #' this cannot be propagated to the averaged position. -#' Both options handle the column `time' differently: while `resample' returns -#' the actual timestamp (in UNIX time) of each sample, `aggregate' returns the +#' Both options handle the column `time' differently: while `resample' returns +#' the actual timestamp (in UNIX time) of each sample, `aggregate' returns the #' mean timestamp (also in UNIX time). #' In both cases, an extra column, \code{time_agg}, is added which has a uniform -#' difference between each element corresponding to the user-defined thinning +#' difference between each element corresponding to the user-defined thinning #' interval. 
-#' The `aggregate' option only recognises errors named \code{VARX} and +#' The `aggregate' option only recognises errors named \code{VARX} and #' \code{VARY}, and standard deviation around each position named \code{SD}. -#' If all of these columns are not present together the function assumes there +#' If all of these columns are not present together the function assumes there #' is no measure of error, and drops those columns. -#' If there is actually no measure of error, the function simply returns the +#' If there is actually no measure of error, the function simply returns the #' averaged position and covariates in each time interval. -#' Grouping variables' names (such as animal identity) may be passed as a +#' Grouping variables' names (such as animal identity) may be passed as a #' character vector to the \code{id_columns} argument. #' #' @param data Cleaned data to aggregate. Must have a numeric column named time. diff --git a/man/atl_filter_bounds.Rd b/man/atl_filter_bounds.Rd index d5d3213..58c0cbb 100644 --- a/man/atl_filter_bounds.Rd +++ b/man/atl_filter_bounds.Rd @@ -45,12 +45,14 @@ ranges, or by an \code{sf-*POLYGON} object. } \examples{ \dontrun{ -filtered_data <- atl_filter_bounds(data = data, - x = "X", y = "Y", - x_range = c(x_min, x_max), - y_range = c(y_min, y_max), - sf_polygon = your_polygon, - remove_inside = FALSE) +filtered_data <- atl_filter_bounds( + data = data, + x = "X", y = "Y", + x_range = c(x_min, x_max), + y_range = c(y_min, y_max), + sf_polygon = your_polygon, + remove_inside = FALSE +) } } \author{ diff --git a/man/atl_filter_covariates.Rd b/man/atl_filter_covariates.Rd index 90df18f..cf86521 100644 --- a/man/atl_filter_covariates.Rd +++ b/man/atl_filter_covariates.Rd @@ -20,24 +20,40 @@ A missing filter variables \emph{will} result in an empty data frame.} A dataframe filtered using the filters specified. 
} \description{ -The atlastools function \code{atl_filter_covariates} allows convenient filtering of a dataset by any number of logical filters. -This function can be used to easily filter timestamps in a range, as well as combine simple spatial and temporal filters. -It accepts a character vector of \code{R} expressions that each return a logical vector (i.e., \code{TRUE} or \code{FALSE}). -Each filtering condition is interpreted in the context of the dataset supplied, and used to filter for rows that satisfy each of the filter conditions. -Users must make sure that the filtering variables exist in their dataset in order to avoid errors. +The atlastools function \code{atl_filter_covariates} allows convenient +filtering of a dataset by any number of logical filters. +This function can be used to easily filter timestamps in a range, as well as +combine simple spatial and temporal filters. +It accepts a character vector of \code{R} expressions that each return a +logical vector (i.e., \code{TRUE} or \code{FALSE}). +Each filtering condition is interpreted in the context of the dataset +supplied, and used to filter for rows that satisfy each of the filter +conditions. +Users must make sure that the filtering variables exist in their dataset in +order to avoid errors. 
} \examples{ \dontrun{ -night_data <- atl_filter_covariates(data = dataset, - filters = c("!inrange(hour, 6, 18)")) +night_data <- atl_filter_covariates( + data = dataset, + filters = c("!inrange(hour, 6, 18)") +) -data_in_area <- atl_filter_covariates(data = dataset, - filters = c("between(time, t_min, t_max)", - "between(x, x_min, x_max)")) -filtered_data <- atl_filter_covariates(data = data, - filters = c("NBS > 3", - "SD < 100", - "between(day, 5, 8)")) +data_in_area <- atl_filter_covariates( + data = dataset, + filters = c( + "between(time, t_min, t_max)", + "between(x, x_min, x_max)" + ) +) +filtered_data <- atl_filter_covariates( + data = data, + filters = c( + "NBS > 3", + "SD < 100", + "between(day, 5, 8)" + ) +) } } \author{ diff --git a/man/atl_get_speed.Rd b/man/atl_get_speed.Rd index 264c02f..020ac02 100644 --- a/man/atl_get_speed.Rd +++ b/man/atl_get_speed.Rd @@ -31,8 +31,9 @@ on the units of the column specified in \code{time}. \examples{ \dontrun{ data$speed_in <- atl_get_speed(data, - x = "x", y = "y", - time = "time", type = c("in")) + x = "x", y = "y", + time = "time", type = c("in") +) } } \author{ diff --git a/man/atl_median_smooth.Rd b/man/atl_median_smooth.Rd index 221392b..b7b8ce1 100644 --- a/man/atl_median_smooth.Rd +++ b/man/atl_median_smooth.Rd @@ -29,16 +29,19 @@ additional columns posID and ts, which is TIME converted to human readable POSIXct format. } \description{ -Applies a median smooth defined by a rolling window to the X and Y coordinates of the data. +Applies a median smooth defined by a rolling window to the X and Y +coordinates of the data. This function \emph{modifies in place}, i.e., \emph{the results need not be assigned to a new data.table}. 
} \examples{ \dontrun{ -atl_median_smooth(data = track_data, - x = "x", y = "y", - time = "time", - moving_window = 5) +atl_median_smooth( + data = track_data, + x = "x", y = "y", + time = "time", + moving_window = 5 +) } } diff --git a/man/atl_patch_summary.Rd b/man/atl_patch_summary.Rd index 8c4f006..27b8113 100644 --- a/man/atl_patch_summary.Rd +++ b/man/atl_patch_summary.Rd @@ -23,13 +23,18 @@ An object of type \code{sf} or \code{data.table} depending on which data is requested. } \description{ -The function \code{atl_patch_summary} can be used to extract patch-specific summary data such as the median coordinates, the patch duration, the distance travelled within the patch, the displacement within the patch, and the patch area. +The function \code{atl_patch_summary} can be used to extract patch-specific +summary data such as the median coordinates, the patch duration, the distance + travelled within the patch, the displacement within the patch, and the patch + area. } \examples{ \dontrun{ -patch_summary <- atl_patch_summary(patch_data = patches, - which_data = "summary", - buffer_radius = 10) +patch_summary <- atl_patch_summary( + patch_data = patches, + which_data = "summary", + buffer_radius = 10 +) } } diff --git a/man/atl_remove_reflections.Rd b/man/atl_remove_reflections.Rd index 1266485..ebc2524 100644 --- a/man/atl_remove_reflections.Rd +++ b/man/atl_remove_reflections.Rd @@ -10,8 +10,7 @@ atl_remove_reflections( y = "y", time = "time", point_angle_cutoff = 45, - reflection_speed_cutoff = 20, - est_ref_len = 1000 + reflection_speed_cutoff = 20 ) } \arguments{ @@ -28,25 +27,28 @@ high instantaneous speeds are considered an anomaly rather than fast transit.} \item{reflection_speed_cutoff}{The speed (in m/s) above which an anomaly is detected when combined with a high turning angle.} - -\item{est_ref_len}{How many positions are expected to be in a reflection.} } \value{ A dataframe with reflections removed. 
} \description{ -Remove reflections, or prolonged spikes from a movement track by identifying the bounds and removing positions between them. -The important function arguments here are \code{point_angle_cutoff} ($A$), \code{reflection_speed_cutoff} ($S$), and \code{est_ref_len}, the maximum number of positions after the anchor point that are candidates for the end of the prolonged spike. -If the prolonged spike ends after less than N positions, the true end point is used as the outer bound of the spike. -However, the algorithm behind this function fails when the prolonged spike ends after more than N positions. Users are advised to use a liberally large value of N in the \code{est_ref_len} argument; 1,000 may be appropriate. +Remove reflections, or prolonged spikes from a movement track by identifying +the bounds and removing positions between them. +The important function arguments here are \code{point_angle_cutoff} ($A$) and +\code{reflection_speed_cutoff} ($S$). +If the prolonged spike ends before the last row of data, the true end point +is used as the outer bound of the spike. +If the prolonged spike does not end before the last row of data, all the +data are retained and a message is printed. } \examples{ \dontrun{ -filtered_data <- atl_remove_reflections(data = track_data, - x = "x", y = "y", time = "time", - point_angle_cutoff = A, - reflection_speed_cutoff = S, - est_ref_len = N) +filtered_data <- atl_remove_reflections( + data = track_data, + x = "x", y = "y", time = "time", + point_angle_cutoff = A, + reflection_speed_cutoff = S +) } } \author{ diff --git a/man/atl_res_patch.Rd b/man/atl_res_patch.Rd index da086b0..26558e4 100644 --- a/man/atl_res_patch.Rd +++ b/man/atl_res_patch.Rd @@ -50,27 +50,39 @@ data used to construct the patch. In addition, there are columns with patch summary variables. } \description{ -A cleaned movement track can be classified into residence patches using the function \code{atl_res_patch}.
+A cleaned movement track can be classified into residence patches using the +function \code{atl_res_patch}. The function expects a specific organisation of the data: there should be at least the following columns, \code{x}, \code{y}, \code{time}, and \code{id}, all named in lower case, and corresponding to the coordinates, timestamp in the UNIX format (seconds since 1970), and the identity of the tracked individual. -The result contains only the data that was classified as a residence patch and removes transit between them. -\code{atl_res_patch} requires only three parameters: (1) the distance threshold between positions (called \code{buffer_size}), (2) the distance threshold between clusters of positions (called \code{lim_spat_indep}), and (3) the time interval between clusters (called \code{lim_time_indep}). +The result contains only the data that was classified as a residence patch +and removes transit between them. +\code{atl_res_patch} requires only three parameters: (1) the distance +threshold between positions (called \code{buffer_size}), (2) the distance +threshold between clusters of positions (called \code{lim_spat_indep}), +and (3) the time interval between clusters (called \code{lim_time_indep}). Clusters formed of fewer than a minimum number of positions can be excluded. -The exclusion of clusters with few positions can help in removing bias due to short stops, but if such short stops are also of interest, they can be included by reducing the \code{min_fixes} argument. -Position covariates such as speed may also be summarised patch-wise by passing covariate names and summary functions as character vectors to the \code{summary_variables} and \code{summary_functions} arguments, respectively. +The exclusion of clusters with few positions can help in removing bias due to + short stops, but if such short stops are also of interest, they can be +included by reducing the \code{min_fixes} argument. 
+Position covariates such as speed may also be summarised patch-wise by +passing covariate names and summary functions as character vectors to the +\code{summary_variables} and \code{summary_functions} arguments, respectively +. } \examples{ \dontrun{ -patches <- atl_res_patch(data = track_data, - buffer_radius = 10, - lim_spat_indep = 100, - lim_time_indep = 30, - min_fixes = 3, - summary_variables = c("speed"), - summary_functions = c("mean", "sd")) +patches <- atl_res_patch( + data = track_data, + buffer_radius = 10, + lim_spat_indep = 100, + lim_time_indep = 30, + min_fixes = 3, + summary_variables = c("speed"), + summary_functions = c("mean", "sd") +) } } \author{ diff --git a/man/atl_simple_dist.Rd b/man/atl_simple_dist.Rd index aafb810..2ab25c4 100644 --- a/man/atl_simple_dist.Rd +++ b/man/atl_simple_dist.Rd @@ -20,5 +20,6 @@ latitude coordinate for position data.} Returns a vector of distances between consecutive points. } \description{ -Gets the euclidean distance between consecutive points in a coordinate reference system in metres, i.e., UTM systems. +Gets the euclidean distance between consecutive points in a coordinate +reference system in metres, i.e., UTM systems. } diff --git a/man/atl_thin_data.Rd b/man/atl_thin_data.Rd index b39ce3c..cca874c 100644 --- a/man/atl_thin_data.Rd +++ b/man/atl_thin_data.Rd @@ -27,22 +27,37 @@ mean is taken.} A dataframe aggregated taking the mean over the interval. } \description{ -Uniformly reduce data volumes with either aggregation or resampling (specified by the \code{method} argument) over an interval specified in seconds using the \code{interval} argument. -Both options make two important assumptions: (1) that timestamps are named `time', and (2) all columns except the identity columns can be averaged in \code{R}. 
-While the `resample' option returns a thinned dataset with all columns from the input data, the `aggregate' option drops the column \code{COVXY}, since this cannot be propagated to the averaged position. -Both options handle the column `time' differently: while `resample' returns the actual timestamp (in UNIX time) of each sample, `aggregate' returns the mean timestamp (also in UNIX time). -In both cases, an extra column, \code{time_agg}, is added which has a uniform difference between each element corresponding to the user-defined thinning interval. -The `aggregate' option only recognises errors named \code{VARX} and \code{VARY}, and standard deviation around each position named \code{SD}. -If all of these columns are not present together the function assumes there is no measure of error, and drops those columns. -If there is actually no measure of error, the function simply returns the averaged position and covariates in each time interval. -Grouping variables' names (such as animal identity) may be passed as a character vector to the \code{id_columns} argument. +Uniformly reduce data volumes with either aggregation or resampling +(specified by the \code{method} argument) over an interval specified in +seconds using the \code{interval} argument. +Both options make two important assumptions: +(1) that timestamps are named `time', and +(2) all columns except the identity columns can be averaged in \code{R}. +While the `resample' option returns a thinned dataset with all columns from +the input data, the `aggregate' option drops the column \code{COVXY}, since +this cannot be propagated to the averaged position. +Both options handle the column `time' differently: while `resample' returns +the actual timestamp (in UNIX time) of each sample, `aggregate' returns the +mean timestamp (also in UNIX time). +In both cases, an extra column, \code{time_agg}, is added which has a uniform + difference between each element corresponding to the user-defined thinning +interval. 
+The `aggregate' option only recognises errors named \code{VARX} and +\code{VARY}, and standard deviation around each position named \code{SD}. +If all of these columns are not present together the function assumes there +is no measure of error, and drops those columns. +If there is actually no measure of error, the function simply returns the +averaged position and covariates in each time interval. +Grouping variables' names (such as animal identity) may be passed as a +character vector to the \code{id_columns} argument. } \examples{ \dontrun{ thinned_data <- atl_thin_data(data, - interval = 60, - id_columns = c("animal_id"), - method = "aggregate") + interval = 60, + id_columns = c("animal_id"), + method = "aggregate" +) } } diff --git a/man/atl_turning_angle.Rd b/man/atl_turning_angle.Rd index 1c67213..6a6b327 100644 --- a/man/atl_turning_angle.Rd +++ b/man/atl_turning_angle.Rd @@ -28,7 +28,8 @@ The turning angle is returned in degrees. \examples{ \dontrun{ data$angle <- atl_turning_angle(data, - x = "x", y = "y", time = "time") + x = "x", y = "y", time = "time" +) } } \author{ diff --git a/tests/testthat/test_remove_reflections.R b/tests/testthat/test_remove_reflections.R index e2ecb50..f1100bc 100644 --- a/tests/testthat/test_remove_reflections.R +++ b/tests/testthat/test_remove_reflections.R @@ -12,7 +12,7 @@ testthat::test_that("reflections are removed", { )] # remove NA speeds - stats::na.omit(test_data, + test_data <- stats::na.omit(test_data, cols = c("in_speed", "out_speed", "angle") ) @@ -38,6 +38,11 @@ testthat::test_that("reflections are removed", { # should return fewer elements than nrows in df testthat::expect_lte(nrow(test_output), nrow(test_data)) + # get speeds again to test extremes removed + test_output[, c("speed") := atl_get_speed(test_output, + x = "X", y = "Y", time = "Time" + )] + # no extreme speeds should remain testthat::expect_lte(max(test_output$speed, na.rm = TRUE), 0.024) @@ -58,8 +63,10 @@ testthat::test_that("reflections are 
removed", { angle = atlastools::atl_turning_angle(test_data) )] - test_output <- atlastools::atl_remove_reflections(test_data, - reflection_speed_cutoff = 5, + test_output <- atlastools::atl_remove_reflections( + test_data, + x = "x", y = "y", time = "time", + reflection_speed_cutoff = 3, point_angle_cutoff = 5 ) @@ -83,7 +90,9 @@ testthat::test_that("reflections do not end", { )] # remove NA speeds - stats::na.omit(test_data, cols = c("in_speed", "out_speed", "angle")) + test_data <- stats::na.omit(test_data, + cols = c("in_speed", "out_speed", "angle") + ) # remove outliers test_data <- test_data[in_speed < 0.024 & out_speed < 0.024, ] @@ -92,18 +101,16 @@ testthat::test_that("reflections do not end", { test_data <- test_data[seq(600), ] # now get output - test_output <- atlastools::atl_remove_reflections(test_data, + test_output <- atlastools::atl_remove_reflections( + test_data, reflection_speed_cutoff = 0.024, - point_angle_cutoff = 5, - est_ref_len = 500 + point_angle_cutoff = 5 ) # do tests - # should return fewer elements than nrows in df + # should return fewer or the same number of elements than nrows in df testthat::expect_lte(nrow(test_output), nrow(test_data)) - # should remove points with y coordinates less than 60 - # this is still true even though the reflection does not end - # in this subset - testthat::expect_lt(max(test_output$y), 1.02) + # removed test checking for removed speeds + # the conservative approach is to keep data })