Skip to content

Commit

Permalink
Merge pull request #42 from ecohealthalliance/feature/obfuscate_gps
Browse files Browse the repository at this point in the history
Feature/obfuscate gps
  • Loading branch information
collinschwantes authored Aug 1, 2024
2 parents 9196f68 + 9dfe2c9 commit 63a26d8
Show file tree
Hide file tree
Showing 6 changed files with 324 additions and 2 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ohcleandat
Type: Package
Title: One Health Data Cleaning and Quality Checking Package
Version: 0.2.6
Version: 0.3.0
Authors@R: c(
person("Collin", "Schwantes", email = "schwantes@ecohealthalliance.org", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-4014-4896")),
person("Johana", "Teigen", email = "teigen@ecohealthalliance.org", role = "aut", comment = c(ORCID = "0000-0002-6209-2321")),
Expand All @@ -14,7 +14,7 @@ Description: This package provides useful functions to orchestrate analytics and
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.2
RoxygenNote: 7.3.2.9000
Suggests:
knitr,
rmarkdown
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ export(dropbox_upload)
export(get_dropbox_val_logs)
export(get_odk_form_schema)
export(get_odk_responses)
export(get_precision)
export(get_species_letter)
export(guess_col_type)
export(id_checker)
export(make_report_urls)
export(obfuscate_gps)
export(obfuscate_lat)
export(obfuscate_lon)
export(othertext_lookup)
export(read_excel_all_sheets)
export(read_googlesheets)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ohcleandat 0.3.0

* Adding GPS obfuscation function - this function uses two methods to reduce the
accuracy of GPS points. The first is adding some amount of error to the measurement
from a user defined random uniform distribution. The second is by rounding to
remove precision from the measurement.
202 changes: 202 additions & 0 deletions R/obfuscate_gps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#' Obfuscate GPS
#'
#' This function fuzzes gps points by first adding
#' error then rounding to a certain number of digits.
#'
#'
#' @param x Numeric. Vector of gps points
#' @param precision Integer. Number of digits to keep. See `round` for more
#' details
#' @param fuzz Numeric. Positive number indicating how much error to introduce
#' to the gps measurements. This is used to generate the random uniform
#' distribution `runif(1,min = -fuzz, max = fuzz)`
#' @param type Character. One of "lat" or "lon"
#'
#' @return Numeric. A vector of fuzzed and rounded GPS points
#' @export
#'
#' @examples
#'
#' # make data
#' gps_data <- data.frame(lat = c(1.0001, 10.22223, 4.00588),
#' lon = c(2.39595, 4.506930, -60.09999901))
#'
#' # Default obfuscation settings correspont to roughly a 27 by 27 km area
#' gps_data$lat |>
#' obfuscate_gps(type = "lat")
#'
#' # Obfuscation can be made more or less precise by changing the number of
#' # decimal points included or modifying the amount of fuzz (error)
#' # introduced
#' gps_data$lon |>
#' obfuscate_gps(precision = 4, fuzz = 0.002, type = "lon")
#'
#' ### working at the poles
#' gps_data_poles <- data.frame(lat = c(89.0001, 89.22223, -89.8881),
#' lon = c(2.39595, 4.506930, -60.09999901))
#'
#'
#' gps_data_poles$lat |>
#' obfuscate_gps(fuzz = 1, type = "lat")
#'
#'
#' ### working at the 180th meridian
#' gps_data_180 <- data.frame(lat = c(2, 3, 4),
#' lon = c(179.39595, -179.506930, -178.09999901))
#' gps_data_180$lon |>
#' obfuscate_gps(fuzz = 1, type = "lon")
#'
#' ### GPS is on the fritz!
#' \dontrun{
#' gps_data_fritz <- data.frame(lat = c(91, -91, 90),
#' lon = c(181.0001, -181.9877, -178.09999901))
#' gps_data_fritz$lon |>
#' obfuscate_gps(fuzz = 1, type = "lon")
#'
#' gps_data_fritz$lat |>
#' obfuscate_gps(fuzz = 1, type = "lat")
#' }
#'
obfuscate_gps <- function(x, precision = 2, fuzz = 0.125, type = c("lat","lon")){

## max precision in your data
# find value in x with most decimal points
data_precision <- get_precision(x,func = min)

msg_data_precision <- sprintf("The data have a max precision of: %s",data_precision)
message(msg_data_precision)

## warning for max shift is fuzz+1e-precision
max_shift <- fuzz+10^-(precision/2)
msg_max_shift <- sprintf("The max shift from the combination of precision and fuzz is: %s degrees",max_shift)
message(msg_max_shift)

if(max_shift/fuzz > 2){
message("The majority of the obfuscation is coming from rounding, this
potentially makes re-identification easier")
}

## check if obfuscation will have an impact on the data



type <- match.arg(type, c("lat","lon"))

if(type == "lat"){
out <- obfuscate_lat(x,precision,fuzz)
}

if(type == "lon"){
out <- obfuscate_lon(x,precision,fuzz)
}

return(out)
}

obfuscate_point <- function(x, precision = 2, fuzz = 0.125){

# fuzz point
gps_error <- stats::runif(1,min = -fuzz, max = fuzz)
x_fuzz <- x + gps_error

# round to 2 decimal points
out <- round(x_fuzz,digits = precision)
return(out)
}


#' Obfuscates latitude data
#'
#' @rdname obfuscate_gps
#'
#' @return Numeric vector
#' @export
#'
obfuscate_lat <- function(x, precision = 2, fuzz = 0.125){

## check that fuzz doesnt exceed maximum values
if(fuzz > 90){
stop("fuzz greater than range of latitude on earth")
}

if(any(x > 90 | x < -90)){
stop("Latitude is outside the range of latitude on earth")
}

points <- obfuscate_point(x,precision,fuzz)

# make sure point is between 90 and -90
points_in_range <- purrr::map_dbl(points,function(point){
while(all(point > 90 | point < -90)){
point <- obfuscate_point(point,precision,fuzz)
}
return(point)
})

return(points_in_range)
}


#' Obfuscates longitude data
#'
#' @rdname obfuscate_gps
#'
#' @return Numeric vector
#' @export
#'
obfuscate_lon <- function(x, precision = 2, fuzz = 0.125){

## check that fuzz doesnt exceed maximum values
if(fuzz > 180){
stop("fuzz greater than range of longitude on earth")
}

if(any(x > 180 | x < -180)){
stop("Longitude is outside the range of longitude on earth ")
}

points <- obfuscate_point(x,precision,fuzz)

### wrap points near the 180th meridian
points_in_range <- purrr::map_dbl(points,function(point){

# if point greater than 180, wrap
if(point > 180){
difference <- point - 180
point <- -180 + difference
}

# if point less than -180, wrap
if(point < -180){
difference <- -180-point
point <- 180 - difference
}

return(point)
})

return(points_in_range)
}

#' Get Precision
#'
#' @param x Numeric. Vector of gps points
#' @param func Function. Apply some function to the vector of precisions. Default is c so that
#' all values are returned
#'
#' @return output of func - likely a vector
#' @export
#' @author Nathan Layman
#'
#' @examples
#'
#' x <- c(1,100,1.11)
#' get_precision(x,func = min)
#'
#'
get_precision <- function(x,func = c) {
# number of characters with the decimal - number of chacters without it
precision <- 10^-(nchar(gsub("\\.", "", as.character(x))) - nchar(as.character(trunc(x))))
out <- func(precision)
return(out)
}
30 changes: 30 additions & 0 deletions man/get_precision.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

80 changes: 80 additions & 0 deletions man/obfuscate_gps.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 63a26d8

Please sign in to comment.