Skip to content

Commit

Permalink
Export and document dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
ccamara committed Jun 30, 2024
1 parent 043a6b9 commit ccf9ea5
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 40 deletions.
10 changes: 5 additions & 5 deletions R/boundaries_get.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
#' retrieves the requested geographical boundaries in the form of a sf object.
#'
#' @param boundary a string containing the short name of the boundary to
#' retrieve. Accepted values are:
#' `r levels(data_urls$boundary)`
#' `r levels(ons_boundaries$boundary)`
#' @param year a number containing the year of the requested boundaries.
#' @param detail_level a string defining the level of detail in the geometry.
#' Accepted values are: `r levels(data_urls$resolution)`. Each value
#' Accepted values are: `r levels(ons_boundaries$detail_level)`. Each value
#' corresponds to:
#'
#' - Full Extent (BFE) – Full resolution boundaries go to the Extent of the Realm (Low Water Mark) and are the most detailed of the boundaries.
Expand Down Expand Up @@ -44,11 +44,11 @@ boundaries_get <- function(boundary, year = NULL, detail_level = "BUC") {
class = "error_not_single_string"
)
}
if (!boundary %in% levels(data_urls$boundary)) {
if (!boundary %in% levels(ons_boundaries$boundary)) {
cli::cli_abort(
paste(
"`boundary` must be one of these values:",
levels(data_urls$boundary)
levels(ons_boundaries$boundary)
),
class = "error_boundary_not_valid"
)
Expand All @@ -70,7 +70,7 @@ boundaries_get <- function(boundary, year = NULL, detail_level = "BUC") {

lookup <- paste(boundary, year, detail_level, sep = "_")

url <- data_urls$url_download[data_urls$id == lookup]
url <- ons_boundaries$url_download[ons_boundaries$id == lookup]

if (length(url) == 0) {
cli::cli_abort(
Expand Down
26 changes: 26 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#' ONS Boundaries
#'
#' Metadata about a subset of the geographical boundaries from [ONS'
#' geoportal](https://geoportal.statistics.gov.uk/) that can be downloaded by
#' `{ukgeographies}`.
#' These boundaries have been retrieved from the services provided by
#' [ONS' geoportal API](https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/)
#' and automatically categorised from their URLs, to match Geoportal's menus
#' and datasets.
#'
#' @format
#' A data frame with `r nrow(ons_boundaries)` rows and `r ncol(ons_boundaries)`
#' columns:
#' \describe{
#'   \item{id}{A unique identifier for every boundary, built by joining
#'     `boundary`, `year` and `detail_level` with underscores}
#'   \item{service}{URL pointing to the API service}
#'   \item{boundary_type}{Type of boundary, according to ONS' classification}
#'   \item{boundary}{Boundary (short) name, according to ONS' naming}
#'   \item{detail_level}{Boundary's level of detail (`BFC`, `BFE`, `BGC` or
#'     `BUC`). For a detailed description of the methodology refer to
#'     [Digital boundaries](https://www.ons.gov.uk/methodology/geography/geographicalproducts/digitalboundaries)}
#'   \item{year}{Year in which the boundaries were created}
#'   \item{url_download}{URL querying the API service to return all features
#'     as a JSON file}
#' }
#'
#' @source <https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/>
"ons_boundaries"
Binary file removed R/sysdata.rda
Binary file not shown.
74 changes: 40 additions & 34 deletions data-raw/data-urls.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,31 @@ services <- ons_geoportal |>

# Build the dataframe -----------------------------------------------------

data_urls <- as.data.frame(services) |>
ons_data <- as.data.frame(services) |>
rename(service = services) |>
# Remove Mapserver
filter(str_detect(services, "/FeatureServer")) |>
filter(str_detect(service, "/FeatureServer")) |>
# Convert to absolute URLs
mutate(services = paste0(geoportal_base_url, services)) |>
mutate(type = case_when(
str_detect(services, "Lookup") ~ "Lookup",
)) |>
mutate(service = paste0(geoportal_base_url, service)) |>
# Infer categories from titles
mutate(
boundary = case_when(
str_detect(services, "Combined_Authorities") ~ "CAUTH",
str_detect(services, "/Counties_and_Unitary_Authorities") ~ "CTYUA",
str_detect(services, "/Counties_") ~ "CTY",
str_detect(services, "/Countries_") ~ "CTRY",
str_detect(services, "/County_Electoral_Division") ~ "CED",
str_detect(services, "/Local_Authority_Districts") ~ "LAD",
str_detect(services, "/Local_Planning_Authorities") ~ "LPA",
str_detect(services, "/Metropolitan_Counties") ~ "MCTY",
str_detect(services, "/Parishes_and_Non_Civil_Parished_Areas") ~ "PARNCP",
str_detect(services, "/Parishes") ~ "PAR",
str_detect(services, "/Regions") ~ "RGN",
str_detect(services, "/Upper_Tier") ~ "UTLA",
str_detect(services, "/Wards") ~ "WD",
str_detect(services, "/Lower_Layer") ~ "LSOA",
str_detect(services, "/Middle_Layer") ~ "MSOA",
str_detect(services, "/Output_Areas") ~ "OA",
str_detect(service, "Combined_Authorities") ~ "CAUTH",
str_detect(service, "/Counties_and_Unitary_Authorities") ~ "CTYUA",
str_detect(service, "/Counties_") ~ "CTY",
str_detect(service, "/Countries_") ~ "CTRY",
str_detect(service, "/County_Electoral_Division") ~ "CED",
str_detect(service, "/Local_Authority_Districts") ~ "LAD",
str_detect(service, "/Local_Planning_Authorities") ~ "LPA",
str_detect(service, "/Metropolitan_Counties") ~ "MCTY",
str_detect(service, "/Parishes_and_Non_Civil_Parished_Areas") ~ "PARNCP",
str_detect(service, "/Parishes") ~ "PAR",
str_detect(service, "/Regions") ~ "RGN",
str_detect(service, "/Upper_Tier") ~ "UTLA",
str_detect(service, "/Wards") ~ "WD",
str_detect(service, "/Lower_Layer") ~ "LSOA",
str_detect(service, "/Middle_Layer") ~ "MSOA",
str_detect(service, "/Output_Areas") ~ "OA",
),
boundary = as.factor(boundary)
) |>
Expand All @@ -66,25 +64,33 @@ data_urls <- as.data.frame(services) |>
) |>
relocate(boundary_type, .before = boundary) |>
mutate(
resolution = case_when(
str_detect(services, "_BFC") ~ "BFC",
str_detect(services, "_BFE") ~ "BFE",
str_detect(services, "_BGC") ~ "BGC",
str_detect(services, "_BUC") ~ "BUC"
detail_level = case_when(
str_detect(service, "_BFC") ~ "BFC",
str_detect(service, "_BFE") ~ "BFE",
str_detect(service, "_BGC") ~ "BGC",
str_detect(service, "_BUC") ~ "BUC"
),
detail_level = as.factor(resolution)
detail_level = as.factor(detail_level)
) |>
mutate(
year = str_extract(services, "_(19|20)(\\d){2}"),
year = str_extract(service, "_(19|20)(\\d){2}"),
year = as.numeric(str_remove(year, "_"))
) |>
mutate(
type = case_when(
str_detect(service, "Lookup") ~ "Lookup",
!is.na(boundary) ~ "Boundary",
),
type = as.factor(type)
)

ons_boundaries <- ons_data |>
filter(type == "Boundary") |>
select(-type) |>
# Create URL to query featureserver and return a geojson file.
mutate(url_download = paste0(services, "/0/query?where=1%3D1&outFields=*&outSR=4326&f=json")) |>
#
# data_boundaries <- data_urls |>
# filter(!is.na(boundary)) |>
mutate(url_download = paste0(service, "/0/query?where=1%3D1&outFields=*&outSR=4326&f=json")) |>
# Create unique id
mutate(id = paste(boundary, year, detail_level, sep = "_")) |>
relocate(id)

usethis::use_data(data_urls, overwrite = TRUE, internal = TRUE)
usethis::use_data(ons_boundaries, overwrite = TRUE)
Binary file added data/ons_boundaries.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/boundaries_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions man/ons_boundaries.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ccf9ea5

Please sign in to comment.