Style vignettes

epiverse-trace · Aug 2, 2024 · f9000b1 · f9000b1
1 parent 86c6d3f
commit f9000b1
Show file tree

Hide file tree

Showing 9 changed files with 84 additions and 56 deletions.
diff --git a/R/download_demographic.R b/R/download_demographic.R
@@ -5,7 +5,7 @@
 #' and Dwelling Census (CNPV) of 2018.
 #'
 #' @param dataset character with the demographic dataset name. Please use
-#' \code{list_datasets("demographic", "EN")} or 
+#' \code{list_datasets("demographic", "EN")} or
 #' \code{list_datasets("demographic", "ES")} to check available datasets.
 #'
 #' @examples

diff --git a/R/download_geospatial.R b/R/download_geospatial.R
@@ -38,7 +38,7 @@
 #' @return \code{data.frame} object with downloaded data.
 #'
 #' @export
-download_geospatial <- function(spatial_level, simplified = TRUE, 
+download_geospatial <- function(spatial_level, simplified = TRUE,
                                 include_geom = TRUE, include_cnpv = TRUE) {
   checkmate::assert_logical(simplified)
   checkmate::assert_logical(include_geom)
@@ -47,17 +47,17 @@ download_geospatial <- function(spatial_level, simplified = TRUE,
     "At least one of the groups (`geom` and/or `cnpv`)
             must be TRUE" = any(include_geom, include_cnpv)
   )
-  
+
   dataset <- retrieve_geospatial_name(spatial_level)
   dataset_path <- retrieve_path(dataset)
-  
-  if(simplified){
+
+  if (simplified) {
     dataset_path <- sub("\\.gpkg$", "_SIM.gpkg", dataset_path)
   }
   geospatial_data <- sf::st_read(dataset_path, quiet = TRUE)
   geospatial_vars <- c("area", "latitud", "longitud")
   shape_vars <- c("shape_length", "shape_area")
-  
+
   if (include_geom && !include_cnpv) {
     last_base_index <- which(colnames(geospatial_data) == "longitud")
     geospatial_data <- geospatial_data %>%

diff --git a/R/merge_geo_demographic.R b/R/merge_geo_demographic.R
@@ -55,9 +55,9 @@ merge_geo_demographic <- function(demographic_dataset, simplified = TRUE) {
         names_from = dplyr::all_of(column),
         values_from = dplyr::all_of(total_col)
       )
-    geospatial <- suppressMessages(download_geospatial("department", 
-                                                       simplified = simplified,
-                                                       include_cnpv = FALSE
+    geospatial <- suppressMessages(download_geospatial("department",
+      simplified = simplified,
+      include_cnpv = FALSE
     ))
     merged_data <- merge(geospatial, filtered_df,
       by.x = "codigo_departamento",
@@ -72,9 +72,9 @@ merge_geo_demographic <- function(demographic_dataset, simplified = TRUE) {
         names_from = dplyr::all_of(column),
         values_from = dplyr::all_of(total_col)
       )
-    geospatial <- suppressMessages(download_geospatial("municipality", 
-                                                       simplified = simplified,
-                                                       include_cnpv = FALSE
+    geospatial <- suppressMessages(download_geospatial("municipality",
+      simplified = simplified,
+      include_cnpv = FALSE
     ))
     merged_data <- merge(geospatial, filtered_df,
       by.x = "codigo_municipio",

diff --git a/vignettes/climate_data.Rmd b/vignettes/climate_data.Rmd
@@ -51,6 +51,7 @@ knitr::kable(IDEAM_tags)
 Each observation is subject to the availability of stations in the ROI and the stations' status (active, maintenance or suspended), as well as quality filters implemented by IDEAM.
 
 In this vignette you will learn: 
+
 1. How to download climate data using **ColOpenData**.
 2. How to aggregate climate data by different frequencies.
 3. How to plot downloaded climate data.
@@ -106,7 +107,7 @@ We can make a first exploration to check if there are any stations contained ins
 :::
 
 ```{r stations in roi}
-stations <- stations_in_roi(roi)
+stations <- stations_in_roi(geometry = roi)
 
 head(stations)
 ```
@@ -160,27 +161,37 @@ To plot a time series of the stations' data we can use `ggplot()` function from
 ```{r plot temperatures stations}
 ggplot(data = tssm_stations) +
   geom_line(aes(x = date, y = value, group = station), color = "#106ba0") +
-  ggtitle("Max Temperature in Espinal by station") +
+  ggtitle("Dry-bulb Temperature in Espinal by station") +
   xlab("Date") +
   ylab("Temperature [°C]") +
+  facet_grid(rows = vars(station)) +
   theme_minimal() +
-  facet_grid(rows = vars(station))
+  theme(
+    plot.background = element_rect(fill = "white", colour = "white"),
+    panel.background = element_rect(fill = "white", colour = "white"),
+    plot.title = element_text(hjust = 0.5)
+  )
 ```
 
 ::: {style="text-align: justify;"}
 As we can see, only one station has data for the selected period. However, because the data are measured hourly, we cannot easily observe changes in the temperature patterns over time. To address this, we will use the aggregation function `aggregate_climate()`, which aggregates climate data by time. This function takes the desired aggregation frequency as a parameter.
 :::
 
 ```{r plot monthly}
-tssm_month <- tssm_stations %>% aggregate_climate("month")
+tssm_month <- tssm_stations %>% aggregate_climate(frequency = "month")
 
 ggplot(data = tssm_month) +
   geom_line(aes(x = date, y = value, group = station), color = "#106ba0") +
-  ggtitle("Dry-bulb Temperature") +
+  ggtitle("Dry-bulb Temperature in Espinal by station") +
   xlab("Date") +
   ylab("Dry-bulb temperature [C]") +
+  facet_grid(rows = vars(station)) +
   theme_minimal() +
-  facet_grid(rows = vars(station))
+  theme(
+    plot.background = element_rect(fill = "white", colour = "white"),
+    panel.background = element_rect(fill = "white", colour = "white"),
+    plot.title = element_text(hjust = 0.5)
+  )
 ```
 ## Other methods
 ::: {style="text-align: justify;"}
@@ -200,7 +211,7 @@ tssm_roi <- download_climate_geom(
   start_date = "2013-01-01",
   end_date = "2016-12-31",
   tag = "TSSM_CON"
-) %>% aggregate_climate("month")
+) %>% aggregate_climate(frequency = "month")
 ```
 
 To make the download process even easier, and avoid the creation of already known geometries like municipalities or departments, **ColOpenData** offers an extra function to download data using the areas' DIVIPOLA code.
@@ -230,7 +241,7 @@ tssm_mpio <- download_climate(
   start_date = "2013-01-01",
   end_date = "2016-12-31",
   tag = "TMX_CON"
-) %>% aggregate_climate("month")
+) %>% aggregate_climate(frequency = "month")
 ```
 
 ## Disclaimer

diff --git a/vignettes/demographic_data.Rmd b/vignettes/demographic_data.Rmd
@@ -51,7 +51,7 @@ First, we have to access the demographic documentation, to check available datas
 :::
 
 ```{r documentation, echo =TRUE}
-datasets_dem <- list_datasets("demographic", "EN")
+datasets_dem <- list_datasets(module = "demographic", language = "EN")
 
 head(datasets_dem)
 ```
@@ -63,7 +63,7 @@ After checking the documentation, we can load the data we want to work with. To
 ### Data load
 
 ```{r data load, echo=TRUE}
-public_services_d <- download_demographic("DANE_CNPVV_2018_8VD")
+public_services_d <- download_demographic(dataset = "DANE_CNPVV_2018_8VD")
 
 head(public_services_d)
 ```
@@ -124,19 +124,9 @@ ggplot(proportions_wss, aes(
   ) +
   theme_minimal() +
   theme(
+    plot.background = element_rect(fill = "white", colour = "white"),
+    panel.background = element_rect(fill = "white", colour = "white"),
     axis.text.x = element_text(angle = 45, hjust = 1),
     plot.title = element_text(hjust = 0.5)
   )
 ```
-
-## Merge geospatial and demographic
-
-::: {style="text-align: justify;"}
-Geospatial and demographic data can be merged based on the spatial aggregation level. While geospatial data can be aggregated down to the block level, demographic data is typically available only at the department and municipality levels.
-
-To download a merged dataset, we only will need the name of the desired demographic dataset, being careful about the requested level of aggregation. We would use the function `merge_geo_demographic()` using the dataset name as a parameter.s
-:::
-
-```{r merge geo dem, eval = FALSE}
-merged_data <- merge_geo_demographic("DANE_CNPVV_2018_8VD")
-```
diff --git a/vignettes/documentation_and_dictionaries.Rmd b/vignettes/documentation_and_dictionaries.Rmd
@@ -114,13 +114,15 @@ library(ColOpenData)
 
 ```{r list datasets}
 datasets <- list_datasets(language = "EN")
+
 head(datasets)
 ```
 
 To list only demographic datasets we can use:
 
 ```{r list demographic datasets}
 demographic_datasets <- list_datasets(module = "demographic", language = "EN")
+
 head(demographic_datasets)
 ```
 
@@ -143,16 +145,20 @@ Sometimes, going through each dataset to find specific information can be tiring
 
 ```{r list datasets with information by age}
 age_datasets <- look_up(keywords = "age")
+
 head(age_datasets)
 ```
 
 We can specify a module to make a more narrow and precise search.
 
 ```{r list datasets with information by area and sex in demographic module}
 area_sex_datasets <- look_up(
-  keywords = c("area", "sex"), module = "demographic",
-  logic = "and"
+  keywords = c("area", "sex"),
+  module = "demographic",
+  logic = "and",
+  language = "EN"
 )
+
 head(area_sex_datasets)
 ```
 
@@ -163,7 +169,11 @@ Datasets inside the geospatial module contain a summarized version of the census
 :::
 
 ```{r dictionary for MGNCNPV at municipalities}
-dict_mpio <- geospatial_dictionary("municipality", "EN")
+dict_mpio <- geospatial_dictionary(
+  spatial_level = "municipality",
+  language = "EN"
+)
+
 head(dict_mpio)
 ```
 
@@ -174,7 +184,8 @@ Climate data is not stored in multiple datasets but as an unique dataset with nu
 :::
 
 ```{r dicionary for climate data}
-dict_climate <- get_climate_tags()
+dict_climate <- get_climate_tags(language = "EN")
+
 head(dict_climate)
 ```
 
@@ -194,15 +205,18 @@ To get the DIVIPOLA code of a municipality or department we can use the auxiliar
 :::
 
 ```{r cordoba}
-name_to_code_dep("Guajira")
+name_to_code_dep(department_name = "Guajira")
 ```
 
 ::: {style="text-align: justify;"}
 To retrieve a municipality code we must include the department name and the municipality name. This is to consider repetition among municipalities' names across departments.
 :::
 
 ```{r divipola tunja}
-name_to_code_mun("Boyacá", "Tunja")
+name_to_code_mun(
+  department_name = "Boyacá",
+  municipality_name = "Tunja"
+)
 ```
 
 ::: {style="text-align: justify;"}
@@ -212,5 +226,5 @@ On the other hand, departments' and municipalities' codes can be translated to r
 :::
 
 ```{r tunja name}
-code_to_name_mun("15001")
+code_to_name_mun(municipality_code = "15001")
 ```
diff --git a/vignettes/geospatial_data.Rmd b/vignettes/geospatial_data.Rmd
@@ -103,7 +103,7 @@ To understand which column contains the internet related information, we will ne
 :::
 
 ```{r dictionary for urban sections}
-dict <- geospatial_dictionary("dpto", "EN")
+dict <- geospatial_dictionary(spatial_level = "dpto", language = "EN")
 
 head(dict)
 ```
@@ -127,6 +127,8 @@ ggplot(data = internet_cov) +
   geom_sf(mapping = aes(fill = internet), color = NA) +
   theme_minimal() +
   theme(
+    plot.background = element_rect(fill = "white", colour = "white"),
+    panel.background = element_rect(fill = "white", colour = "white"),
     panel.grid = element_blank(),
     axis.text = element_blank(),
     axis.ticks = element_blank()

diff --git a/vignettes/merge_geo_demographic.Rmd b/vignettes/merge_geo_demographic.Rmd
@@ -37,26 +37,25 @@ library(ggplot2)
 ::: {style="text-align: justify;"}
 Geospatial and demographic data can be merged based on the spatial aggregation level (SAL). While geospatial data can be aggregated down to the block level, demographic data is typically available only at the department and municipality levels. Therefore, these are the only SAL that can be accessed in both types of data for merging.
 
-Now, the `merge_geo_demographic()` function only takes by parameter the demographic dataset of interest. Therefore, we should first access the demographic documentation to know which dataset we want to work with. Let's suppose we want to select a dataset at the department level.
+Now, the `merge_geo_demographic()` function takes as a parameter the demographic dataset of interest. Therefore, we should first access the demographic documentation to know which dataset we want to work with. Let's suppose we want to select a dataset at the department level. We can load all available demographic datasets and then filter them by the desired SAL.
 :::
 
 ```{r documentation, echo =TRUE}
-# Available demographic datasets
 datasets_dem <- list_datasets("demographic", "EN")
 
-# Available demographic datasets at the department level
 department_datasets <- datasets_dem[datasets_dem["level"] == "department", ]
+
 head(department_datasets)
 ```
 
 ::: {style="text-align: justify;"}
-After reviewing the available datasets, we can select the one we wish to work with and take a closer look. For instance, let's suppose we choose the dataset "DANE_CNPVPD_2018_14BPD".
+After reviewing the available datasets, we can select the one we wish to work with and take a closer look. For instance, let's suppose we choose the dataset ["DANE_CNPVPD_2018_14BPD"]{.underline}.
 :::
 
 ```{r data, echo =TRUE}
-# Download demographic dataset
-chosen_data <- download_demographic("DANE_CNPVPD_2018_14BPD")
-head(chosen_data)
+chosen_dataset <- download_demographic("DANE_CNPVPD_2018_14BPD")
+
+head(chosen_dataset)
 ```
 
 ::: {style="text-align: justify;"}
@@ -66,34 +65,44 @@ The `simplified` argument downloads a simplified version of the geometries. This
 :::
 
 ```{r merge data, echo =TRUE}
-# Download demographic dataset
-merged_data <- merge_geo_demographic("DANE_CNPVPD_2018_14BPD")
+merged_data <- merge_geo_demographic(
+  demographic_dataset =
+    "DANE_CNPVPD_2018_14BPD"
+)
+
 head(merged_data)
 ```
 
 ::: {style="text-align: justify;"}
 `merged_data` presents geospatial information related to departments, as well as the information related to the health service attended by the population. We can use this dataset to visualize the proportion of people in each department who used home remedies for health issues. To achieve this, we will calculate the proportion by dividing the count of people who reported using home remedies ("uso_remedios_caseros") by the total count of people who reported experiencing a health problem in each department.
 :::
 
-```{r plot}
-# Calculate the proportion
+```{r mutate}
 merged_data <- merged_data %>%
   mutate(proportion_home_remedies = uso_remedios_caseros /
     total_personas_que_tuvieron_alguna_enfermedad)
+```
 
-# Plot the proportion by department
+::: {style="text-align: justify;"}
+We can now plot the results.
+:::
+
+```{r plot}
 ggplot(data = merged_data) +
   geom_sf(mapping = aes(fill = proportion_home_remedies), color = "white") +
   theme_minimal() +
   theme(
+    plot.background = element_rect(fill = "white", colour = "white"),
+    panel.background = element_rect(fill = "white", colour = "white"),
     panel.grid = element_blank(),
     axis.text = element_blank(),
-    axis.ticks = element_blank()
+    axis.ticks = element_blank(),
+    plot.title = element_text(hjust = 0.5)
   ) +
   scale_fill_gradient("Count", low = "#10bed2", high = "#deff00") +
   ggtitle(
     label = "Proportion of people who reported using home remedies to treat
     a health problem",
     subtitle = "Colombia"
   )
-```
+```
diff --git a/vignettes/population_projections.Rmd b/vignettes/population_projections.Rmd
@@ -122,6 +122,8 @@ ggplot(female_groups, aes(
   ) +
   theme_minimal() +
   theme(
+    plot.background = element_rect(fill = "white", colour = "white"),
+    panel.background = element_rect(fill = "white", colour = "white"),
     axis.text.x = element_text(angle = 45, hjust = 1),
     plot.title = element_text(hjust = 0.5)
   )