Skip to content

Commit

Permalink
adjust xml conversion script to get the actual values..
Browse files Browse the repository at this point in the history
  • Loading branch information
Edouard-Legoupil committed Feb 9, 2024
1 parent a2dae97 commit 9e6dff4
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 21 deletions.
16 changes: 11 additions & 5 deletions dev/dev_unhcr_programme.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ ping_ollama()

## Show donors

* Who are the __main donors by country__ in terms of number of projects and/or total budget?
* Who are the __main donors by country__ in terms of transactions?

```{r function-show_donors, echo=FALSE, message=FALSE, warning=FALSE}
#' show_donors
#'
#' @description Who are the main donors by country in terms of number of projects and/or total budget?
#' @description Who are the main donors by country in terms of transactions?
#'
#' @param year year to select starting from 2016 - could be one year or a list
#' @param programme_lab A character vector corresponding to the name of the programme.
Expand All @@ -113,6 +113,8 @@ show_donors <- function(year,
ctr_name = NULL,
transaction_type_name = "Incoming Commitment" ) {
## Check year is after or equal 2016
# Check if only one argument is passed
if (!is.null(programme_lab) && !is.null(iati_identifier_ops)) {
Expand Down Expand Up @@ -186,9 +188,11 @@ show_donors <- function(year,

```{r examples-show_donors, message=FALSE, warning=FALSE, fig.retina = 2, fig.width = 8, fig.asp = 0.618, fig.align = "center", out.width = "90%"}
knitr::kable( codeTransactionType |> dplyr::select(name, description) )
knitr::kable(iati::dataTransaction |>
dplyr::select( transaction_type_name, transaction_type_description) |>
dplyr::distinct() )
show_donors(year = 2018,
show_donors(year = 2022,
programme_lab = "The Americas",
transaction_type_name = "Incoming Commitment" )
Expand Down Expand Up @@ -503,7 +507,9 @@ show_partnership <- function( year,
participating_org_type_name,
participating_org_role_name) |>
tidyr::pivot_wider(names_from = participating_org_role_name,
values_from = participating_org_eng )
values_from = participating_org_eng ,
values_fn = list) |>
dplyr::select( - Accountable)
df3 <- df |>
dplyr::select(participating_org_eng,
Expand Down
14 changes: 12 additions & 2 deletions inst/build.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,22 @@ iati_file <- file_temp()
# https://reporting.unhcr.org/files/IATI/UNHCR-Activities-2020.xml
# https://reporting.unhcr.org/files/IATI/UNHCR-Activities-2023.xml

download.file('https://reporting.unhcr.org/files/IATI/UNHCR-Activities-2022.xml',
#' list_iati <- c("http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2016.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2017.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2018.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2019.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2020.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2021.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2022.xml",
#' "http://reporting.unhcr.org/files/IATI/UNHCR-Activities-2023.xml"
#' )

download.file('https://reporting.unhcr.org/files/IATI/UNHCR-Activities-2021.xml',
iati_file,
quiet = TRUE)

# create folder -----------------------------------------------------------
folder_name = "data-raw-2022"
folder_name = "data-raw-2021"
dir_create(folder_name)

# list tasks --------------------------------------------------------------
Expand Down
100 changes: 86 additions & 14 deletions inst/source/tasks/1_extract_data/iati_result.R
Original file line number Diff line number Diff line change
@@ -1,38 +1,110 @@
iati_result <- function(xml_iati) {
# xml subset --------------------------------------------------------------

# xml subset Extraction:--------------------------------------------------------------
# The function starts by extracting a subset of XML data related to results.
# It looks for all nodes with the name iati-activity/result/indicator using xml_find_all.
# The result is stored in the notesets_indicator variable.


notesets_indicator <- xml_iati %>%
xml_find_all('.//iati-activity/result/indicator') |>
map(xml_children)


# extract data ------------------------------------------------------------
# Data Extraction and Transformation:------------------------------------------------------------

# The function then processes each indicator node in the notesets_indicator subset.
df_result <- map_dfr(notesets_indicator, function(i) {

# For each indicator, it extracts relevant information into a tibble (data frame):
tibble(
iati_identifier = tryCatch(i|> xml_parent() |> xml_parent() |> xml_parent() |> xml_find_all("iati-identifier") |> xml_text(), error = function(e){ NA_character_}),
result_type = tryCatch(i |> xml_parent() |> xml_parent() |> xml_attr("type"), error = function(e){ NA_character_}),
result_aggregation_status = tryCatch(i |> xml_parent() |> xml_parent() |> xml_attr("aggregation-status"), error = function(e){ NA_character_}),
result_title = i |> xml_parent() |> xml_parent() |> xml_find_first("title") |> xml_text(),
result_desc = i |> xml_parent() |> xml_parent() |> xml_find_first("description") |> xml_text(),
result_indicator_measure = i |> xml_parent() |> xml_attr("measure"),
result_indicator_title = xml_text_by_name(i, ".", "title"),
result_indicator_desc = xml_text_by_name(i, ".", "description"),
# Tries to find the IATI identifier associated with the indicator.
iati_identifier = tryCatch(
i |> xml_parent() |> xml_parent() |> xml_parent() |> xml_find_all("iati-identifier") |> xml_text(),
error = function(e) {
NA_character_
}
),
# Tries to extract the type of result (output, outcome, etc.).
result_type = tryCatch(
i |> xml_parent() |> xml_parent() |> xml_attr("type"),
error = function(e) {
NA_character_
}
),
## Applies to the whole result that the indicator is linked to
result_title = i |>
xml_parent() |>
xml_parent() |>
xml_find_first("title") |>
xml_text(),




## metadata for the indicator itself..
# result_aggregation_status = tryCatch(
# i |> xml_parent() |> xml_parent() |> xml_attr("aggregation-status"),
# error = function(e) {
# NA_character_
# }
# ),
# result_ascending = tryCatch(
# i |> xml_parent() |> xml_parent() |> xml_attr("ascending"),
# error = function(e) {
# NA_character_
# }
# ),
# # result_indicator_measure = i |> xml_parent() |> xml_attr("measure"),
# result_measure = tryCatch(
# i |> xml_parent() |> xml_parent() |> xml_attr("measure"),
# error = function(e) {
# NA_character_
# }
# ),

result_indicator_title = xml_text_by_name(i, ".", "title"),
result_indicator_measure = i |> xml_parent() |> xml_attr("measure"),
result_indicator_aggregation_status = i |> xml_parent() |> xml_attr("aggregation-status"),
result_indicator_ascending = i |> xml_parent() |> xml_attr("ascending"),
result_indicator_reference_code = xml_child_attr_by_name(i, "reference", "code", 1),
result_indicator_reference_uri = xml_child_attr_by_name(i, "reference", "indicator-uri", 1),
result_indicator_reference_vocabulary = xml_child_attr_by_name(i, "reference", "vocabulary", 1),


## Check baseline
result_indicator_baseline_location_ref = xml_child_attr_by_name(i, "location", "ref"),
result_indicator_baseline_value = xml_attr_by_name(i, ".", "baseline", "value"),
result_indicator_baseline_year = xml_attr_by_name(i, ".", "baseline", "year"),
result_indicator_baseline_date = xml_attr_by_name(i, ".", "baseline", "iso-date"),
result_indicator_baseline_location_ref = xml_child_attr_by_name(i, "location", "ref"),

result_indicator_baseline_dimension_1 = xml_child_attr_by_name(i, "dimension", "name", 1),
result_indicator_baseline_dimension_value_1 = xml_child_attr_by_name(i, "dimension", "value", 1),
result_indicator_baseline_dimension_2 = xml_child_attr_by_name(i,"dimension", "name", 2),
result_indicator_baseline_dimension_2 = xml_child_attr_by_name(i, "dimension", "name", 2),
result_indicator_baseline_dimension_value_2 = xml_child_attr_by_name(i, "dimension", "value", 2),

## Check period...

## First time for the period...
result_indicator_period_start = xml_attr_by_name(i, "period-start", "period-start", "iso-date"),
result_indicator_period_end = xml_attr_by_name(i, "period-end", "period-end", "iso-date"),

## Value for target
result_indicator_target_value = xml_attr_by_name(i, "target", "target", "value"),
result_indicator_target_location_ref = xml_attr_by_name(i, "target/location", "location", "ref"),
result_indicator_target_dimension_1 = xml_attr_by_name(i, "./target/dimension", "dimension", "name", 1),
result_indicator_target_value_1 = xml_attr_by_name(i, "./target/dimension", "dimension", "value", 1),
result_indicator_target_dimension_2 = xml_attr_by_name(i, "./target/dimension", "dimension", "name", 2),
result_indicator_target_value_2 = xml_attr_by_name(i, "./target/dimension", "dimension", "value", 2)
result_indicator_target_value_2 = xml_attr_by_name(i, "./target/dimension", "dimension", "value", 2),

## Value for actual
result_indicator_actual_value = xml_attr_by_name(i, "actual", "actual", "value"),
result_indicator_actual_location_ref = xml_attr_by_name(i, "actual/location", "location", "ref"),
result_indicator_actual_dimension_1 = xml_attr_by_name(i, "./actual/dimension", "dimension", "name", 1),
result_indicator_actual_value_1 = xml_attr_by_name(i, "./actual/dimension", "dimension", "value", 1),
result_indicator_actual_dimension_2 = xml_attr_by_name(i, "./actual/dimension", "dimension", "name", 2),
result_indicator_actual_value_2 = xml_attr_by_name(i, "./actual/dimension", "dimension", "value", 2)


)

})
Expand Down

0 comments on commit 9e6dff4

Please sign in to comment.