diff --git a/globalprep/lsp/v2023/1_prep_wdpa_rast.html b/globalprep/lsp/v2023/1_prep_wdpa_rast.html
new file mode 100644
index 00000000..f9ec98d7
--- /dev/null
+++ b/globalprep/lsp/v2023/1_prep_wdpa_rast.html
@@ -0,0 +1,2183 @@
+Rasterize each shapefile using terra::rasterize() (fasterize() doesn't work with SpatRaster objects) and save to disk using terra::writeRaster(). (This was formerly done with a function defined in the document, writeRasterBlocks(); as of now, it has not been updated to use terra functions and objects.)
+View lsp_data_prep.Rmd for a full description of Lasting Special Places data preparation.
+Accessing and downloading the data was difficult in 2023 due to a downloading bug; luckily, there are multiple ways to download the data from the webpage. The directions below sound easy, but it is also easy to get routed to a page where the download functionality is broken.
+Directions to download data:
+1: Go to the WDPA page: https://www.protectedplanet.net/en/thematic-areas/wdpa?tab=WDPA
+2: Select the download button in the top right-hand corner.
+3: Download and unzip the file.
+4: There will be additional zip files within the zip file you download. Once unzipped, these are the three files you will use throughout the LSP data prep (one way to script the extraction is sketched below).
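+One way to script steps 3 and 4 is sketched below, assuming the June 2023 download was saved to dir_data and that the inner archives follow the _0/_1/_2 naming of that release (the archive names here are assumptions; adjust for your year):
+# sketch only: outer archive name is assumed from the June 2023 release
+outer_zip <- file.path(dir_data, 'WDPA_Jun2023_Public_shp.zip')
+unzip(outer_zip, exdir = dir_data)
+
+# the download contains three inner zips (_0, _1, _2); extract each into its
+# own folder so the identically named shapefile layers don't overwrite each other
+inner_zips <- list.files(dir_data, pattern = 'shp_[0-2]\\.zip$', full.names = TRUE)
+for (z in inner_zips) {
+  unzip(z, exdir = file.path(dir_data, tools::file_path_sans_ext(basename(z))))
+}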
+The WDPA-MPA dataset comes as a shapefile or geodatabase in the WGS84 coordinate reference system.
+Once the polygons have been prepped, we rasterize the results to 500 m resolution.
+This process is all done in the script 1_prep_wdpa_rast.Rmd. After that is complete, move on to computing zonal statistics.
+2021: Changed data source to the 2021 release. The source data has now been split into 3 different files, so we will need to merge all three shapefiles together before we can work with the data. We updated the script to save 3 separate files for the reordering and transforming steps, for ease of use and reproducibility.
+Read in the polygons from the WDPA dataset; filter as needed.
+# create path objects for the 3 different zip files downloaded from source
+shp_raw_0 <- file.path(dir_data, 'WDPA_Jun2023_Public_shp_0', 'WDPA_Jun2023_Public_shp-polygons')
+shp_raw_1 <- file.path(dir_data, 'WDPA_Jun2023_Public_shp_1', 'WDPA_Jun2023_Public_shp-polygons')
+shp_raw_2 <- file.path(dir_data, 'WDPA_Jun2023_Public_shp_2', 'WDPA_Jun2023_Public_shp-polygons')
+
+# read shape files in as sf objects
+wdpa_poly_0 <- st_read(dsn = dirname(shp_raw_0),
+                       layer = basename(shp_raw_0),
+                       stringsAsFactors = FALSE)
+
+wdpa_poly_1 <- st_read(dsn = dirname(shp_raw_1),
+                       layer = basename(shp_raw_1),
+                       stringsAsFactors = FALSE)
+
+wdpa_poly_2 <- st_read(dsn = dirname(shp_raw_2),
+                       layer = basename(shp_raw_2),
+                       stringsAsFactors = FALSE)
+
+# put all shape files into a list
+wdpa_list <- list(wdpa_poly_0, wdpa_poly_1, wdpa_poly_2)
+Test and inspect the raw data to ensure quality.
+# inspect status column to find unique values:
+# "Designated" "Inscribed" "Proposed" "Not Reported" "Established" "Adopted"
+wdpa_poly_0$STATUS %>% unique()
+
+# check for non-mpa programs in the management plan column; we want these to be empty
+x_0 <- wdpa_poly_0 %>%
+  filter(str_detect(tolower(MANG_PLAN), 'non-mpa program'))
+
+x_1 <- wdpa_poly_1 %>%
+  filter(str_detect(tolower(MANG_PLAN), 'non-mpa program'))
+
+x_2 <- wdpa_poly_2 %>%
+  filter(str_detect(tolower(MANG_PLAN), 'non-mpa program'))
+
+# list of data frames
+df_list <- list(x_0 = x_0, x_1 = x_1, x_2 = x_2)
+
+# iterate over the list of data frames
+for (i in names(df_list)) {
+  # check if the data frame is empty
+  if (nrow(df_list[[i]]) == 0) {
+    print(paste(i, "is empty (good)"))
+  } else {
+    print(paste(i, "is not empty (bad)"))
+  }
+}
+
+# remove unneeded objects
+rm(df_list, x_0, x_1, x_2)
+# create a function to run over the list of sf objects
+tidy_wdpa_data <- function(wdpa_poly_object) {
+
+  DF <- wdpa_poly_object %>%
+    setNames(tolower(names(.))) %>% # lowercase the column names
+    dplyr::select(wdpaid, name, orig_name,
+                  desig, desig_eng, desig_type,
+                  iucn_cat,
+                  marine, no_take, no_tk_area,
+                  status, status_yr,
+                  mang_auth, mang_plan, verif,
+                  sub_loc, parent_iso, iso3) %>%
+    dplyr::mutate(status_yr = as.integer(status_yr))
+
+  # keep only designated areas, and drop any non-MPA-program rows
+  DF <- DF[DF$status == 'Designated', ]
+  DF <- DF[!str_detect(tolower(DF$mang_plan), 'non-mpa program'), ]
+
+  return(DF)
+}
+
+# run function over the list
+wdpa_poly_list <- lapply(wdpa_list, tidy_wdpa_data)
+
+# check to see if it worked; we should have 19 columns and fewer observations
+test <- wdpa_poly_list[[1]]
+
+# remove test for memory
+rm(test)
+# now we need to unlist them and write them to the appropriate folder
+wdpa_poly_fix_0 <- wdpa_poly_list[[1]]
+wdpa_poly_fix_1 <- wdpa_poly_list[[2]]
+wdpa_poly_fix_2 <- wdpa_poly_list[[3]]
+
+# create file paths for the files; make sure their names align with your year's dates
+shp_reorder_0 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_0') # replace month and year with the appropriate month and year
+shp_reorder_1 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_1') # replace month and year with the appropriate month and year
+shp_reorder_2 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_2') # replace month and year with the appropriate month and year
+
+# write the shapefiles to the raw_data Mazu folder; warnings suppressed; takes about 20 minutes
+suppressWarnings(st_write(wdpa_poly_fix_0,
+                          dsn = dirname(shp_reorder_0),
+                          layer = basename(shp_reorder_0),
+                          driver = 'ESRI Shapefile'))
+
+suppressWarnings(st_write(wdpa_poly_fix_1,
+                          dsn = dirname(shp_reorder_1),
+                          layer = basename(shp_reorder_1),
+                          driver = 'ESRI Shapefile'))
+
+suppressWarnings(st_write(wdpa_poly_fix_2,
+                          dsn = dirname(shp_reorder_2),
+                          layer = basename(shp_reorder_2),
+                          driver = 'ESRI Shapefile'))
+
+# clean up memory
+rm('wdpa_poly_list', 'wdpa_list', 'wdpa_poly_0', 'wdpa_poly_1', 'wdpa_poly_2',
+   'wdpa_poly_fix_0', 'wdpa_poly_fix_1', 'wdpa_poly_fix_2')
+gc()
+Transform ordered polygons to Mollweide and save as new polygons.
+# file paths for the shape files we just wrote to Mazu
+shp_reorder_0 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_0')
+shp_reorder_1 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_1')
+shp_reorder_2 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_2')
+
+# read in those shape files as sf objects, to ensure they were written correctly and for use below
+wdpa_poly_0 <- st_read(dsn = dirname(shp_reorder_0),
+                       layer = basename(shp_reorder_0),
+                       stringsAsFactors = FALSE)
+wdpa_poly_1 <- st_read(dsn = dirname(shp_reorder_1),
+                       layer = basename(shp_reorder_1),
+                       stringsAsFactors = FALSE)
+wdpa_poly_2 <- st_read(dsn = dirname(shp_reorder_2),
+                       layer = basename(shp_reorder_2),
+                       stringsAsFactors = FALSE)
+
+# put the sf objects in a list
+wdpa_list <- list(wdpa_poly_0, wdpa_poly_1, wdpa_poly_2)
+
+# create function to run over the list and change the CRS to Mollweide
+change_crs <- function(wdpa_poly_object) {
+
+  message('Spatially transforming WDPA polygons to Mollweide')
+
+  DF <- st_transform(wdpa_poly_object,
+                     crs = '+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs')
+
+  return(DF)
+}
+
+# run function over the list of sf objects
+wdpa_poly_list <- lapply(wdpa_list, change_crs)
+
+# unlist the items
+wdpa_poly_fix_0 <- wdpa_poly_list[[1]]
+wdpa_poly_fix_1 <- wdpa_poly_list[[2]]
+wdpa_poly_fix_2 <- wdpa_poly_list[[3]]
+
+# validate that the CRS has changed successfully
+st_crs(wdpa_poly_fix_0)
+
+# set file paths for writing; replace month and year with the appropriate month and year
+shp_xformed_0 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_xformed_0')
+shp_xformed_1 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_xformed_1')
+shp_xformed_2 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_xformed_2')
+
+# write the transformed files to Mazu; this will likely take ~7 minutes per file.
+# watch the file size on Mazu to check whether it is done writing
+st_write(wdpa_poly_fix_0, dsn = dirname(shp_xformed_0), layer = basename(shp_xformed_0),
+         driver = 'ESRI Shapefile', update = TRUE)
+st_write(wdpa_poly_fix_1, dsn = dirname(shp_xformed_1), layer = basename(shp_xformed_1),
+         driver = 'ESRI Shapefile', update = TRUE)
+st_write(wdpa_poly_fix_2, dsn = dirname(shp_xformed_2), layer = basename(shp_xformed_2),
+         driver = 'ESRI Shapefile', update = TRUE)
+terra::rasterize()
+2023: Here we switch to using the terra package to turn the vector sf objects into rasters. terra is the most modern package for raster management and rasterization, and there was work in previous years to transition from older packages to terra. For details of that transition, look at previous notebooks; this notebook has been cleaned and organized to minimize the visibility of the transition and to streamline the work of future OHI fellows.
+2022: terra::rasterize() is used with a SpatVector and a SpatRaster as input, followed by terra::writeRaster().
+2021: fasterize() from the fasterize package takes advantage of Simple Features objects from the sf package, rather than objects from the sp package. It is considerably faster; it returned a completed raster in ten minutes. However, saving the very large (18 GB) resulting raster proved problematic. The writeRasterBlocks() function (formerly defined in this document) helped get around that problem, though it still took over an hour to write the raster to disk.
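+The original writeRasterBlocks() definition is not shown in this version of the notebook. Purely as a sketch of the block-writing idea, here is what such a helper could look like using terra's low-level writeStart()/writeValues()/writeStop() API; the function name, chunking scheme, and arguments below are illustrative assumptions, not the original code:
+library(terra)
+
+# sketch: write a large raster to disk in blocks of rows so the full set of
+# cell values never has to sit in memory at once
+write_raster_blocks <- function(rast, filename, n_blocks = 20) {
+  out <- writeStart(rast, filename, overwrite = TRUE)    # open file for block-wise writing
+  rows_per_block <- ceiling(nrow(rast) / n_blocks)
+  for (i in seq_len(n_blocks)) {
+    start_row <- (i - 1) * rows_per_block + 1
+    if (start_row > nrow(rast)) break
+    n <- min(rows_per_block, nrow(rast) - start_row + 1)
+    vals <- readValues(rast, row = start_row, nrows = n) # pull one block of cell values
+    writeValues(out, vals, start_row, n)                 # append the block to the file on disk
+  }
+  writeStop(out)                                         # finalize the file
+}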
+# destination filepath for the new raster file
+rast_wdpa_file <- file.path(dir_goal_anx, 'rast', 'wdpa_2023_moll_500m.tif')
+
+# create the raster file, or skip the process if the raster file is already present
+if (!file.exists(rast_wdpa_file)) {
+
+  # file paths for vector data created earlier
+  # replace month and year with the appropriate month and year
+  shp_xformed_file_0 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_xformed_0.shp')
+  shp_xformed_file_1 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_xformed_1.shp')
+  shp_xformed_file_2 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_xformed_2.shp')
+
+  # create time stamp
+  ptm <- proc.time()
+
+  # read in data as SpatVectors with terra
+  wdpa_poly_0 <- vect(shp_xformed_file_0)
+  wdpa_poly_1 <- vect(shp_xformed_file_1)
+  wdpa_poly_2 <- vect(shp_xformed_file_2)
+
+  # bind them all together into one object
+  wdpa_poly_all <- rbind(wdpa_poly_0, wdpa_poly_1, wdpa_poly_2)
+
+  # report time: ~25 s to read in each file
+  cat('elapsed: ', (proc.time() - ptm)[3])
+
+  # clear memory
+  rm("wdpa_poly_0", "wdpa_poly_1", "wdpa_poly_2")
+  gc()
+
+  # read in base raster file used in OHI
+  rast_base <- terra::rast(file.path(dir_M, 'git-annex/globalprep/spatial/d2014',
+                                     'data/rgn_mol_raster_500m',
+                                     'rgn_inland1km_mol_500mcell.tif'))
+
+  # time stamp
+  ptm <- proc.time()
+
+  # convert vector object into raster object;
+  # for overlapping polygons, keep the oldest (minimum) status year for that area: fun = "min"
+  rast_wdpa <- terra::rasterize(wdpa_poly_all,
+                                rast_base,
+                                field = 'status_yr',
+                                fun = 'min')
+
+  # time stamp report
+  cat('rasterize elapsed: ', (proc.time() - ptm)[3])
+
+  # 2021 fasterize: 45.006 seconds!
+  # 2022 terra::rasterize: 876 seconds
+  # 2023 terra::rasterize: 454 seconds
+
+  # time stamp
+  ptm <- proc.time()
+
+  # must create a new folder and 'rast' subfolder at the destination for this to work
+  x <- writeRaster(rast_wdpa, rast_wdpa_file)
+
+  # report time stamp: ~76 seconds
+  message('writeRaster elapsed: ', (proc.time() - ptm)[3])
+}
+# you are encouraged to use this space to examine the raster created from the WDPA data
+
+# read in this year's and last year's data
+check_current <- terra::rast(rast_wdpa_file)
+check_past <- terra::rast(file.path("/home/shares/ohi/git-annex/globalprep/lsp/v2022/rast/wdpa_2022_moll_500m.tif"))
+
+# visualize both rasters
+plot(check_current, col = 'blue')
+plot(check_past, col = 'blue')
+Compare shapefiles from the previous assessment (Jun 2022) and the current one (Jun 2023).
+library(sf)
+library(raster)
+library(mapview)
+
+# file paths for previous year's data as sf
+shp_reorder_22_0 <- file.path('/home/shares/ohi/git-annex/globalprep/_raw_data/wdpa_mpa/d2022/WDPA_Jun2022_Public_shp/shps/WDPA_Jun2022_shp_ordered_0')
+shp_reorder_22_1 <- file.path('/home/shares/ohi/git-annex/globalprep/_raw_data/wdpa_mpa/d2022/WDPA_Jun2022_Public_shp/shps/WDPA_Jun2022_shp_ordered_1')
+shp_reorder_22_2 <- file.path('/home/shares/ohi/git-annex/globalprep/_raw_data/wdpa_mpa/d2022/WDPA_Jun2022_Public_shp/shps/WDPA_Jun2022_shp_ordered_2')
+
+# read in previous data
+wdpa_poly_22_0 <- st_read(dsn = dirname(shp_reorder_22_0),
+                          layer = basename(shp_reorder_22_0),
+                          stringsAsFactors = FALSE)
+wdpa_poly_22_1 <- st_read(dsn = dirname(shp_reorder_22_1),
+                          layer = basename(shp_reorder_22_1),
+                          stringsAsFactors = FALSE)
+wdpa_poly_22_2 <- st_read(dsn = dirname(shp_reorder_22_2),
+                          layer = basename(shp_reorder_22_2),
+                          stringsAsFactors = FALSE)
+
+# bind all into one
+wdpa_poly_all_previous <- rbind(wdpa_poly_22_0, wdpa_poly_22_1, wdpa_poly_22_2)
+
+# file paths for current data as sf
+shp_reorder_0 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_0')
+shp_reorder_1 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_1')
+shp_reorder_2 <- file.path(dir_data, 'shps', 'WDPA_Jun2023_shp_ordered_2')
+
+# read in current data
+wdpa_poly_0 <- st_read(dsn = dirname(shp_reorder_0),
+                       layer = basename(shp_reorder_0),
+                       stringsAsFactors = FALSE)
+wdpa_poly_1 <- st_read(dsn = dirname(shp_reorder_1),
+                       layer = basename(shp_reorder_1),
+                       stringsAsFactors = FALSE)
+wdpa_poly_2 <- st_read(dsn = dirname(shp_reorder_2),
+                       layer = basename(shp_reorder_2),
+                       stringsAsFactors = FALSE)
+
+# bind current data together
+wdpa_poly_all_current <- rbind(wdpa_poly_0, wdpa_poly_1, wdpa_poly_2)
+
+# check that column names match between years
+colnames(wdpa_poly_all_previous) == colnames(wdpa_poly_all_current)
+
+# check names
+unique(wdpa_poly_all_current$orig_name)
+unique(wdpa_poly_all_previous$orig_name)
+
+# test an area to see if things look right: Pitcairn, a UK territory
+test_area_current <- wdpa_poly_all_current %>%
+  filter(parent_iso == "GBR") %>%
+  filter(iso3 == "PCN")
+
+test_area_previous <- wdpa_poly_all_previous %>%
+  filter(parent_iso == "GBR") %>%
+  filter(iso3 == "PCN")
+
+mapview(test_area_current)
+mapview(test_area_previous)
+
+st_area(test_area_current)
+# 2021: 836075862002 m2
+# 2022: 841909966909 m2
+From Halpern et al. 2012 supplemental info:
+
+The 'Lasting Special Places' sub-goal focuses instead on those geographic locations that hold particular value for aesthetic, spiritual, cultural, recreational or existence reasons. This sub-goal is particularly hard to quantify. Ideally one would survey every community around the world to determine the top list of special places, and then assess how those locations are faring relative to a desired state (e.g., protected or well managed). The reality is that such lists do not exist. Instead, we assume areas that are protected represent these special places (i.e. the effort to protect them suggests they are important places).
+
+Clearly this is an imperfect assumption but in many cases it will be true. Using lists of protected areas as the catalogue of special places then creates the problem of determining a reference condition. We do not know how many special places have yet to be protected, and so we end up having all identified special places also being protected. To solve this problem we make two important assumptions. First, we assume that all countries have roughly the same percentage of their coastal waters and coastline that qualify as lasting special places. In other words, they all have the same reference target (as a percentage of the total area). Second, we assume that the target reference level is 30% of area protected.
+The model for this goal considers the inland coastal zone (up to 1 km inland) independently from, and equally weighted with, the offshore coastal zone (up to 3 nm offshore). The status for this goal is calculated as:
+\[X_{LSP} = \frac{\left(\frac{Area_{P}}{Area_{P_{ref}}} + \frac{Area_{MPA}}{Area_{MPA_{ref}}}\right)}{2}\]
+where:
+Area_P = the protected area within the 1 km inland coastal zone
+Area_P_ref = the reference protected area inland (30% of the total inland coastal zone)
+Area_MPA = the marine protected area within the 3 nm offshore zone
+Area_MPA_ref = the reference marine protected area (30% of the total 3 nm offshore zone)
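+As a toy illustration of the status calculation (all numbers below are invented; the real inputs come from the zonal statistics computed in this prep):
+# toy numbers, not real data: one region's inland (1 km) and offshore (3 nm) zones
+a_prot_inland   <- 120   # km2 protected within the 1 km inland zone
+a_tot_inland    <- 1000  # km2 total 1 km inland zone
+a_prot_offshore <- 450   # km2 protected within the 3 nm offshore zone
+a_tot_offshore  <- 1200  # km2 total 3 nm offshore zone
+ref_pct <- 0.30          # reference target: 30% of each zone protected
+
+# each component is the protected:total ratio normalized by the 30% target, capped at 1
+status_inland   <- min(1, (a_prot_inland / a_tot_inland) / ref_pct)     # 0.4
+status_offshore <- min(1, (a_prot_offshore / a_tot_offshore) / ref_pct) # 1.25, capped to 1
+
+# X_LSP is the equally weighted average of the two components
+x_lsp <- (status_inland + status_offshore) / 2  # 0.7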
+v2023: Using updated June 2023 data. Switched raster functions over to their terra equivalents.
+v2021: Using updated February 2021 data.
+Reference: IUCN and UNEP-WCMC (2023), The World Database on Protected Areas (WDPA) [On-line], June 2023. Cambridge, UK: UNEP-WCMC. Available at: www.protectedplanet.net.
+Downloaded: June 12, 2023
+Description: Shapefile of World Database on Protected Areas
+Time range: 1800 - 2022; some protected areas do not have an associated "status year" and are reported as year 0.
+Format: Shapefile
+File location: Mazu: git-annex/globalprep/_raw_data/wdpa_mpa/d2023/WDPA_Jun2023_Public_shp/
+The WDPA-MPA dataset comes as a shapefile or geodatabase in the WGS84 coordinate reference system.
+Once the polygons have been prepped, we rasterize the results to 500 m resolution.
+This process is all done in the script 1_prep_wdpa_rast.Rmd. After that is complete, move on to computing zonal statistics.
+Comparing the global WDPA raster to the 3 nautical miles offshore and 1 km inland rasters, we can tally the protected area within each region and compare it to the total area within each region. Note each cell is 500 m x 500 m, so its area is 0.25 km2; but since we are simply calculating a ratio of cell counts, the per-cell area cancels out: (n_prot * 0.25 km2) / (n_tot * 0.25 km2) = n_prot / n_tot.
+# list intermediate file paths
+zonal_files <- c('zonal_3nm' = file.path(dir_goal, 'int', 'zonal_stats_3nm.csv'),
+ 'zonal_1km' = file.path(dir_goal, 'int', 'zonal_stats_1km.csv'),
+ 'zonal_eez' = file.path(dir_goal, 'int', 'zonal_stats_eez.csv'))
+
+# load raster created in 1_prep_wdpa_rast.Rmd
+rast_wdpa <- terra::rast(file.path(dir_goal_anx, 'rast', 'wdpa_2023_moll_500m.tif'))
+
+# point to 500 m rasters for 3 nautical mile coastal regions, and 1 km inland coastal regions.
+dir_zones <- file.path(dir_anx, 'spatial/d2014/data/rgn_mol_raster_500m')
+
+# list filepaths to raster files for LSP areas
+rgn_rast_list <- c(
+ 'zonal_3nm' = file.path(dir_zones, 'rgn_offshore3nm_mol_500mcell.tif'),
+ 'zonal_1km' = file.path(dir_zones, 'rgn_inland1km_mol_500mcell.tif'),
+ 'zonal_eez' = file.path(dir_zones, 'rgn_eez_mol_500mcell.tif'))
+
+### Remove all files in `int` if it's the first time working through this data prep for this assessment
+### Filters out finished zonal files: if zonal files don't exist yet, they will be created (comment out to recalculate)
+zonal_files_to_run <- zonal_files[!file.exists(zonal_files)]
+rgn_rast_list <- rgn_rast_list[!file.exists(zonal_files)]
+
+
+ ### NOTE: The crosstab function returns this warning - does it affect the
+ ### outcomes, or does the function coerce the correct outcome?
+ # Warning message:
+ # In FUN(X[[i]], ...) : integer overflow - use sum(as.numeric(.))
+ ### zonal() wouldn't work since we want to track the frequency of each
+ ### year value within each rgn_id value.
+
+ lsp_crosstab <- function(rgn_rast_file, rast_values) {
+ rgn_rast <- terra::rast(rgn_rast_file)
+ message('Cross tabulating ', rgn_rast_file)
+ rast_df <- terra::crosstab(c(rast_values, rgn_rast), useNA = TRUE) %>%
+ as.data.frame() %>%
+ setNames(c('year', 'rgn_id', 'n_cells')) %>%
+ mutate(year = as.integer(as.character(year)),
+ rgn_id = as.integer(as.character(rgn_id))) %>%
+ arrange(rgn_id, year)
+
+ return(rast_df)
+ }
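+  ### quick sanity check of the crosstab logic on tiny synthetic rasters
+  ### (values below are invented purely for illustration):
+  # r_year <- terra::rast(matrix(c(2000, 2000, NA, 2001), nrow = 2))
+  # r_rgn  <- terra::rast(matrix(c(1, 1, 2, 2), nrow = 2))
+  # terra::crosstab(c(r_year, r_rgn), useNA = TRUE)
+  ### returns cell counts per (year, rgn) combination, with NA = unprotected cells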
+
+# Processing & saving zonal statistics for a single raster
+
+# run the function over the wdpa raster using each of the LSP zones,
+# resetting the timer before each run
+ptm <- proc.time()
+x <- lsp_crosstab(rgn_rast_list[1], rast_values = rast_wdpa) # ~25 minutes to run # 3nm
+message('crosstab elapsed: ', (proc.time() - ptm)[3])
+
+ptm <- proc.time()
+y <- lsp_crosstab(rgn_rast_list[2], rast_values = rast_wdpa) # ~19 minutes to run # 1km
+message('crosstab elapsed: ', (proc.time() - ptm)[3])
+
+ptm <- proc.time()
+z <- lsp_crosstab(rgn_rast_list[3], rast_values = rast_wdpa) # ~50 minutes to run # eez
+message('crosstab elapsed: ', (proc.time() - ptm)[3])
+
+## Save these files to the int folder
+write_csv(x, zonal_files_to_run[1])
+write_csv(y, zonal_files_to_run[2])
+write_csv(z, zonal_files_to_run[3])
+Once the WDPA raster is cross-tabulated against the OHI region rasters (both 3 nm offshore and 1 km inland), we have the number of protected cells, identified by year of protection, within each region. NA values are unprotected cells.
+Grouping by rgn_id, the total number of cells per region is determined by summing cell counts across ALL years, including cells with year == NA (unprotected cells). We can then determine the protected area for each year by looking at the cumulative sum of cells up to any given year.
+Since the cells are 500 m on a side, we can easily calculate area by multiplying cell count * 0.25 km2 per cell.
+Finally we can calculate the status of a region for any given year by finding the ratio of protected:total area and normalizing by the goal's target of 30% protected area. A toy sketch of this logic follows; the real implementation is the calc_areas() function below.
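+The sketch below walks that arithmetic through invented numbers (one region, two protection years); the column names mirror calc_areas(), but none of the values are real:
+library(dplyr)
+
+# invented crosstab output for one region: protected cells by status year,
+# plus year = NA for the unprotected cells
+toy <- tibble::tribble(
+  ~rgn_id, ~year, ~n_cells,
+        1,  2005,       40,
+        1,  2010,       60,
+        1,    NA,      300)
+
+toy %>%
+  group_by(rgn_id) %>%
+  mutate(n_cells_tot = sum(n_cells),          # 400 cells, incl. unprotected
+         a_tot_km2   = n_cells_tot / 4) %>%   # 400 cells * 0.25 km2 = 100 km2
+  filter(!is.na(year)) %>%
+  arrange(rgn_id, year) %>%
+  mutate(n_cells_cum = cumsum(n_cells),       # 40 cells by 2005; 100 by 2010
+         a_prot_km2  = n_cells_cum / 4,       # 10 km2 by 2005; 25 km2 by 2010
+         status      = pmin(1, (a_prot_km2 / a_tot_km2) / 0.30))  # 0.33; 0.83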
+# read in LSP zone csvs created above
+stats_3nm <- read_csv(zonal_files['zonal_3nm'])
+stats_1km <- read_csv(zonal_files['zonal_1km'])
+stats_eez <- read_csv(zonal_files['zonal_eez'])
+
+# OHI core function: loads the region data frames, including rgns_eez,
+# which is used below in calc_areas()
+region_data()
+
+# the WDPA data is published annually, so these should all be the year before the OHI year
+max_year <- max(c(stats_1km$year, stats_3nm$year, stats_eez$year), na.rm = TRUE)
+
+### Determine total cells per region (n_cells_tot) and then a cumulative
+### total of cells per region
+
+# create function to calculate values based on OHI regions
+calc_areas <- function(stats_df) {
+ area_df <- stats_df %>%
+ group_by(rgn_id) %>%
+ mutate(n_cells_tot = sum(n_cells),
+ a_tot_km2 = n_cells_tot / 4) %>%
+ filter(!is.na(year) & !is.na(rgn_id)) %>%
+ mutate(n_cells_cum = cumsum(n_cells),
+ a_prot_km2 = n_cells_cum / 4) %>%
+ complete(year = 2000:max_year) %>%
+ ungroup() %>%
+ fill(-year, .direction = 'down') %>%
+ dplyr::select(-contains('cell')) %>%
+ distinct() %>%
+ left_join(rgns_eez, by = 'rgn_id') %>%
+ dplyr::select(rgn_id:rgn_name)
+
+ return(area_df)
+}
+
+# execute functions on stats data frames
+prot_1km <- stats_1km %>% calc_areas()
+prot_3nm <- stats_3nm %>% calc_areas()
+prot_eez <- stats_eez %>% calc_areas()
+
+# write results to int folder as csv
+write_csv(prot_3nm, file.path(dir_goal, 'int', 'area_protected_3nm.csv'))
+write_csv(prot_1km, file.path(dir_goal, 'int', 'area_protected_1km.csv'))
+write_csv(prot_eez, file.path(dir_goal, 'int', 'area_protected_eez.csv'))
+From the protected area files, write out the individual layers ready for the Toolbox™.
+# read in files and rename
+prot_3nm <- read_csv(file.path(dir_goal, 'int', 'area_protected_3nm.csv')) %>%
+ rename(area = a_tot_km2,
+ a_prot_3nm = a_prot_km2)
+prot_1km <- read_csv(file.path(dir_goal, 'int', 'area_protected_1km.csv')) %>%
+ rename(area = a_tot_km2,
+ a_prot_1km = a_prot_km2)
+
+# create function to create LSP layer
+write_lsp_layer <- function(df, layers, layername) {
+ df1 <- df[ , c('rgn_id', layers)] %>%
+ filter(rgn_id <= 250) %>%
+ distinct()
+ write_csv(df1, file.path(dir_goal, 'output', paste0(layername, '.csv')))
+}
+
+# write LSP output files
+a_tot_3nm <- write_lsp_layer(prot_3nm, 'area', 'rgn_area_offshore3nm')
+a_tot_1km <- write_lsp_layer(prot_1km, 'area', 'rgn_area_inland1km')
+
+a_prot_3nm <- write_lsp_layer(prot_3nm, c('year', 'a_prot_3nm'), 'lsp_prot_area_offshore3nm')
+a_prot_1km <- write_lsp_layer(prot_1km, c('year', 'a_prot_1km'), 'lsp_prot_area_inland1km')
+Some goals require calculation of resilience for the nearshore area (3 nm) or the entire EEZ.
+area_ref = .30 ### 30% of area protected = reference point
+
+# read in regional data csvs from int
+resil_3nm <- read_csv(file.path(dir_goal, 'int', 'area_protected_3nm.csv')) %>%
+ mutate(resilience.score = (a_prot_km2 / a_tot_km2) / area_ref,
+ resilience.score = ifelse(resilience.score > 1, 1, resilience.score))
+
+resil_eez <- read_csv(file.path(dir_goal, 'int', 'area_protected_eez.csv')) %>%
+ mutate(resilience.score = (a_prot_km2 / a_tot_km2) / area_ref,
+ resilience.score = ifelse(resilience.score > 1, 1, resilience.score))
+
+## Save resilience scores for 3 nm and EEZ data in output
+ tmp_3nm <- resil_3nm %>%
+ dplyr::select(rgn_id, year, resilience.score)
+ write_csv(tmp_3nm, file.path(dir_goal, 'output', "mpa_3nm_resilience.csv"))
+
+ tmp_eez <- resil_eez %>%
+ dplyr::select(rgn_id, year, resilience.score)
+ write_csv(tmp_eez, file.path(dir_goal, 'output', "mpa_eez_resilience.csv"))
+There was no gapfilling for these data. Created gapfill files with values of 0.
+library(dplyr)
+
+res_eez <- read.csv("output/mpa_eez_resilience.csv")%>%
+ mutate(resilience.score = 0) %>%
+ rename(gapfilled = resilience.score)
+
+write.csv(res_eez, "output/mpa_eez_resilience_gf.csv", row.names=FALSE)
+
+res_3nm <- read.csv("output/mpa_3nm_resilience.csv")%>%
+ mutate(resilience.score = 0) %>%
+ rename(gapfilled = resilience.score)
+
+write.csv(res_3nm, "output/mpa_3nm_resilience_gf.csv", row.names=FALSE)
+
+inland <- read.csv("output/lsp_prot_area_inland1km.csv") %>%
+ mutate(a_prot_1km = 0) %>%
+ rename(gapfilled = a_prot_1km)
+
+write.csv(inland, "output/lsp_prot_area_inland1km_gf.csv", row.names=FALSE)
+
+offshore <- read.csv("output/lsp_prot_area_offshore3nm.csv") %>%
+ mutate(a_prot_3nm = 0)%>%
+ rename(gapfilled = a_prot_3nm)
+
+write.csv(offshore, "output/lsp_prot_area_offshore3nm_gf.csv", row.names=FALSE)
+Plot scores for 2023 vs 2022 assessment years.
+library(ggplot2)
+#library(plotly)
+
+## Calculates this year and last year's coastal marine protected area ratio (CMPA/Ref-CMPA) for plotting
+status_3nm_new <- read_csv(file.path(dir_goal, 'output', 'lsp_prot_area_offshore3nm.csv')) %>%
+ full_join(read_csv(file.path(dir_goal, 'output', 'rgn_area_offshore3nm.csv')),
+ by = 'rgn_id') %>%
+ mutate(pct_prot_3nm_new = a_prot_3nm / area,
+ status_3nm_new = pct_prot_3nm_new / 0.3,
+ status_3nm_new = ifelse(status_3nm_new > 1, 1, status_3nm_new)) %>%
+ filter(year == max(year)) %>%
+ dplyr::select(rgn_id, pct_prot_3nm_new, status_3nm_new)
+
+status_3nm_old <- read_csv(file.path(dir_goal, '../v2022/output', 'lsp_prot_area_offshore3nm.csv')) %>%
+ full_join(read_csv(file.path(dir_goal, 'output', 'rgn_area_offshore3nm.csv')),
+ by = 'rgn_id') %>%
+ mutate(pct_prot_3nm_old = a_prot_3nm / area,
+ status_3nm_old = pct_prot_3nm_old / 0.3,
+ status_3nm_old = ifelse(status_3nm_old > 1, 1, status_3nm_old)) %>%
+ filter(year == max(year)) %>%
+ dplyr::select(rgn_id, pct_prot_3nm_old, status_3nm_old)
+
+## Calculates this year and last year's coastline protected ratio (CP/Ref-CP) for plotting
+status_1km_new <- read_csv(file.path(dir_goal, 'output', 'lsp_prot_area_inland1km.csv')) %>%
+ full_join(read_csv(file.path(dir_goal, 'output', 'rgn_area_inland1km.csv')),
+ by = 'rgn_id') %>%
+ mutate(pct_prot_1km_new = a_prot_1km / area,
+ status_1km_new = pct_prot_1km_new / 0.3,
+ status_1km_new = ifelse(status_1km_new > 1, 1, status_1km_new)) %>%
+ filter(year == max(year)) %>%
+ dplyr::select(rgn_id, pct_prot_1km_new, status_1km_new)
+
+status_1km_old <- read_csv(file.path(dir_goal, '../v2022/output', 'lsp_prot_area_inland1km.csv')) %>%
+ full_join(read_csv(file.path(dir_goal, 'output', 'rgn_area_inland1km.csv')),
+ by = 'rgn_id') %>%
+ mutate(pct_prot_1km_old = a_prot_1km / area,
+ status_1km_old = pct_prot_1km_old / 0.3,
+ status_1km_old = ifelse(status_1km_old > 1, 1, status_1km_old)) %>%
+ filter(year == max(year)) %>%
+ dplyr::select(rgn_id, pct_prot_1km_old, status_1km_old)
+
+# Updated sequence to use pivot_longer() instead of gather()
+lsp_new_old <- status_3nm_new %>%
+ full_join(status_3nm_old, by = c('rgn_id')) %>%
+ full_join(status_1km_new, by = c('rgn_id')) %>%
+ full_join(status_1km_old, by = c('rgn_id')) %>%
+ mutate(status_old = (status_3nm_old + status_1km_old) / 2,
+ status_new = (status_3nm_new + status_1km_new) / 2) %>%
+ pivot_longer(cols = contains('new'), names_to = 'rgn', values_to = 'score_new') %>%
+ pivot_longer(cols = contains('old'), names_to = 'rgn_old', values_to = 'score_old') %>%
+ mutate(rgn = str_replace(rgn, '_new', ''),
+ rgn_old = str_replace(rgn_old, '_old', ''),
+ score_new = round(score_new, 3),
+ score_old = round(score_old, 3)) %>%
+ filter(rgn_id <= 250) %>%
+ filter(rgn == rgn_old) %>%
+ dplyr::select(-rgn_old) %>%
+ left_join(rgns_eez, by = 'rgn_id') %>%
+ dplyr::select(rgn_id:rgn_name)
+
+# plot score change results
+lsp_status_plot <- ggplot(lsp_new_old,
+ aes(x = score_old, y = score_new, key = rgn_name)) +
+ geom_point(alpha = .6) +
+ theme(legend.position = 'none') +
+ geom_abline(slope = 1, intercept = 0, color = 'red') +
+ labs(x = 'LSP status v2022 (data through Jun 2022)',
+ y = 'LSP status v2023 (data through Jun 2023)',
+ title = 'Comparing LSP status: 2023 vs 2022') +
+ facet_wrap( ~ rgn)
+
+lsp_status_plot #got rid of ggplotly
+
+# save plot to figs folder
+ggsave(file.path(dir_goal, 'Figs/plot_v2022_v2023.png'),
+ plot = lsp_status_plot, height = 4.5, width = 6)
+
+# create data frame with scores that changed by more than 0.05
+mjr_score_change <- lsp_new_old %>%
+ mutate(diff = score_new - score_old) %>%
+ filter(rgn == 'status' & abs(diff) > 0.05) %>%
+ mutate(abs_diff = abs(diff))
+
+# save major score changes to output folder as csv
+write.csv(mjr_score_change, "output/major_changes_2023.csv", row.names = FALSE)
+
+This analysis converts FAO capture production data into the OHI 2023 targeted harvest pressure data.
+2023: One more year of data.
+2021: Small fixes to how N data is handled in fao_fxn.R. Deleted unused/not relevant code.
+http://www.fao.org/fishery/statistics/software/fishstatj/en#downlApp
+Release date: March 2021
+FAO Global Capture Production Quantity 1950_2019
+Information: http://www.fao.org/fishery/statistics/global-capture-production/en
+Reference: United Nations, 2021. FAO Fisheries & Aquaculture - Fishery Statistical Collections - Global Capture Production [WWW Document]. URL http://www.fao.org/fishery/statistics/global-capture-production/en (accessed 4.29.21).
+Downloaded: April 25, 2022
+Description: Quantity (tonnes) of fisheries capture for each country, species, year.
+Time range: 1950-2020
+# load libraries, set directories
+library(ohicore) #devtools::install_github('ohi-science/ohicore@dev')
+library(tidyverse)
+library(plotly)
+library(here)
+library(janitor)
+
+### Load FAO-specific user-defined functions
+source(here('workflow/R/fao_fxn.R')) # function for cleaning FAO files (not combined into common.R like most other functions have been at this point)
+source(here('workflow/R/common.R')) # directory locations
+
+version_year <- 2023
+latest_data_yr <- version_year - 2
+This includes the FAO capture production data and a list of the “target” species.
+## FAO capture production data: all columns are parsed as character, and one column produces a parsing error (unclear which; read.csv might avoid this)
+# The last row is not relevant to the data and is removed which is what the warning is referring to
+fis_fao_raw <- read_csv(
+ file.path(dir_M, 'git-annex/globalprep/_raw_data/FAO_capture',
+ paste0('d', version_year),
+ paste0('Global_capture_production_Quantity_1950-', latest_data_yr,'.csv'))
+ # , na = "..."
+)
+
+# List of species included as cetaceans or marine turtles (this probably won't change at all)
+sp2grp <- read_csv(here('globalprep', 'prs_targetedharvest', paste0('v', version_year), 'raw', 'species2group.csv')) %>%
+ dplyr::filter(incl_excl == 'include') %>%
+ dplyr::select(target, species); head(sp2grp)
+# Rename columns and remove unit column
+fao_clean <- fis_fao_raw %>%
+ dplyr::rename(country = "Country (Name)",
+ species = "ASFIS species (Name)",
+ area = "FAO major fishing area (Name)") %>%
+ dplyr::select(-c("Unit (Name)", "Unit")) %>% # added removing Unit also
+ dplyr::rename_with(~ base::gsub("\\[", "", .)) %>%
+ dplyr::rename_with(~ base::gsub("\\]", "", .))
+
+
+# Pivot_longer by year and value to expand and make each line a single observation for country, species and year (tidy data!)
+fao_clean <- fao_clean %>%
+ tidyr::pivot_longer(cols = -(1:3), names_to = 'year', values_to = 'value', values_drop_na = T) %>%
+ fao_clean_data_new()
+
+fao_clean <- fao_clean %>%
+ dplyr::mutate(
+ species = as.character(species),
+ species = stringr::str_replace_all(
+ string = species,
+ pattern = "Henslow.*s swimming crab",
+ replacement = "Henslow's swimming crab")
+ )
+This analysis only includes target species. The warning messages need to be checked and, if necessary, changes should be made to the raw/species2group.csv file.
+# check for discrepancies in species list
+spgroups <- sort(as.character(unique(fao_clean$species))) # species groups in FAO data
+groups <- c('turtle', 'whale', 'dolphin', 'porpoise') # seals and sea lions removed from vector (pinnipeds no longer included)
+
+# Going through FAO data species and seeing if they're in our master list of species
+## Looking to see if we need to add species that have changed name
+for (group in groups) {# group='dolphin'
+ possibles <- spgroups[grep(group, spgroups)]
+ d_missing_l <- setdiff(possibles, sp2grp$species)
+ if (length(d_missing_l) > 0){
+ cat(sprintf("\nMISSING in the lookup the following species in target='%s'.\n %s\n",
+ group, paste(d_missing_l, collapse='\n ')))
+ }
+}
+
+# check for species in lookup not found in data
+l_missing_d <- setdiff(sp2grp$species, spgroups)
+if (length(l_missing_d) > 0){
+ cat(sprintf('\nMISSING: These species in the lookup are not found in the FAO data \n'))
+ print(l_missing_d)
+}
+
+#### v2023
+# MISSING in the lookup the following species in target='turtle'.
+# Chinese softshell turtle - not a marine turtle
+# Eastern Pacific green turtle - added this to sp2grp to include
+# River and lake turtles nei - not a marine turtle
+#
+# MISSING in the lookup the following species in target='whale'.
+# Creek whaler - shark, not a whale
+# Velvet whalefish - fish, not a whale
+#
+# MISSING in the lookup the following species in target='dolphin'.
+# Common dolphinfish - fish not a cetacean
+# Pompano dolphinfish - fish not a cetacean
+
+## filter data to include only target species ----
+target_spp <- fao_clean %>%
+ dplyr::filter(species %in% sp2grp$species) # this goes from 2384 spp in FAO list to just 72
+
+unique(target_spp$area) # confirm these are all marine regions
+unique(fao_clean$species) # 2384 species
+# pivot wider to expand years
+wide <- target_spp %>%
+ tidyr::pivot_wider(names_from = year, values_from = value) %>%
+ dplyr::left_join(sp2grp, by='species'); head(wide)
+
+# pivot back to long format by target
+long <- wide %>%
+ dplyr::select(-area) %>%
+ tidyr::pivot_longer(cols = c(-country, -species, -target),
+ names_to = 'year',
+ values_to = 'value',
+ values_drop_na = T) %>%
+ dplyr::mutate(year = as.integer(as.character(year))) %>%
+ dplyr::arrange(country, target, year); head(long)
+
+
+# explore Japan[210] as an example
+japan <- long %>%
+ dplyr::group_by(country, target, year) %>%
+ dplyr::summarize(value = sum(value)) %>%
+ dplyr::filter(country == 'Japan', target == 'cetacean', year >= 2000)
+
+# summarize totals per region per year - number of individual animals from each spp group?
+sum <- long %>%
+ dplyr::group_by(country, year) %>%
+ dplyr::summarize(value = sum(value, na.rm=TRUE)) %>%
+ dplyr::filter(value != 0) %>%
+ dplyr::ungroup(); head(sum)
+sum <- sum %>%
+ ohicore::split_regions() %>%
+ dplyr::mutate(country = as.character(country)) %>%
+ dplyr::mutate(country = ifelse(stringr::str_detect(country, "C.*te d'Ivoire"), "Ivory Coast", country))
+
+### Function to convert to OHI region ID
+m_sum_rgn <- name_2_rgn(df_in = sum,
+ fld_name='country',
+ flds_unique=c('year'))
+
+# Check out duplicates based on error message from previous step
+dplyr::filter(m_sum_rgn, country %in% c("Guadeloupe", "Martinique"))
+# this is ok, we report these two together, so this will be fixed with the summarize in the next step
+
+# They will be summed:
+m_sum_rgn <- m_sum_rgn %>%
+ dplyr::group_by(rgn_id, rgn_name, year) %>%
+ dplyr::summarize(value = sum(value)) %>%
+ dplyr::ungroup()
+Data is rescaled by dividing by the 95th quantile of values across all regions from 2011 to 2021 (the most recent year of FAO data).
+target_harvest <- m_sum_rgn %>%
+ dplyr::mutate(
+ quant_95 = quantile(value[year %in% 2011:latest_data_yr], 0.95, na.rm = TRUE),
+ score = value / quant_95,
+ score = ifelse(score > 1, 1, score)) %>%
+ dplyr::select(rgn_id, year, pressure_score = score) %>%
+ dplyr::arrange(rgn_id, year); head(target_harvest); summary(target_harvest)
+
+# v2021 quant_95 = 3409.4
+# v2022 quant_95 = 3450.05
+# v2023 quant_95 = 3572
+
+# any regions that did not have a catch should have score = 0
+rgns <- rgn_master %>%
+ dplyr::filter(rgn_typ == "eez") %>%
+ dplyr::select(rgn_id = rgn_id_2013) %>%
+ dplyr::filter(rgn_id < 255) %>%
+ base::unique() %>%
+ dplyr::arrange(rgn_id)
+
+# Add year; for v2023, min year is 1950, and max year is 2021
+rgns <- expand.grid(rgn_id = rgns$rgn_id, year = min(target_harvest$year):max(target_harvest$year))
+
+# Change NAs in pressure_score column to 0s
+target_harvest <- rgns %>%
+ dplyr::left_join(target_harvest) %>%
+ dplyr::mutate(pressure_score = ifelse(is.na(pressure_score), 0, pressure_score)) %>%
+ dplyr::arrange(rgn_id); head(target_harvest); summary(target_harvest)
+
+# Write target_harvest to "fao_targeted.csv" in output folder
+write_csv(
+ target_harvest,
+ here('globalprep', 'prs_targetedharvest', paste0('v', version_year), 'output', 'fao_targeted.csv')
+)
+
+# Create gapfill dataframe
+target_harvest_gf <- target_harvest %>%
+ dplyr::mutate(gapfill = 0) %>%
+ dplyr::select(rgn_id, year, gapfill)
+# all zeroes for gapfill column; nothing being gapfilled but need to have a record
+
+# Write target_harvest_gf to "fao_targeted_gf.csv" in output folder
+write_csv(
+ target_harvest_gf,
+ here('globalprep', 'prs_targetedharvest', paste0('v', version_year), 'output', 'fao_targeted_gf.csv')
+)
+The data from last year and this year should be the same unless there were changes to the underlying FAO data or the master species list.
+In this case, all of the regions looked very similar.
+# pull just 2020 data from target_harvest df, since it's the most recent year in common
+common_year <- 2020
+
+new <- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year), "output", "fao_targeted.csv")) %>%
+ dplyr::filter(year == common_year)
+
+old <- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year - 1), "output", "fao_targeted.csv")) %>%
+ dplyr::filter(year == common_year) %>%
+ dplyr::select(rgn_id, year, pressure_score_old = pressure_score) %>%
+ dplyr::left_join(new, by = c("rgn_id", "year"))
+
+# Compare pressure_score between last year and this year's assessments
+compare_plot <- ggplot(data = old, aes(x = pressure_score_old, y = pressure_score, label = rgn_id)) +
+ geom_point() +
+ geom_abline(color = "red")
+
+plot(compare_plot)
+ggplotly(compare_plot)
+
+### v2022: Explore outliers
+
+### explore United States [163]
+# out_country <- "United States of America"
+### explore Indonesia [216]
+out_country <- "Indonesia"
+
+outlier_country <- long %>%
+ dplyr::group_by(country, target, year) %>%
+ dplyr::summarize(value = sum(value)) %>%
+ dplyr::filter(country == out_country, year >= 2000)
+
+
+region_data() ## read in regions
+
+
+new <- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year), "output", "fao_targeted.csv")) %>%
+ dplyr::filter(year == 2021) %>%
+ dplyr::select(-year)
+
+old <- readr::read_csv(here("globalprep", "prs_targetedharvest", paste0('v', version_year - 1), "output", "fao_targeted.csv")) %>%
+ dplyr::filter(year == common_year) %>%
+ dplyr::select(-year) %>%
+ dplyr::select(rgn_id, pressure_score_2020 = pressure_score) %>%
+ dplyr::left_join(new, by = c("rgn_id")) %>%
+ dplyr::rename(pressure_score_2021 = pressure_score) %>%
+ left_join(rgns_eez)
+
+
+# Compare pressure_score between last year and this year's assessments
+compare_plot <- ggplot(data = old, aes(x = pressure_score_2020, y = pressure_score_2021, label = rgn_name)) +
+ geom_point() +
+ geom_abline(color = "red")
+
+plot(compare_plot)
+ggplotly(compare_plot)