-
Notifications
You must be signed in to change notification settings - Fork 3
/
extract-lithology.R
237 lines (202 loc) · 9.49 KB
/
extract-lithology.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
## ------------------------------------------------------- ##
# Silica WG - Extract Spatial Data - Lithology
## ------------------------------------------------------- ##
# Written by:
## Nick J Lyon
# Purpose:
## Using the watershed shapefiles created in "wrangle-watersheds.R"
## Extract the following data: LITHOLOGY
## ------------------------------------------------------- ##
# Housekeeping ----
## ------------------------------------------------------- ##
# Load needed libraries
# install.packages("librarian")
librarian::shelf(tidyverse, sf, stars, terra, exactextractr, NCEAS/scicomptools,
googledrive, readxl)
# Clear environment
rm(list = ls())
# Silence `summarize`
options(dplyr.summarise.inform = F)
# Identify path to location of shared data
(path <- scicomptools::wd_loc(local = F, remote_path = file.path('/', "home", "shares", "lter-si", "si-watershed-extract")))
# Load in site names with lat/longs
sites <- readxl::read_excel(path = file.path(path, "site-coordinates",
"silica-coords_RAW.xlsx")) %>%
## Pare down to minimum needed columns
dplyr::select(LTER, Stream_Name, Discharge_File_Name, Shapefile_Name) %>%
## Drop duplicate rows (if any)
dplyr::distinct()
# Remove any watersheds without a shapefile
# dplyr::filter(!is.na(Shapefile_Name) &
# nchar(Shapefile_Name) != 0 &
# !Shapefile_Name %in% c("?", "MISSING"))
# Check it out
dplyr::glimpse(sites)
# Grab the shapefiles the previous script (see PURPOSE section) created
sheds <- sf::st_read(dsn = file.path(path, "site-coordinates", "silica-watersheds.shp")) %>%
# Expand names to what they were before
dplyr::rename(Shapefile_Name = shp_nm,
Stream_Name = Strm_Nm,
expert_area_km2 = exp_area,
shape_area_km2 = real_area)
## combine sites and sheds to get ALL sheds (including hydrosheds) and their metadata (from the sites dataframe)
sheds <- sheds %>%
dplyr::left_join(y = sites, by = c("LTER", "Shapefile_Name"))
sheds$Stream_Name <- ifelse(!is.na(sheds$Stream_Name.x), sheds$Stream_Name.x, sheds$Stream_Name.y)
sheds$Discharge_File_Name <- ifelse(!is.na(sheds$Dsc_F_N), sheds$Dsc_F_N, sheds$Discharge_File_Name)
sheds <- sheds %>% select (-c(Stream_Name.x, Stream_Name.y, expert_area_km2, shape_area_km2, exp_are, hydrshd, real_ar,
Dsc_F_N))
# Check that out
dplyr::glimpse(sheds)
# Clean up environment
rm(list = setdiff(ls(), c('path', 'sites', 'sheds')))
## ------------------------------------------------------- ##
# Lithology - Extract ----
## ------------------------------------------------------- ##
# Pull in the raw lithology data
rocks_raw <- terra::rast(x = file.path(path, "raw-driver-data", "raw-lithology-data",
"glim_wgs84_0point5deg.txt.asc"))
# Check CRS
crs(rocks_raw)
## Looks good
# Experimental plotting
# plot(rocks_raw, reset = F, axes = T)
# plot(sheds, add = T, axes = T)
# Strip out rocks from our polygons
rocks_out <- exactextractr::exact_extract(x = rocks_raw, y = sheds,
include_cols = c("LTER", "Shapefile_Name")) %>%
# Above returns a list so switch it to a dataframe
purrr::map_dfr(dplyr::select, c(LTER, Shapefile_Name, value)) %>%
# Filter out NAs
dplyr::filter(!is.na(value)) %>%
# Count pixels within each river
dplyr::group_by(LTER, Shapefile_Name, value) %>%
dplyr::summarise(pixel_ct = dplyr::n()) %>%
dplyr::ungroup()
# Check that dataframe
dplyr::glimpse(rocks_out)
## ------------------------------------------------------- ##
# Lithology - Index Prep ----
## ------------------------------------------------------- ##
# Bring in the index tying rock code integers with rock abbreviations
rock_index_raw <- read.table(file = file.path(path, "raw-driver-data",
"raw-lithology-data", "Classnames.txt"),
header = T, sep = ';')
# Fix this index to make it more usable
rock_index <- rock_index_raw %>%
# Rename the most important columns
dplyr::rename(rock_code = OBJECTID,
rock_abbrev = xx) %>%
# And get a more descriptive version of each of the rock types
dplyr::mutate(
rock_type = dplyr::case_when(
# Abbreviations found here:
# https://www.clisap.de/fileadmin/B-Research/IA/IA5/LITHOMAP/
rock_abbrev == 'su' ~ 'unconsolidated_sediments',
rock_abbrev == 'ss' ~ 'siliciclastic_sedimentary_rocks',
rock_abbrev == 'sm' ~ 'mixed_sedimentary_rocks',
rock_abbrev == 'py' ~ 'pyroclastic',
rock_abbrev == 'sc' ~ 'carbonate_sedimentary_rocks',
rock_abbrev == 'ev' ~ 'evaporites',
rock_abbrev == 'mt' ~ 'metamorphic_rocks',
rock_abbrev == 'pa' ~ 'acid_plutonic_rocks',
rock_abbrev == 'pi' ~ 'intermediate_plutonic_rocks',
rock_abbrev == 'pb' ~ 'basic_plutonic_rocks',
rock_abbrev == 'va' ~ 'acid_volcanic_rocks',
rock_abbrev == 'vi' ~ 'intermediate_volcanic_rocks',
rock_abbrev == 'vb' ~ 'basic_volcanic_rocks',
rock_abbrev == 'ig' ~ 'ice_and_glaciers',
rock_abbrev == 'wb' ~ 'water_bodies',
rock_abbrev == 'nd' ~ 'no_data',
TRUE ~ as.character(rock_abbrev) ) ) %>%
# Remove unneeded columns
dplyr::select(value = rock_code, rock_type)
# Check that worked
dplyr::glimpse(rock_index)
## ------------------------------------------------------- ##
# Lithology - Summarize ----
## ------------------------------------------------------- ##
# Process the extracted lithology information into a dataframe
rock_v2 <- rocks_out %>%
# Bring over the rock names from the index
dplyr::left_join(y = rock_index, by = "value") %>%
# Remove the now-unneeded code column
dplyr::select(-value) %>%
# Simplify the imported categories
dplyr::mutate(
rock_type = dplyr::case_when(
rock_type == 'unconsolidated_sediments' ~ 'sedimentary',
rock_type == 'siliciclastic_sedimentary_rocks' ~ 'sedimentary',
rock_type == 'mixed_sedimentary_rocks' ~ 'sedimentary',
rock_type == 'pyroclastic' ~ 'volcanic',
rock_type == 'carbonate_sedimentary_rocks' ~ 'carbonate_evaporite',
rock_type == 'evaporites' ~ 'carbonate_evaporite',
rock_type == 'metamorphic_rocks' ~ 'metamorphic',
rock_type == 'acid_plutonic_rocks' ~ 'plutonic',
rock_type == 'intermediate_plutonic_rocks' ~ 'plutonic',
rock_type == 'basic_plutonic_rocks' ~ 'plutonic',
rock_type == 'acid_volcanic_rocks' ~ 'volcanic',
rock_type == 'intermediate_volcanic_rocks' ~ 'volcanic',
rock_type == 'basic_volcanic_rocks' ~ 'volcanic',
T ~ as.character(rock_type) ) ) %>%
# Remove unwanted data values
dplyr::filter(!rock_type %in% c("no_data", "water_bodies", "ice_and_glaciers")) %>%
# Bin rock categories and re-summarise within consolidated categories
dplyr::group_by(LTER, Shapefile_Name, rock_type) %>%
dplyr::summarize(pixel_ct_v2 = sum(pixel_ct, na.rm = T)) %>%
dplyr::ungroup() %>%
# Now count total pixels per watershed
dplyr::group_by(LTER, Shapefile_Name) %>%
dplyr::mutate(total_shed_pixels = sum(pixel_ct_v2, na.rm = T)) %>%
dplyr::ungroup() %>%
# And calculate the percent of total for each row
dplyr::mutate(perc_total = ((pixel_ct_v2 / total_shed_pixels) * 100)) %>%
# Remove the two pixel count columns
dplyr::select(-pixel_ct_v2, -total_shed_pixels)
# Get wide format version where categories are columns
rock_wide <- rock_v2 %>%
# Add "rocks" to each category
dplyr::mutate(rock_type = paste0("rocks_", rock_type)) %>%
# Now pivot
tidyr::pivot_wider(names_from = rock_type,
values_from = perc_total)
# Now identify "major" (i.e., dominant) rocks
rock_major <- rock_v2 %>%
# Filter to only max of each rock type per uniqueID & LTER
dplyr::group_by(LTER, Shapefile_Name) %>%
dplyr::filter(perc_total == max(perc_total)) %>%
dplyr::ungroup() %>%
# Remove the percent total
dplyr::select(-perc_total) %>%
# Pivot back to wide format
tidyr::pivot_wider(names_from = rock_type,
values_from = rock_type) %>%
# Paste all the non-NAs (i.e., the dominant rocks) into a single column
tidyr::unite(col = major_rock, -LTER, -Shapefile_Name, na.rm = T, sep = "; ")
# Now attach the major rocks to the wide format one
rock_actual <- rock_wide %>%
dplyr::left_join(y = rock_major, by = c("LTER", "Shapefile_Name")) %>%
dplyr::relocate(major_rock, .after = Shapefile_Name)
# Examine
dplyr::glimpse(rock_actual)
## ------------------------------------------------------- ##
# Lithology - Export ----
## ------------------------------------------------------- ##
# Let's get ready to export
rock_export <- sheds %>%
# Join the rock data
dplyr::left_join(y = rock_actual, by = c("LTER", "Shapefile_Name"))%>%
# this drops the geometry column, which causes issues on export
sf::st_drop_geometry()
# Check it out
dplyr::glimpse(rock_export)
# Create folder to export to
dir.create(path = file.path(path, "extracted-data"), showWarnings = F)
# Export the summarized lithology data
write.csv(x = rock_export, na = '', row.names = F,
file = file.path(path, "extracted-data", "si-extract_lithology_2.csv"))
# Upload to GoogleDrive
googledrive::drive_upload(media = file.path(path, "extracted-data", "si-extract_lithology_2.csv"),
overwrite = T,
path = googledrive::as_id("https://drive.google.com/drive/u/0/folders/1FBq2-FW6JikgIuGVMX5eyFRB6Axe2Hld"))
# End ----