-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path06_land-cover-classification.Rmd
484 lines (404 loc) · 18.8 KB
/
06_land-cover-classification.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
---
editor_options:
chunk_output_type: console
bibliography: references.bib
---
# Land cover classification and change detection
In this script, we will process land cover polygons and rasters for multiple time periods (1848 and 2018) across the Nilgiri hills of the Western Ghats biodiversity hotspot. For the year 1848, a survey map (created by Captain John Ouchterlony) was obtained from the British Library and the Tamil Nadu State Archive and was manually digitized to arrive at multiple land cover classes (for more information on the process of digitization, please read the accompanying manuscript and use the READMe section in the GitHub repository). For the year 2018, we relied on satellite imagery from Sentinel-2 (the imagery was classified using Google Earth Engine and more information on the classification can be obtained below).
## Load necessary libraries
```{r}
library(sf)
library(raster)
library(terra)
library(stars)
library(dplyr)
library(tidyverse)
library(mapview)
library(landscapemetrics)
library(scico)
library(extrafont)
library(exactextractr)
```
## Processing digitized shapefiles from the 1848 map
We will be loading shapefiles that were digitized by Amrutha Rajan for the 1848 historical map.
```{r}
# list all shapefiles in the directory
nil1848 <- list.files("data/landcover/1848-nilgiris/", full.names = T, recursive = T, pattern=".shp$")
# create vector files
ag1848 <- st_read(nil1848[1]) # type: multipolygon; 6 empty geometries
noData1848 <- st_read(nil1848[2]) # type: multipolygon
plantations1848 <- st_read(nil1848[3]) # type: polygon & multipolygon; 1 empty geometry
roads1848 <- st_read(nil1848[4]) # type: linestring
settlements1848 <- st_read(nil1848[5]) # type: polygon & multipolygon; 3 geometries empty
sholaForest1848 <- st_read(nil1848[6]) # type: polygon & multippolygon; 6 geometries empty
sholaGrassland1848 <- st_read(nil1848[7]) # type: multipolygon
swamps1848 <- st_read(nil1848[8]) # type: polygon & multipolygon; eight geometries are empty
waterBodies1848 <- st_read(nil1848[9]) # type: multipolygon
# explore and fix any issues with the above vector files
# we need to ensure consistency across files for the sake of merging them into a single geometry collection
# we notice a range of small issues with the shapefiles above
# the geometry type is variable and needs to be consistent
# empty geometries need to be removed
# attribute names need to be consistent across shapefiles
# first, we will remove empty geometries
ag1848 <- ag1848[!st_is_empty(ag1848), ]
noData1848 <- noData1848[!st_is_empty(noData1848), ]
plantations1848 <- plantations1848[!st_is_empty(plantations1848),]
roads1848 <- roads1848[!st_is_empty(roads1848),]
settlements1848 <- settlements1848[!st_is_empty(settlements1848),]
sholaForest1848 <- sholaForest1848[!st_is_empty(sholaForest1848),]
sholaGrassland1848 <- sholaGrassland1848[!st_is_empty(sholaGrassland1848),]
swamps1848 <- swamps1848[!st_is_empty(swamps1848),]
waterBodies1848 <- waterBodies1848[!st_is_empty(waterBodies1848),]
# fixing attribute tables to ensure they are consistent across shapefiles
names(ag1848) <- c("id", "name","geometry")
ag1848$name <- "agriculture"
names(noData1848) <- c("id", "name","geometry")
noData1848$name <- "no_data"
names(plantations1848) <- c("id", "name","geometry")
plantations1848$name <- "plantations"
names(roads1848) <- c("id", "name","geometry")
roads1848$name <- "roads"
names(settlements1848) <- c("id", "name","geometry")
settlements1848$name <- "settlements"
names(sholaForest1848) <- c("id", "name","geometry")
sholaForest1848$name <- "shola_forest"
names(sholaGrassland1848) <- c("id", "name","geometry")
sholaGrassland1848$name <- "shola_grassland"
names(swamps1848) <- c("id", "name","geometry")
swamps1848$name <- "swamps"
names(waterBodies1848) <- c("id", "name","geometry")
waterBodies1848$name <- "water_bodies"
# Note: the roads shapefile is a linestring while the other geometry types are polygon/multipolygon
# transform to UTM 43N
ag1848 <- st_transform(ag1848, 32643)
noData1848 <- st_transform(noData1848, 32643)
plantations1848 <- st_transform(plantations1848, 32643)
roads1848 <- st_transform(roads1848, 32643)
settlements1848 <- st_transform(settlements1848, 32643)
sholaForest1848 <- st_transform(sholaForest1848, 32643)
sholaGrassland1848 <- st_transform(sholaGrassland1848, 32643)
swamps1848 <- st_transform(swamps1848, 32643)
waterBodies1848 <- st_transform(waterBodies1848, 32643)
# creating a single simple feature collection
nil1848 <- rbind(ag1848, plantations1848, settlements1848,
sholaForest1848, sholaGrassland1848, swamps1848,
waterBodies1848)
# subsuming swamps under grasslands
nil1848 <- nil1848 %>%
mutate(name = case_when(
name == "swamps" ~ "shola_grassland",
.default = as.character(name)))
# crop the 1848 shapefiles to within the 1400m contour only
nil1848 <- st_buffer(nil1848, dist = 0)
nil1848 <- nil1848[,-c(3:5)]
# Create a common boundary for clipping files from another time period (2018)(see below)
all1848 <- st_buffer(st_union(nil1848), dist = 0)
```
## Rasterization of the 1848 shapefiles
The 1848 digitized shapefiles are rasterized for comparison with the 2018 sentinel satellite imagery.
```{r}
# nil11848 raster
# scale: 1000ft to 1 inch (1:12000; 6 metres resolution)
# This link was used for reference to convert from map scale to raster pixel resolution: https://www.esri.com/arcgis-blog/products/product/imagery/on-map-scale-and-raster-resolution/
vect1848 <- terra::vect(nil1848)
emptyRast <- terra::rast(res = 6, xmin = 660555.3, xmax = 719135.5, ymin = 1240050, ymax = 1277597, crs = "+proj=utm +zone=43 +datum=WGS84 +units=m +no_defs")
rast1848 <- terra::rasterize(vect1848, emptyRast, "name")
```
## Load the 2018 satellite image
The 2018 satellite image was obtained from Sentinel-2. Cloud-free days (1% cloud cover) were chosen from March 2018 and a composite was created. We then utilized groundtruthing points from [@arasumani2019] and used a Random Forests classifier (n = 1000 trees; Kappa statistic = 93.9%, overall accuracy of 94.8% was obtained on test data) to obtain a classified image with seven land cover classes (similar to the 1848 map). For more details on the classification, please visit this link:
```{r}
## load the raster
rast2018 <- terra::rast("data/landcover/2018-nilgiris/2018.tif")
values(rast2018)[values(rast2018) <= 0] <- NA
## convert the raster to a categorical raster
rast2018 <- as.factor(rast2018)
## set land cover class names to the 2018 raster
## note: this was done carefully by examining the values associated
## with each number from classification process (done in GEE)
## create a dataframe with names of classes and their corresponding values
landcover_class <- data.frame(ID = 1:7,
name = c( "agriculture",
"shola_forest",
"shola_grassland",
"timber_plantations",
"settlements",
"tea_plantations",
"water_bodies"))
levels(rast2018) <- landcover_class
## resample the 1848 raster to the 2018 raster to match spatial res
## The 1848 raster is at 6 m resolution
## The 2018 raster is at 10 m resolution
rast1848 <- resample(rast1848, rast2018, method = "near")
# visualization of the two rasters
colors2018 <- c(
'#be4fc4', # agriculture, violetish
'#025a05',# shola forests, dark green
'#cbb315', # shola grasslands, yellowish
'#c17111', # timber plantations, brownish
'#b0a69d', # settlements, grayish
'#04a310',# tea plantations, light green
'#2035df' # waterbodies, royal blue
)
## side-by-side visualization
colors1848 <- c(
'#be4fc4', # agriculture, violetish
'#c17111', # plantations, brownish
'#b0a69d', # settlements, grayish
'#025a05',# shola forests, dark green
'#cbb315', # shola grasslands, yellowish
'#2035df' # waterbodies, royal blue
)
## saving a high resolution visualization
png(filename = "figs/fig_landCover_1848_vs_2018.png",
width = 12, height = 7, units = "in", res = 300)
par(mfrow = c(1,2))
plot(rast1848, col = colors1848, main = "1848",
legend = FALSE)
plot(rast2018, col=colors2018, main = "2018",
legend=FALSE)
# add custom legend
par(mar = c(0, 0, 0, 0),
new = TRUE)
plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
legend("bottom",
legend=landcover_class$name, fill=colors2018,
cex = 0.8,
title="land cover classes",
ncol = 2)
dev.off()
```
## Subsume plantations into one category for 2018
Since 1848 had very few to no tea plantations and only timber plantations were present at the time, we reclassify timber plantations into the plantations category for the 2018 raster to ease comparisons.
```{r}
# read reclassification matrix
reclassification_matrix <- read.csv("data/landcover/2018-nilgiris/reclassification-matrix.csv")
reclassification_matrix <- as.matrix(reclassification_matrix[, c("V1", "To")])
# reclassification
rast2018_reclassified <- terra::classify(
x = rast2018,
rcl = reclassification_matrix
)
## create a dataframe with names of classes and their corresponding values
landcover_reclass <- data.frame(ID = 1:6,
name = c("agriculture",
"shola_forest",
"shola_grassland",
"plantations",
"settlements",
"water_bodies"))
levels(rast2018_reclassified) <- landcover_reclass
## side-by-side visualization
colors2018reclass <- c(
'#be4fc4', # agriculture, violetish
'#025a05',# shola forests, dark green
'#cbb315', # shola grasslands, yellowish
'#c17111', # plantations, brownish
'#b0a69d', # settlements, grayish
'#2035df' # waterbodies, royal blue
)
png(filename = "figs/fig_landCover_1848_vs_2018reclassified.png",
width = 12, height = 7, units = "in", res = 300)
par(mfrow = c(1,2))
plot(rast1848, col = colors1848, main = "1848",
legend = FALSE)
plot(rast2018_reclassified, col=colors2018reclass, main = "2018",
legend=FALSE)
# add custom legend
par(mar = c(0, 0, 0, 0),
new = TRUE)
plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
legend("bottom",
legend=landcover_reclass$name, fill=colors2018reclass,
cex = 0.8,
title="land cover classes",
ncol = 2)
dev.off()
```
![Landcover change between 1848 and 2018](figs/fig_landCover_1848_vs_2018reclassified.png)
## Write the shapefiles and rasters to file
Please note that the processed rasters above have been resampled to a resolution of 10 metres.
```{r}
# vectors
st_write(nil1848, "results/landcover/1848.shp",
driver = "ESRI Shapefile")
# rasters
terra::writeRaster(rast1848, "results/landcover/1848.tif")
terra::writeRaster(rast2018, "results/landcover/2018.tif")
terra::writeRaster(rast2018_reclassified, "results/landcover/2018reclassified.tif")
```
## Area-wise comparisons of land cover change
```{r}
# area-wise calculations for the 2018 raster (the below objects will be used to compare overall areas, alongside the 1848 map)
sz2018 <- cellSize(rast2018, unit = "m")
area2018 <- zonal(sz2018, rast2018, sum)
area2018SqKm <- (area2018$area / 1000000)
area2018 <- cbind(area2018, area2018SqKm)
# area-wise calculations for the 2018 reclassified raster (the below objects will be used to compare overall areas, alongside the 1848 map)
sz2018_reclass <- cellSize(rast2018_reclassified, unit = "m")
area2018_reclass <- zonal(sz2018_reclass, rast2018_reclassified, sum)
area2018SqKm_reclass <- (area2018_reclass$area / 1000000)
area2018_reclass <- cbind(area2018_reclass, area2018SqKm_reclass)
# area-wise calculations for the 1848 raster (the below objects will be used to compare overall areas, alongside the 2018 sentinel satellite image)
sz1848 <- cellSize(rast1848, unit = "m")
area1848 <- zonal(sz1848, rast1848, sum)
area1848SqKm <- (area1848$area / 1000000)
area1848 <- cbind(area1848, area1848SqKm)
```
## Barplots of land cover change
```{r}
# Joining all dataframes to create a single one for plotting and visualization
names(area1848) <- c("class", "areaInMeters","sumArea")
names(area2018) <- c("class", "areaInMeters","sumArea")
areaCalc <- purrr::reduce(list(
area1848,
area2018
), dplyr::full_join, by = "class")
names(areaCalc) <- c("class", "areaMt1848", "1848", "areaMt2018", "2018")
areaCalc <- areaCalc %>%
dplyr::select("class", "1848", "2018") %>%
pivot_longer(!class, names_to = "Year", values_to = "Area")
# since the 1848 map had timber and tea plantations subsumed and the 2018 had it separated, we will manually edit the same
areaCalc <- areaCalc %>% drop_na()
write.csv(areaCalc, "results/totalArea-by-landCover-timePeriod.csv", row.names = F)
# make plot
fig_area <- ggplot(areaCalc, aes(x = class,
y = Area, fill = class)) +
geom_bar(stat = "identity",
position = position_dodge()) +
scale_fill_manual(values = c('#be4fc4', '#d73027', '#b0a69d',
'#025a05','#cbb315', '#04a310',
'#c17111', '#2035df' ))+
geom_text(aes(label = round(Area), hjust = "middle",
vjust = -0.5), family = "Century Gothic",
position = position_dodge(), angle = 0,
size = 5) +
facet_wrap(~Year, scales ="free_x") +
theme_bw() +
labs(
x = "\nLand cover type",
y = "Area in sq.km. \n"
) +
theme(text = element_text(size=14, family="Century Gothic"),
axis.title = element_text(
family = "Century Gothic",
size = 14, face = "bold"),
axis.text = element_text(family = "Century Gothic",
size = 14),
axis.text.x = element_text(angle = 90, vjust = 0.5,
hjust = 1),
legend.position = "none")
ggsave(fig_area,
filename = "figs/fig_totalArea_landCover.png", width = 15, height = 13, device = png(), units = "in", dpi = 600)
dev.off()
# get percentArea occupied by each land cover class
percentArea <- areaCalc %>%
group_by(Year) %>%
mutate(
totalArea = sum(Area, na.rm = T),
percentArea = (Area / totalArea) * 100
)
# plot figure
fig_percent_area <- ggplot(percentArea, aes(x = class, y = percentArea, fill = class)) +
geom_bar(stat = "identity", position = position_dodge()) +
scale_fill_manual(values = c('#be4fc4', '#d73027', '#b0a69d',
'#025a05','#cbb315', '#04a310',
'#c17111', '#2035df' ))+
geom_text(aes(label = round(percentArea, digits = 1), hjust = "middle", vjust = -0.5), position = position_dodge(), family = "Century Gothic", angle = 0, size = 5) +
facet_wrap(~Year, scales = "free_x") +
theme_bw() +
labs(
x = "\nLand cover type",
y = "Percent Area \n"
) +
theme(text = element_text(size=14, family="Century Gothic"),
axis.title = element_text(
family = "Century Gothic",
size = 14, face = "bold"
),axis.text = element_text(family = "Century Gothic",
size = 14),
axis.text.x = element_text(angle = 90,
vjust = 0.5, hjust = 1),
legend.position = "none")
ggsave(fig_percent_area, filename = "figs/fig_percentArea_landCover.png", width = 14, height = 13, device = png(), units = "in", dpi = 500)
dev.off()
```
![Barplots of change in area of each land cover class between 1848 and 2018](figs/fig_totalArea_landCover.png)
## Validating the 2018 Sentinel-2 classification
In the above classification, we see an increase in forest cover of ~ 132 sq.km between 1848 and 2018. However, is the 2018 classification accurate? Can we compare these estimates to an existing land cover classification from 2017?
```{r}
nil2017 <- st_read("data/landcover/2017-nilgiris/2017.shp")
nil2017 <- st_intersection(nil2017, all1848)
# Let's add class name to nil2017 shapefile
nil2017 <- nil2017 %>%
mutate(name = case_when(
gridcode == 1 ~ "shola_grassland",
gridcode == 2 ~ "shola_forest",
gridcode == 3 ~ "timber_plantations",
gridcode == 4 ~ "tea_plantations",
gridcode == 6 ~ "settlements",
gridcode == 7 ~ "agriculture",
gridcode == 8 ~ "water_bodies"
))
# remove older column
nil2017 <- nil2017[,-3]
# renaming for ease of visualization
names(nil2017) <- c("id","area","geometry","name")
## note that the spatial resolution of Landsat is at 30 m spatial resolution
vect2017 <- terra::vect(nil2017)
emptyRast <- terra::rast(res = 30, xmin = 660750, xmax = 718329, ymin = 1240554, ymax = 1275510, crs = "+proj=utm +zone=43 +datum=WGS84 +units=m +no_defs")
rast2017 <- terra::rasterize(vect2017, emptyRast, "name")
# resample the 2018 raster to 30m
rast2018 <- resample(rast2018, rast2017, method = "near")
# mask the 2018 raster with the 2017 one since the 2017 raster was only for area above 1400m
rast2018 <- mask(rast2018, rast2017)
# visualizing the two classifications
colors2017 <- c(
'#be4fc4', # agriculture, violetish
'#b0a69d', # settlements, grayish
'#025a05', # shola forests, dark green
'#cbb315', # shola grasslands, yellowish
'#04a310', # tea plantations, light green
'#c17111', # timber plantations, brownish
'#2035df' # waterbodies, royal blue
)
## saving a high resolution visualization
png(filename = "figs/fig_landCover_2018_vs_2017.png",
width = 12, height = 7, units = "in", res = 300)
par(mfrow = c(1,2))
plot(rast2018, col = colors2018, main = "2018 Sentinel classification",
legend = FALSE)
plot(rast2017, col=colors2017, main = "2017 Landsat classification", legend=FALSE)
# add custom legend
par(mar = c(0, 0, 0, 0),
new = TRUE)
plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
legend("bottom",
legend=landcover_class$name, fill=colors2018,
cex = 0.8,
title="land cover classes",
ncol = 2)
dev.off()
## area-wise calculations
sz2018 <- cellSize(rast2018, unit = "m")
area2018 <- zonal(sz2018, rast2018, sum)
area2018SqKm <- (area2018$area / 1000000)
area2018 <- cbind(area2018, area2018SqKm)
sz2017 <- cellSize(rast2017, unit = "m")
area2017 <- zonal(sz2017, rast2017, sum)
area2017SqKm <- (area2017$area / 1000000)
area2017 <- cbind(area2017, area2017SqKm)
# Joining all dataframes to create a single one for plotting and visualization
names(area2017) <- c("class", "areaInMeters","sumArea")
names(area2018) <- c("class", "areaInMeters","sumArea")
areaCalc <- purrr::reduce(list(
area2017,
area2018
), dplyr::full_join, by = "class")
names(areaCalc) <- c("class", "areaMt2017", "2017", "areaMt2018", "2018")
areaCalc <- areaCalc %>%
dplyr::select("class", "2017", "2018") %>%
pivot_longer(!class, names_to = "Year", values_to = "Area")
write.csv(areaCalc, "results/totalArea-by-landCover-2018-vs-2017.csv", row.names = F)
```
![Comparing the 2018 Sentinel classification to the 2017 Landsat classification of the same area revealed no major differences](figs/fig_landCover_2018_vs_2017.png)