-
Notifications
You must be signed in to change notification settings - Fork 0
/
03-prac-01-australia.Rmd
506 lines (376 loc) · 12.8 KB
/
03-prac-01-australia.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
# Chapter 3 Spatial descriptive statistics
1. Load, manipulate and interpret raster layers
2. Observe and critique different descriptive data manipulation methods and outputs
DATA 1. vector of Australia,download the GeoPackage <https://gadm.org/download_country_v3.html>
# 1. Projection
## packages
```{r}
library(sf)
library(here)
library(terra)
library(raster)
library(tidyverse)
library(fs)
```
## read file
```{r}
st_layers("wk3/data/gadm36_AUS.gpkg") # what's inside
Ausoutline <- st_read(("wk3/data/gadm36_AUS.gpkg"),
layer='gadm36_AUS_0')
```
## check CRS
```{r}
print(Ausoutline)
# check that the coordinate reference systems of sf or sp objects using the print function
# proj4 string -- a compact way of identifying a crs
st_crs(Ausoutline)$proj4string
# "+proj=longlat +datum=WGS84 +no_defs"
```
## assign a crs EPSG
```{r}
Ausoutline <- st_read(("wk3/data/gadm36_AUS.gpkg"),
layer='gadm36_AUS_0') %>% st_set_crs(4326)
```
However for generating maps in packages like `leaflet`, your maps will need to be in WGS84, rather than a projected (flat) reference system .
## 1.1 reproject
```{r}
AusoutlinePROJECTED <- Ausoutline %>%
st_transform(.,3112) # WGS84->GDA94 EPSG 3112
# st_transform reproject simple features, not spatraster
print(AusoutlinePROJECTED)
```
## sf to sp
```{r}
# In the SF object, compare the values in the geometry column with those in the original file to look at how they have changed
#From sf(simple features: dataframe) to sp(spatial: list inside list)
AusoutlineSP <- Ausoutline %>%
as(., "Spatial")
#From sp to sf
AusoutlineSF <- AusoutlineSP %>%
st_as_sf()
# use sf in tidyverse
```
# 1.2 worldclim data
## read data
WorldClim data --- this is a dataset of free global climate layers (rasters) with a spatial resolution of between 1km2 and 240km2 <https://www.worldclim.org/data/worldclim21.html>
```{r}
jan <- terra::rast("wk3/data/wc2.1_5m_tavg_01.tif")
jan
```
```{r}
plot(jan)
```
`Mollweide projection` retains area proportions whilst compromising accuracy of angle and shape
```{r}
# set the proj 4 to a new object
pr1 <- terra::project(jan, "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
#or....
newproj<-"ESRI:54009"
# get the jan raster and give it the new proj4
pr1 <- jan %>%
terra::project(., newproj)
plot(pr1)
# to WGS84
pr1 <- pr1 %>%
terra::project(., "EPSG:4326")
plot(pr1)
```
# 1.3 data loading
```{r}
# look in our folder, find the files that end with .tif and
dir_info("wk3/data/")
```
## listfile
```{r}
# dplyr::select
listfiles<-dir_info("wk3/data/") %>%
filter(str_detect(path,".tif")) %>%
dplyr::select(path)%>%
dplyr::pull()
# pull() from dplyr which is the same as the $ often used to extract columns as in the next stage the input must be filenames as characters (nothing else like a column name)
listfiles
```
## load data into SpatRaster
"空间栅格" 或 "空间光栅"
```{r}
# A SpatRaster is a collection of raster layers with the same spatial extent and resolution
worldclimtemp <- listfiles %>%
terra::rast()
worldclimtemp
```
## access single layer
```{r}
worldclimtemp[[1]]
```
## rename layers within the stack
```{r}
month <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
names(worldclimtemp) <- month
# dplyr::rename() not for raster data
worldclimtemp$Jan
```
# 1.4 raster location
```{r}
site <- c("Brisbane", "Melbourne", "Perth", "Sydney", "Broome", "Darwin", "Orange",
"Bunbury", "Cairns", "Adelaide", "Gold Coast", "Canberra", "Newcastle",
"Wollongong", "Logan City" )
lon <- c(153.03, 144.96, 115.86, 151.21, 122.23, 130.84, 149.10, 115.64, 145.77,
138.6, 153.43, 149.13, 151.78, 150.89, 153.12)
lat <- c(-27.47, -37.91, -31.95, -33.87, 17.96, -12.46, -33.28, -33.33, -16.92,
-34.93, -28, -35.28, -32.93, -34.42, -27.64)
#Put all of this inforamtion into one list
samples <- data.frame(site, lon, lat, row.names="site")
# Extract the data from the Rasterstack for all points
AUcitytemp<- terra::extract(worldclimtemp, samples) # zonal statistics
```
terra::extract() is a function that extracts values from a SpatRaster for a set of points, lines or polygons. It is a wrapper for the extract function in the raster package. The function is vectorised, so you can extract values for multiple points, lines or polygons at once.
## add the city names
```{r}
Aucitytemp2 <- AUcitytemp %>%
as_tibble()%>%
add_column(Site = site, .before = "Jan")
```
# 2. descriptive statistics
## 2.1 data preparation
```{r}
# perth
Perthtemp <- Aucitytemp2 %>%
filter(site=="Perth")
# or
# Perthtemp <- Aucitytemp2[3,]
```
## 2.2 histogram
```{r}
# tibble stored the data as double and the base hist() function needs it as numeric
hist(as.numeric(Perthtemp))
```
The x axis is the temperature and the y is the frequency of occurrence.
There seems to be an outlier with this plot as we haven't removed the columns we don't need - look in the Perthtemp tibble and you will see an `ID of 3` which is plotted above.
```{r}
### improve aesthetic
# outliner ID 3
#define where the breaks in histogram
userbreak <- c(8,10,12,14,16,18,20,22,24,26)
#remove the ID and the site columns
Perthtemp <- Aucitytemp2 %>%
filter(site=="Perth")
t <- Perthtemp %>%
dplyr::select(Jan:Dec)
hist((as.numeric(t)),
breaks=userbreak,
col="red",
main="Histogram of Perth Temperature",
xlab="Temperature",
ylab="Frequency")
```
### check out the histogram
```{r}
histinfo <- as.numeric(t) %>%
as.numeric()%>%
hist(.,
breaks=userbreak,
col="red",
main="Histogram of Perth Temperature",
xlab="Temperature",
ylab="Frequency")
histinfo
```
## 2.3 using more data
```{r}
plot(Ausoutline$geom)
```
### st_simplify
```{r}
AusoutSIMPLE <- Ausoutline %>%
st_simplify(.,preserveTopology=TRUE,dTolerance = 1000) %>%
# the argument dTolerance controls the level of generalisation in the units of the map, the higher the number the more generalised the map
# preserveTopology to TRUE or FALSE, when true it means that polyongs aren’t removed and holes in polygons are retained
st_geometry() %>%
plot()
```
### projection conflict
```{r}
# make sure both layers in same CRS
print(Ausoutline)
crs(worldclimtemp)
```
### crop data
clip the data to) to the outline of Australia then crop our WorldClim dataset to it
crop:适用于需要从整个数据集中选择或提取特定地理区域的情况。例如,从全球的栅格数据中裁剪出特定国家或区域的数据
```{r}
Austemp <- Ausoutline %>%
#crop temp data to the extent
terra::crop(worldclimtemp,.) # terra::crop(worldclimtemp,Ausoutline)
#plot the output
plot(Austemp)
```
### extract by mask
the raster hasn't been perfectly clipped to the exact outline of Australia, so we will use the `mask` function to clip the raster to the exact outline of Australia.
mask:适用于需要将数据中的某些区域标记为无效或排除在分析之外的情况。例如,通过一个多边形区域来定义一个掩膜,将该区域以外的数据设为缺失值。
```{r}
exactAus <- terra::mask(Austemp,Ausoutline)
# mask(tif,shp)
```
### plot march
```{r}
# subset using the known location of the raster
hist(exactAus[[3]],col="red",main="March Temperature")
```
## 2.4 histogram with ggplot
### convert to tibble
```{r}
# make our raster into a data.frame to be compatible with ggplot2, using a dataframe or tibble
exactAusdf <- exactAus %>%
as.data.frame()
```
### ggplot
```{r}
library(ggplot2)
```
### 2.4.1 plot1 single month with mean
```{r}
gghist <- ggplot(exactAusdf,
aes(x=Mar))+
geom_histogram(color="black",
fill="white")+
labs(title="Ggplot2 histogram of Australian March temperatures",
x="Temperature",
y="Frequency")
# add a vertical line to the hisogram showing mean temperature
gghist +
geom_vline(aes(xintercept=mean(Mar,
na.rm=TRUE)),
# any missing values should be ignored when calculating the mean
color="blue",
linetype="dashed",
size=1)+ # line thickness
theme(plot.title = element_text(hjust=0.5))
# centers the title by setting the horizontal justification (hjust) to 0.5
```
### 2.4.2 plot2 multiple months
#### pivot_longer
```{r}
# plot multiple months of temperature data on the same histogram
## put our variable (months) into a one column using pivot_longer()
## select columns 1-12 (all the months) and place them in a new column called Month and their values in another called Temp
squishdata <- exactAusdf %>%
pivot_longer(
cols = 1:12,
names_to = "Month",
values_to = "Temp"
)
```
#### suset and select 2 months
```{r}
twomonths <- squishdata %>%
filter(.,Month=="Jan" | Month=="Jun")
```
#### mean of each month
```{r}
meantwomonths <- twomonths %>%
group_by(Month) %>%
summarise(Mean=mean(Temp,
na.rm=TRUE))
# any missing values should be ignored when calculating the mean
```
#### plot
```{r}
ggplot(twomonths,
aes(x=Temp,color=Month,fill=Month))+
# color=Month and fill=Month: The "Month" variable is used to differentiate the two months by color and fill.
geom_histogram(position="identity",
# ensures that the bars are positioned directly over the x-values without any stacking.
alpha=0.5)+
geom_vline(data=meantwomonths,
aes(xintercept= Mean,
color=Month),
linetype="dashed")+
labs(title="Ggplot2 histogram of Australian Jan and Jun temperatures",
x="Temperature",
y="Frequency")+
theme_classic()+
# classic theme provides a simple and clean appearance to the plot
theme(plot.title =
# "plot.title" refers to the title that you set using the labs(title = ...) function
element_text(hjust = 0.5))# element_text function to specify text properties
# theme function customize various aspects of the plot's appearance, such as text size, colors, axis labels, and more
```
1. dropped all the NAs with drop_na()
2. made sure that the Month column has the levels specified, which will map in descending order (e.g. Jan, Feb, March..)
3. selected a bin width of 5 and produced a faceted plot 分面直方图
直方图被分成多个小图(facet),每个小图表示数据中的一个子集。这些子集由一个或多个分类变量定义。例如,如果我们想要查看每个月的温度分布,我们可以使用月份作为分类变量来分面直方图。
```{r}
data_complete_cases <- squishdata %>%
drop_na()%>%
mutate(Month = factor(Month, levels = c("Jan","Feb","Mar",
"Apr","May","Jun",
"Jul","Aug","Sep",
"Oct","Nov","Dec")))
# Plot faceted histogram
ggplot(data_complete_cases, aes(x=Temp, na.rm=TRUE))+
geom_histogram(color="black", binwidth = 5)+
labs(title="Ggplot2 faceted histogram of Australian temperatures",
x="Temperature",
y="Frequency")+
facet_grid(Month ~ .)+
theme(plot.title = element_text(hjust = 0.5))
```
```{r}
library(plotly)
# split the data for plotly based on month
jan <- squishdata %>%
drop_na() %>%
filter(., Month=="Jan")
jun <- squishdata %>%
drop_na() %>%
filter(., Month=="Jun")
# give axis titles
x <- list (title = "Temperature")
y <- list (title = "Frequency")
# set the bin width
xbinsno<-list(start=0, end=40, size = 2.5)
# plot the histogram calling all the variables we just set
ihist<-plot_ly(alpha = 0.6) %>%
add_histogram(x = jan$Temp,
xbins=xbinsno, name="January") %>%
add_histogram(x = jun$Temp,
xbins=xbinsno, name="June") %>%
layout(barmode = "overlay", xaxis=x, yaxis=y)
ihist
```
```{r}
# mean per month
meanofall <- squishdata %>%
group_by(Month) %>%
summarise(mean = mean(Temp, na.rm=TRUE))
# print the top 1
head(meanofall, n=1)
```
```{r}
# standard deviation per month
sdofall <- squishdata %>%
group_by(Month) %>%
summarize(sd = sd(Temp, na.rm=TRUE))
# maximum per month
maxofall <- squishdata %>%
group_by(Month) %>%
summarize(max = max(Temp, na.rm=TRUE))
# minimum per month
minofall <- squishdata %>%
group_by(Month) %>%
summarize(min = min(Temp, na.rm=TRUE))
# Interquartlie range per month
IQRofall <- squishdata %>%
group_by(Month) %>%
summarize(IQR = IQR(Temp, na.rm=TRUE))
# perhaps you want to store multiple outputs in one list..
lotsofstats <- squishdata %>%
group_by(Month) %>%
summarize(IQR = IQR(Temp, na.rm=TRUE),
max=max(Temp, na.rm=T))
# or you want to know the mean (or some other stat)
#for the whole year as opposed to each month...
meanwholeyear=squishdata %>%
summarize(meanyear = mean(Temp, na.rm=TRUE))
```