-
Notifications
You must be signed in to change notification settings - Fork 0
2. Code Tutorial: Universal Kriging
Hyesop edited this page Jun 21, 2019
·
1 revision
This code is a single R
sourcecode but splitted for the reader's convenience.
Here, we use 5 packages tidyverse
, sf
, raster
, mgcv
, and gstat
. Wow, really? Of course, that is it.
The next step is to load the pre-cleaned NO2 then merge
them all to station points.
options(scipen = 100) # extend the system's decimal points to a hundred
memory.size() # check memory size: Windows-specific
memory.limit(99999) # expand memory size: Windows-specific
library(tidyverse) # data tidy and visualisation
library(sf) # geospatial analysis: Vector
library(raster) # geospatial analysis: Raster
library(mgcv) # GAM
library(gstat) # Kriging
load("../RData/no2.RData")
rm(list=setdiff(ls(), c("no2.win.12hr", "ratio.no2.win")))
stations <- read_sf("../seoul/stations_10km.shp")
stations_df <- stations %>% st_set_geometry(NULL)
seoul <- read_sf("../seoul/Seoul_City.shp") %>% as('Spatial') %>% fortify()
no2.winter <- merge(no2.win.12hr, stations_df, by.x = c("Station.ID", "X", "Y"), by.y = c("Station", "X", "Y"))
##########
options(warn = -1) # don't print warnings
myVario <- list()
myList <- list()
top.dec <- no2.winter[118:135]
##Automated-Supervised variograms
for(i in 1:18){
myVario[[length(myVario)+1]] <- variogram(top.dec[[i]] ~ 1, top.dec, cutoff = 50000)
myList[[i]] <- fit.variogram(myVario[[i]],
vgm(psill = 250,
range = 45000,
nugget= 20,
model = "Ste"),
fit.kappa = TRUE, fit.method = 6)
}
library(gridExtra)
p01 <- plot(myVario[[1]], myList[[1]], main = "Dec 02nd\nOffice hours")
p02 <- plot(myVario[[2]], myList[[2]], main = "Dec 02nd\nHome hours")
p03 <- plot(myVario[[3]], myList[[3]], main = "Dec 03rd\nOffice hours")
p04 <- plot(myVario[[4]], myList[[4]], main = "Dec 03rd\nHome hours")
p05 <- plot(myVario[[5]], myList[[5]], main = "Dec 04th\nOffice hours")
p06 <- plot(myVario[[6]], myList[[6]], main = "Dec 04th\nHome hours")
p07 <- plot(myVario[[7]], myList[[7]], main = "Dec 05th\n Office hours")
p08 <- plot(myVario[[8]], myList[[8]], main = "Dec 05th\n Home hours")
p09 <- plot(myVario[[9]], myList[[9]], main = "Dec 06th\n Office hours")
p10 <- plot(myVario[[10]], myList[[10]], main = "Dec 06th\n Home hours")
p11 <- plot(myVario[[11]], myList[[11]], main = "Dec 07th\nOffice hours")
p12 <- plot(myVario[[12]], myList[[12]], main = "Dec 07th\nHome hours")
p13 <- plot(myVario[[13]], myList[[13]], main = "Dec 08th\nOffice hours")
p14 <- plot(myVario[[14]], myList[[14]], main = "Dec 08th\nHome hours")
p15 <- plot(myVario[[15]], myList[[15]], main = "Dec 09th\nOffice hours")
p16 <- plot(myVario[[16]], myList[[16]], main = "Dec 09th\nHome hours")
p17 <- plot(myVario[[17]], myList[[17]], main = "Dec 10th\n Office hours")
p18 <- plot(myVario[[18]], myList[[18]], main = "Dec 10th\n Home hours")
varplot <- grid.arrange(p01, p02, p03, p04, p05, p06, p07, p08, p09, p10,
p11, p12, p13, p14, p15, p16, p17, p18,
ncol = 6)
ggsave("semvario.png",varplot, width = 12, height = 7)#, scale = 1.5)
### Data Frame
seoul_grid <- data.frame(expand.grid(X = seq(min(no2.winter$X), max(no2.winter$X), length=200),
Y = seq(min(no2.winter$Y), max(no2.winter$Y), length=200)))
coordinates(seoul_grid) <- ~X+Y
proj4string(seoul_grid) <- CRS("+init=epsg:5181")
#https://gis.stackexchange.com/questions/157279/saving-results-in-automap-r-package-for-time-series-data
##############
#--Kriging--##
##############
pred.model <- seoul_grid@coords
var.model <- seoul_grid@coords
for(i in 1:18) {
kriging_new <- krige(top.dec@data[,i]~ X + Y,
top.dec,
seoul_grid,
nmin = 15,
nmax = 30,
model = myList[[i]])
kriging_new$var_model <- data.frame(kriging_new$var1.var)
var.model <- cbind(var.model, kriging_new$var_model)
xyz <- as.data.frame(kriging_new$var1.pred)
colnames(xyz) <- colnames(top.dec@data)[i]
pred.model <- cbind(pred.model, xyz)
}
##-- Add ColNames
colnames(pred.model) <- c("X", "Y", "dec02d", "dec02n","dec03d", "dec03n", "dec04d", "dec04n", "dec05d", "dec05n", "dec06d", "dec06n", "dec07d", "dec07n", "dec08d", "dec08n", "dec09d", "dec09n", "dec10d", "dec10n")
colnames(var.model) <- c("X", "Y", "dec02d", "dec02n","dec03d", "dec03n", "dec04d", "dec04n", "dec05d", "dec05n", "dec06d", "dec06n", "dec07d", "dec07n", "dec08d", "dec08n", "dec09d", "dec09n", "dec10d", "dec10n")
##-- Find Mean and variance
stat <- pred.model %>% dplyr::select(-c(X,Y)) %>%
gather(factor_key = T) %>%
group_by(key) %>% summarise(mean= round(mean(value),1), median = round(median(value),1),
sd= round(sd(value),1), max = max(value),min = min(value)) %>% rename(Hour = key)
statvar <- var.model %>% dplyr::select(-c(X,Y)) %>%
gather(factor_key = T) %>%
group_by(key) %>% summarise(mean= round(mean(value),1), median = round(median(value),1),
sd= round(sd(value),1), max = max(value),min = min(value)) %>% rename(Hour = key)
########
# RMSE #
########
pred <- pred.model
r.pred <- rasterFromXYZ(pred)
crs(r.pred) <- CRS('+init=epsg:5181')
obs <- as.data.frame(top.dec) %>% dplyr::select(X,Y,everything())
pred.df <- data.frame(X = obs$X, Y = obs$Y)
RMSE <- data.frame(X = obs$X, Y = obs$Y)
# for loop
for(i in 1:18){
pred.df[,i+2] <- extract(r.pred[[i]], top.dec)
RMSE[,i+2] <- (pred.df[,i+2] - obs[,i+2])^2
}
colnames(RMSE) <- colnames(obs)
stat$rmse <- RMSE %>% dplyr::select(-c(1:2)) %>% as.matrix() %>% colSums()
stat$rmse <- sqrt(stat$rmse / 60) %>% round(digits = 3)
#############
#- Plotting-#
#############
ras.krige.df <- pred.model %>%
reshape2::melt(id = c("X", "Y"), variable.name = "Hour", value.name = "NO2")
ras.krige.df %>%
ggplot() +
geom_tile(aes(x = X, y = Y, fill = NO2)) +
scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
geom_contour(aes(x = X, y = Y, z = NO2),bins = 20, colour = "grey40", alpha = 0.7) +
geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
geom_text(data = stat, aes(x = 187000, y = 434000, label = paste0("mean = " , mean)), size = 3) +
geom_text(data = stat, aes(x = 184000, y = 430500, label = paste0("sd = " , sd)), size = 3) +
geom_text(data = stat, aes(x = 188900, y = 469000, label = paste0("RMSE=" , rmse)), size = 3) +
facet_wrap(~ Hour, ncol = 8) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> kriged # 1200 x 550
# Export PNG
png("kriged_pred.png", width=1200, height=550, res=100)
kriged
dev.off()
ras.var.df <- var.model %>%
reshape2::melt(id = c("X", "Y"), variable.name = "Hour", value.name = "NO2")
ras.var.df %>%
ggplot() +
geom_tile(aes(x = X, y = Y, fill = NO2)) +
scale_fill_distiller(palette = "BrBG", na.value = NA) + #, limits = c(0,12), breaks = c(0,3,6,9,12)) +
geom_contour(aes(x = X, y = Y, z = NO2),bins = 20, colour = "grey40", alpha = 0.7) +
geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
geom_text(data = statvar, aes(x = 187000, y = 434000, label = paste0("mean = " , mean)), size = 3) +
geom_text(data = statvar, aes(x = 184000, y = 430500, label = paste0("sd = " , sd)), size = 3) +
facet_wrap(~ Hour, ncol = 8) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> kriged_stv
# Export PNG
png("kriged_var.png", width=1200, height=550, res=100)
kriged_stv
dev.off()
######################
#--Cross Validation--#
######################
CV <- list()
CV.RMSE <- vector()
CV.R2 <- vector()
for(k in 1:20){
CV[[length(CV)+1]] <- krige.cv(get(colnames(top.jan@data[k]))~ 1,
no2.winter,
myList[[k]], # Semi-Variogram
nmax = 30,
nfold=10)
RMSE <- sqrt(mean((CV[[k]]$observed - CV[[k]]$residual)^2)) #RMSE, Ideally small
CV.RMSE <- append(CV.RMSE,RMSE)
R2 <- (cor(CV[[k]]$observed, CV[[k]]$residual))^2
CV.R2 <- append(CV.R2,R2)
}
CV.RMSE
CV.R2
library(gridExtra)
CV1 <- bubble(CV[[1]], "residual", main = "10-fold CV residuals\nFeb11 Day 2014")
CV2 <- bubble(CV[[2]], "residual", main = "10-fold CV residuals\nFeb11 Night 2014")
CV3 <- bubble(CV[[3]], "residual", main = "10-fold CV residuals\nFeb12 Day 2014")
CV4 <- bubble(CV[[4]], "residual", main = "10-fold CV residuals\nFeb12 Night 2014")
grid.arrange(CV1, CV2, CV3, CV4)
############################
# Convert to Raster Bricks #
############################
krige <- rasterFromXYZ(pred.model,
crs="+proj=tmerc +lat_0=38 +lon_0=127 +k=1 +x_0=200000 +y_0=500000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs",
digits=5)
ras.road <- raster("../seoul/road_10km_re.tif") # Import raster
res.mgcv <- resample(krige, ras.road, method = "bilinear") # resample
res.mgcv <- merge(ras.road, res.mgcv) # merge
# assign road
road_01 = road_02 = road_03 = road_04 = road_05 =
road_06 = road_07 = road_08 = road_09 = road_10 =
road_11 = road_12 = road_13 = road_14 = road_15 =
road_16 = road_17 = road_18 = road_19 = road_20 = ras.road
# stack raster and remove individual raster files
road.stack <- stack(road_01, road_02, road_03, road_04, road_05,
road_06, road_07, road_08, road_09, road_10,
road_11, road_12, road_13, road_14, road_15,
road_16, road_17, road_18, road_19, road_20
)
rm(road_01, road_02, road_03, road_04, road_05,
road_06, road_07, road_08, road_09, road_10,
road_11, road_12, road_13, road_14, road_15,
road_16, road_17, road_18, road_19, road_20
)
# add road ratio values to GAM raster
ratio.top.jan <- ratio.no2.win[1:20,]
for(i in 1:20){
#road.stack[[i]] <- road.stack[[i]] * ratio.no2.sum$ratio[i]
values(road.stack)[values(road.stack[[i]]) == 1] <- ratio.top.jan$Back.Road.Ratio[i]
values(road.stack)[values(road.stack[[i]]) == 2] <- ratio.top.jan$Back.High.Ratio[i]
}
# add no2 and road values
r.poll.rd <- overlay(res.mgcv, road.stack, fun = function(x,y){ifelse(y != 0, x*y, x)})
names(r.poll.rd) <- c("jan01d", "jan01n", "jan02d", "jan02n","jan03d", "jan03n", "jan04d", "jan04n", "jan05d", "jan05n", "jan06d", "jan06n", "jan07d", "jan07n", "jan08d", "jan08n", "jan09d", "jan09n", "jan10d", "jan10n")
#####################
#ras.mgcv.df <- as.data.frame(r.poll.rd, xy = TRUE) # easy way
# however, since we resampled and changed our data
# with different resolution imamges and extent, the easier way doesn't work
ras <- xyFromCell(r.poll.rd, 1:ncell(r.poll.rd))
krige.df <- as.data.frame(r.poll.rd)
##-- Find Mean and variance
ras.krige.stat <- data.frame(ras, krige.df)
stat1 <- ras.krige.stat %>% dplyr::select(-c(x,y)) %>%
gather(factor_key = T) %>%
group_by(key) %>% summarise(mean= round(mean(value),1), sd= round(sd(value),1), max = max(value),min = min(value)) %>%
rename(Hour = key)
#####
ras.krige.df <- data.frame(ras, krige.df) %>%
reshape2::melt(id = c("x", "y"), variable.name = "Hour", value.name = "NO2")
ras.krige.df %>%
ggplot() +
geom_tile(aes(x = x, y = y, fill = NO2)) +
scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
geom_text(data = stat1, aes(x = 187000, y = 434000, label = paste0("mean = " , mean)), size = 3) +
geom_text(data = stat1, aes(x = 184000, y = 430500, label = paste0("sd = " , sd)), size = 3) +
geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
facet_wrap(~ Hour, ncol = 8) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> final
# Export PNG
png("kriged_final.png", width=1200, height=550, res=100)
final
dev.off()
##############
#--Extract--##
##############
# Attributes of Monitoring stations
back_df <- stations_df %>%
filter(Name %in% c('Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam')) %>%
dplyr::select(-c(Long, Lat, Province, City, `F/R`, Road_Dist, DEM))
road_df <- stations %>% filter(Name %in% c('Hangang-daero', 'Yeongdeungpo', 'Gangbyeonbuk-ro'))
# Spatial Points
back <- top.jan[c(2,8,17,18,42,48),] #'Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam'
colnames(back@data) <- c("jan01d", "jan01n", "jan02d", "jan02n","jan03d", "jan03n", "jan04d", "jan04n", "jan05d", "jan05n", "jan06d", "jan06n", "jan07d", "jan07n", "jan08d", "jan08n", "jan09d", "jan09n", "jan10d", "jan10n")
# Silim Coordinates: 195793.5 442361.5
# Gwanakgu Office location: 193837.2 442737.9
# Final
back_pred <- back_df %>% cbind(extract(r.poll.rd, back))
back_obs <- back_df %>% cbind(back@data)
# Plot
plot_back_pred <- reshape2::melt(back_pred,
id = c("X","Y","Station","Name"),
variable.name = "Type",
value.name = "Value") %>%
mutate(Group = "pred")
plot_back_obs <- reshape2::melt(back_obs,
id = c("X","Y","Station","Name"),
variable.name = "Type",
value.name = "Value")%>%
mutate(Group = "obs")
plot_back_fin <- rbind(plot_back_pred, plot_back_obs)
plot_back_fin %>%
ggplot(aes(x = Type, y = Value, group = Group)) +
geom_line(aes(linetype = Group), size = 1, col = "grey") +
geom_point(aes(colour = Name), size =2) +
ylab("Concentration(ppb)") +
facet_wrap(~ Name) +
theme_bw() +
theme(axis.title.x=element_blank(),
axis.text.x=element_text(size = 12, angle = 90),
axis.text.y=element_text(size = 12),
strip.text.x = element_text(size = 20),
legend.title=element_text(size=15),
legend.text=element_text(size=15)
) -> graphs
# Export Graph
png("Pred_Obs.png", width=1200, height=550, res=100)
graphs
dev.off()