Skip to content

2. Code Tutorial: Universal Kriging

Hyesop edited this page Jun 21, 2019 · 1 revision

This code is a single R sourcecode but splitted for the reader's convenience.

Import NO2 in Winter 2014

Here, we use 5 packages tidyverse, sf, raster, mgcv, and gstat. Wow, really? Of course, that is it. The next step is to load the pre-cleaned NO2 then merge them all to station points.

options(scipen = 100)  # extend the system's decimal points to a hundred
memory.size() # check memory size: Windows-specific
memory.limit(99999) # expand memory size: Windows-specific

library(tidyverse) # data tidy and visualisation
library(sf) # geospatial analysis: Vector
library(raster) # geospatial analysis: Raster
library(mgcv) # GAM
library(gstat) # Kriging

load("../RData/no2.RData")
rm(list=setdiff(ls(), c("no2.win.12hr", "ratio.no2.win")))
stations <- read_sf("../seoul/stations_10km.shp")
stations_df <- stations %>% st_set_geometry(NULL)
seoul <- read_sf("../seoul/Seoul_City.shp") %>% as('Spatial') %>% fortify()
no2.winter <- merge(no2.win.12hr, stations_df, by.x = c("Station.ID", "X", "Y"), by.y = c("Station", "X", "Y"))

Model Construction

##########
options(warn = -1) # don't print warnings
myVario <- list()
myList <- list()

top.dec <- no2.winter[118:135]

##Automated-Supervised variograms
for(i in 1:18){
  myVario[[length(myVario)+1]] <- variogram(top.dec[[i]] ~ 1, top.dec, cutoff = 50000)
  myList[[i]]  <- fit.variogram(myVario[[i]], 
                                vgm(psill = 250,
                                    range = 45000,
                                    nugget= 20,
                                    model = "Ste"),
                                fit.kappa = TRUE, fit.method = 6)
}

library(gridExtra)
p01 <- plot(myVario[[1]],  myList[[1]],  main = "Dec 02nd\nOffice hours")
p02 <- plot(myVario[[2]],  myList[[2]],  main = "Dec 02nd\nHome hours")
p03 <- plot(myVario[[3]],  myList[[3]],  main = "Dec 03rd\nOffice hours")
p04 <- plot(myVario[[4]],  myList[[4]],  main = "Dec 03rd\nHome hours")
p05 <- plot(myVario[[5]],  myList[[5]],  main = "Dec 04th\nOffice hours")
p06 <- plot(myVario[[6]],  myList[[6]],  main = "Dec 04th\nHome hours")
p07 <- plot(myVario[[7]],  myList[[7]],  main = "Dec 05th\n Office hours")
p08 <- plot(myVario[[8]],  myList[[8]],  main = "Dec 05th\n Home hours")
p09 <- plot(myVario[[9]],  myList[[9]],  main = "Dec 06th\n Office hours")
p10 <- plot(myVario[[10]], myList[[10]], main = "Dec 06th\n Home hours")
p11 <- plot(myVario[[11]], myList[[11]], main = "Dec 07th\nOffice hours")
p12 <- plot(myVario[[12]], myList[[12]], main = "Dec 07th\nHome hours")
p13 <- plot(myVario[[13]], myList[[13]], main = "Dec 08th\nOffice hours")
p14 <- plot(myVario[[14]], myList[[14]], main = "Dec 08th\nHome hours")
p15 <- plot(myVario[[15]], myList[[15]], main = "Dec 09th\nOffice hours")
p16 <- plot(myVario[[16]], myList[[16]], main = "Dec 09th\nHome hours")
p17 <- plot(myVario[[17]], myList[[17]], main = "Dec 10th\n Office hours")
p18 <- plot(myVario[[18]], myList[[18]], main = "Dec 10th\n Home hours")

varplot <- grid.arrange(p01, p02, p03, p04, p05, p06, p07, p08, p09, p10, 
                        p11, p12, p13, p14, p15, p16, p17, p18, 
                        ncol = 6)

ggsave("semvario.png",varplot, width = 12, height = 7)#, scale = 1.5)


### Data Frame
seoul_grid <- data.frame(expand.grid(X = seq(min(no2.winter$X), max(no2.winter$X), length=200),
                                     Y = seq(min(no2.winter$Y), max(no2.winter$Y), length=200)))
coordinates(seoul_grid) <- ~X+Y
proj4string(seoul_grid) <- CRS("+init=epsg:5181")

#https://gis.stackexchange.com/questions/157279/saving-results-in-automap-r-package-for-time-series-data


##############
#--Kriging--##
##############
pred.model <- seoul_grid@coords
var.model <- seoul_grid@coords

for(i in 1:18) {
  kriging_new <- krige(top.dec@data[,i]~ X + Y,
                       top.dec, 
                       seoul_grid,
                       nmin = 15,
                       nmax = 30,
                       model = myList[[i]])
  kriging_new$var_model <- data.frame(kriging_new$var1.var)
  var.model <- cbind(var.model, kriging_new$var_model)
  xyz <- as.data.frame(kriging_new$var1.pred)
  colnames(xyz) <- colnames(top.dec@data)[i]
  pred.model <- cbind(pred.model, xyz)
} 

##-- Add ColNames
colnames(pred.model) <- c("X", "Y", "dec02d", "dec02n","dec03d", "dec03n", "dec04d", "dec04n", "dec05d", "dec05n", "dec06d", "dec06n", "dec07d", "dec07n", "dec08d", "dec08n", "dec09d", "dec09n", "dec10d", "dec10n")

colnames(var.model) <- c("X", "Y", "dec02d", "dec02n","dec03d", "dec03n", "dec04d", "dec04n", "dec05d", "dec05n", "dec06d", "dec06n", "dec07d", "dec07n", "dec08d", "dec08n", "dec09d", "dec09n", "dec10d", "dec10n")

##-- Find Mean and variance
stat <- pred.model %>% dplyr::select(-c(X,Y)) %>% 
  gather(factor_key = T) %>% 
  group_by(key) %>% summarise(mean= round(mean(value),1), median = round(median(value),1), 
                              sd= round(sd(value),1), max = max(value),min = min(value)) %>% rename(Hour = key)

statvar <- var.model %>% dplyr::select(-c(X,Y)) %>% 
  gather(factor_key = T) %>% 
  group_by(key) %>% summarise(mean= round(mean(value),1), median = round(median(value),1), 
                              sd= round(sd(value),1), max = max(value),min = min(value)) %>% rename(Hour = key)


########
# RMSE #
########
pred <- pred.model
r.pred <- rasterFromXYZ(pred)
crs(r.pred) <- CRS('+init=epsg:5181')

obs <- as.data.frame(top.dec) %>% dplyr::select(X,Y,everything())
pred.df <- data.frame(X = obs$X, Y = obs$Y)
RMSE <- data.frame(X = obs$X, Y = obs$Y)

# for loop
for(i in 1:18){
  pred.df[,i+2] <- extract(r.pred[[i]], top.dec)
  RMSE[,i+2] <- (pred.df[,i+2] - obs[,i+2])^2
  
}

colnames(RMSE) <- colnames(obs)
stat$rmse <- RMSE %>% dplyr::select(-c(1:2)) %>% as.matrix() %>% colSums() 
stat$rmse <- sqrt(stat$rmse / 60) %>% round(digits = 3)

Visualisation

#############
#- Plotting-#
#############

ras.krige.df <- pred.model %>% 
  reshape2::melt(id = c("X", "Y"), variable.name = "Hour", value.name = "NO2") 

ras.krige.df %>% 
  ggplot() +
  geom_tile(aes(x = X, y = Y, fill = NO2)) +
  scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
  geom_contour(aes(x = X, y = Y, z = NO2),bins = 20, colour = "grey40", alpha = 0.7) +
  geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
  geom_text(data = stat, aes(x = 187000,  y = 434000, label = paste0("mean = " , mean)), size = 3) + 
  geom_text(data = stat, aes(x = 184000,  y = 430500, label = paste0("sd = " , sd)), size = 3) + 
  geom_text(data = stat, aes(x = 188900,  y = 469000, label = paste0("RMSE=" , rmse)), size = 3) + 
  facet_wrap(~ Hour, ncol = 8) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> kriged # 1200 x 550 

# Export PNG
png("kriged_pred.png", width=1200, height=550, res=100)
kriged
dev.off()

ras.var.df <- var.model %>% 
  reshape2::melt(id = c("X", "Y"), variable.name = "Hour", value.name = "NO2") 

ras.var.df %>% 
  ggplot() +
  geom_tile(aes(x = X, y = Y, fill = NO2)) +
  scale_fill_distiller(palette = "BrBG", na.value = NA) + #, limits = c(0,12), breaks = c(0,3,6,9,12)) +
  geom_contour(aes(x = X, y = Y, z = NO2),bins = 20, colour = "grey40", alpha = 0.7) +
  geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
  geom_text(data = statvar, aes(x = 187000,  y = 434000, label = paste0("mean = " , mean)), size = 3) + 
  geom_text(data = statvar, aes(x = 184000,  y = 430500, label = paste0("sd = " , sd)), size = 3) + 
  facet_wrap(~ Hour, ncol = 8) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> kriged_stv 

# Export PNG
png("kriged_var.png", width=1200, height=550, res=100)
kriged_stv
dev.off()


######################
#--Cross Validation--#
######################
CV <- list()
CV.RMSE <- vector()
CV.R2 <- vector()

for(k in 1:20){
  CV[[length(CV)+1]] <- krige.cv(get(colnames(top.jan@data[k]))~ 1, 
                                 no2.winter, 
                                 myList[[k]], # Semi-Variogram
                                 nmax = 30,
                                 nfold=10)
  
  RMSE <- sqrt(mean((CV[[k]]$observed - CV[[k]]$residual)^2)) #RMSE, Ideally small
  CV.RMSE <- append(CV.RMSE,RMSE)
  R2 <- (cor(CV[[k]]$observed, CV[[k]]$residual))^2
  CV.R2 <- append(CV.R2,R2)
}

CV.RMSE
CV.R2

library(gridExtra)
CV1 <- bubble(CV[[1]], "residual", main = "10-fold CV residuals\nFeb11 Day 2014")
CV2 <- bubble(CV[[2]], "residual", main = "10-fold CV residuals\nFeb11 Night 2014")
CV3 <- bubble(CV[[3]], "residual", main = "10-fold CV residuals\nFeb12 Day 2014")
CV4 <- bubble(CV[[4]], "residual", main = "10-fold CV residuals\nFeb12 Night 2014")

grid.arrange(CV1, CV2, CV3, CV4)


############################
# Convert to Raster Bricks #
############################
krige <- rasterFromXYZ(pred.model, 
                       crs="+proj=tmerc +lat_0=38 +lon_0=127 +k=1 +x_0=200000 +y_0=500000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs",
                       digits=5)

ras.road <- raster("../seoul/road_10km_re.tif")  # Import raster
res.mgcv <- resample(krige, ras.road, method = "bilinear") # resample 
res.mgcv <- merge(ras.road, res.mgcv) # merge

# assign road
road_01 = road_02 = road_03 = road_04 = road_05 = 
road_06 = road_07 = road_08 = road_09 = road_10 =
road_11 = road_12 = road_13 = road_14 = road_15 = 
road_16 = road_17 = road_18 = road_19 = road_20 = ras.road

# stack raster and remove individual raster files
road.stack <- stack(road_01, road_02, road_03, road_04, road_05, 
                    road_06, road_07, road_08, road_09, road_10,
                    road_11, road_12, road_13, road_14, road_15, 
                    road_16, road_17, road_18, road_19, road_20
)
rm(road_01, road_02, road_03, road_04, road_05, 
   road_06, road_07, road_08, road_09, road_10,
   road_11, road_12, road_13, road_14, road_15, 
   road_16, road_17, road_18, road_19, road_20
)

# add road ratio values to GAM raster
ratio.top.jan <- ratio.no2.win[1:20,]


for(i in 1:20){
  #road.stack[[i]] <- road.stack[[i]] * ratio.no2.sum$ratio[i]
  values(road.stack)[values(road.stack[[i]]) == 1] <- ratio.top.jan$Back.Road.Ratio[i]
  values(road.stack)[values(road.stack[[i]]) == 2] <- ratio.top.jan$Back.High.Ratio[i]
}

# add no2 and road values
r.poll.rd <- overlay(res.mgcv, road.stack, fun = function(x,y){ifelse(y != 0, x*y, x)})
names(r.poll.rd) <- c("jan01d", "jan01n", "jan02d", "jan02n","jan03d", "jan03n", "jan04d", "jan04n", "jan05d", "jan05n", "jan06d", "jan06n", "jan07d", "jan07n", "jan08d", "jan08n", "jan09d", "jan09n", "jan10d", "jan10n")

#####################
#ras.mgcv.df <- as.data.frame(r.poll.rd, xy = TRUE) # easy way
# however, since we resampled and changed our data
# with different resolution imamges and extent, the easier way doesn't work
ras <- xyFromCell(r.poll.rd, 1:ncell(r.poll.rd))
krige.df <- as.data.frame(r.poll.rd) 

##-- Find Mean and variance

ras.krige.stat <- data.frame(ras, krige.df)

stat1 <- ras.krige.stat %>% dplyr::select(-c(x,y)) %>% 
  gather(factor_key = T) %>% 
  group_by(key) %>% summarise(mean= round(mean(value),1), sd= round(sd(value),1), max = max(value),min = min(value)) %>% 
  rename(Hour = key)

#####
ras.krige.df <- data.frame(ras, krige.df) %>% 
  reshape2::melt(id = c("x", "y"), variable.name = "Hour", value.name = "NO2") 

ras.krige.df %>% 
  ggplot() +
  geom_tile(aes(x = x, y = y, fill = NO2)) +
  scale_fill_distiller(palette = "Spectral", na.value = NA, limits = c(0,125), breaks = c(0,25,50,75,100,125)) +
  geom_text(data = stat1, aes(x = 187000,  y = 434000, label = paste0("mean = " , mean)), size = 3) + 
  geom_text(data = stat1, aes(x = 184000,  y = 430500, label = paste0("sd = " , sd)), size = 3) + 
  geom_path(data = seoul, aes(x = long, y = lat), color = 'black', size = 1) +
  facet_wrap(~ Hour, ncol = 8) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> final 

# Export PNG
png("kriged_final.png", width=1200, height=550, res=100)
final
dev.off()

Validation

##############
#--Extract--##
##############
# Attributes of Monitoring stations
back_df <- stations_df %>% 
  filter(Name %in% c('Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam')) %>% 
  dplyr::select(-c(Long, Lat, Province, City, `F/R`, Road_Dist, DEM))
road_df <- stations %>% filter(Name %in% c('Hangang-daero', 'Yeongdeungpo', 'Gangbyeonbuk-ro')) 

# Spatial Points
back <- top.jan[c(2,8,17,18,42,48),] #'Jongno-gu', 'Seongbuk-gu', 'Gwanak-gu', 'Gangnam-gu', 'Daeya', 'Onam'
colnames(back@data) <- c("jan01d", "jan01n", "jan02d", "jan02n","jan03d", "jan03n", "jan04d", "jan04n", "jan05d", "jan05n", "jan06d", "jan06n", "jan07d", "jan07n", "jan08d", "jan08n", "jan09d", "jan09n", "jan10d", "jan10n")

# Silim Coordinates: 195793.5 442361.5
# Gwanakgu Office location: 193837.2 442737.9

# Final
back_pred <- back_df %>% cbind(extract(r.poll.rd, back))
back_obs <- back_df %>% cbind(back@data)


# Plot
plot_back_pred <- reshape2::melt(back_pred, 
                                 id = c("X","Y","Station","Name"), 
                                 variable.name = "Type", 
                                 value.name = "Value") %>% 
                  mutate(Group = "pred")

plot_back_obs <- reshape2::melt(back_obs, 
                                id = c("X","Y","Station","Name"), 
                                variable.name = "Type", 
                                value.name = "Value")%>% 
                  mutate(Group = "obs")
plot_back_fin <- rbind(plot_back_pred, plot_back_obs)



plot_back_fin %>% 
  ggplot(aes(x = Type, y = Value, group = Group)) +
  geom_line(aes(linetype = Group), size = 1, col = "grey") +
  geom_point(aes(colour = Name), size =2) +
  ylab("Concentration(ppb)") +
  facet_wrap(~ Name) +
  theme_bw() +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_text(size = 12, angle = 90),
        axis.text.y=element_text(size = 12),
        strip.text.x = element_text(size = 20),
        legend.title=element_text(size=15), 
        legend.text=element_text(size=15)                                  
  ) -> graphs
  
# Export Graph
png("Pred_Obs.png", width=1200, height=550, res=100)
graphs
dev.off()