-
Notifications
You must be signed in to change notification settings - Fork 4
/
R18_Spatial_lag_model.R
151 lines (142 loc) · 4.25 KB
/
R18_Spatial_lag_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#### Spatial lag model
#
#
#
# Example of a spatial lag model for NUTS2 regions of
# Austria, Czech Republic, Germany, Hungary, Poland, Slovakia
#
#
#
# Data, data description & plots
library(ggplot2) # install.packages("ggplot2")
library(spdep) # install.packages("spdep")
library(spatialreg)
rm(list=ls())
CE_data <- read.csv("datasets/NUTS2_data.csv")
head(CE_data, 15)
tail(CE_data, 10)
plot(CE_data[ , c(4:7)])
#
#
# Data description:
#
# U_pc_2012 Dependent variable, the general rate of unemployment
# for a NUTS2 region i at time t (2012)
# EUR_HAB_EU_2011 region’s GDP per capita (current EUR prices of 2011)
# expressed as percentage of EU average
# EUR_HAB_EU_2010
# TechEmp_2012 percentage of employees working in the “high-tech industry”
# (NACE r.2 code HTC) in a given region and t = 2012
# NUTS_ID NUTS2 region-identifier (NUTS.2010)
# long, lat coordinates of regions' centroids
#
#
# Unemployment model to be estimated:
#
# U_pc_2012 <- I(EUR_HAB_EU_2011-EUR_HAB_EU_2010) + TechEmp_2012
#
#
# Distance based neighbors - maximum neighbor distance threshold: 250 km
#
#
#
# Step 1 Prepare spatial objects for subsequent analysis:
#
# (a) coordinates and IDs
coords <- CE_data[,c("long", "lat")]
coords <- coordinates(coords)
IDs <- CE_data$NUTS_ID
# (b) identify neighbors given tau distance threshold
nb250km <- dnearneigh(coords, d1=0, d2=250, longlat=T, row.names = IDs)
summary(nb250km)
# (c) calculate the spatial weights matrix
W.matrix <- nb2listw(nb250km)
summary(W.matrix)
#
#
#
#
#
## Quick exercise 1
## Test the dependent variable of our model "U_pc_2012"
## for spatial dependency and show "Moran plot" for this variable
?moran.test
?moran.plot
##
##
#
#
# Step 2 Basic OLS model + model selection tests
#
OLS.1 <- lm(U_pc_2012 ~ I(EUR_HAB_EU_2011-EUR_HAB_EU_2010) + TechEmp_2012, data=CE_data)
summary(OLS.1)
#
?lm.LMtests
lm.LMtests(OLS.1, W.matrix, test=c("LMlag", "LMerr", "RLMlag", "RLMerr"))
# Both LMlag and RLMlag reject H0 of no spatial dependence
# LMerr test rejects H0, but its robust version RLMerr does not.
# Hence, we prefer & use the spatial lag model specificaton.
#
#
#
#
#
# Step 3 Spatial lag model estimation, tests & plots
#
?lagsarlm # note e.g. the "Dubin" argument
spatial.lag <- lagsarlm(U_pc_2012 ~ I(EUR_HAB_EU_2011-EUR_HAB_EU_2010) + TechEmp_2012,
data=CE_data, W.matrix)
summary(spatial.lag)
?LR.sarlm # Test the spatial lag specification against OLS model
LR1.sarlm(spatial.lag)
LR.sarlm(spatial.lag, OLS.1)
#
# Test for spatial randomness in residuals from the spatial lag model
moran.test(spatial.lag$residuals, W.matrix,
randomisation=FALSE, alternative="two.sided")
#
# Breusch-Pagan test for heteroskedasticity, generalized for spatial models,
# .. taking rho coefficient into account
bptest.sarlm(spatial.lag)
#
# Basic fitted vs. actual plot:
# plot(CE_data$U_pc_2012, spatial.lag$fitted.values, pch=16)
#
# Enhanced ggplot2 figure:
Fitted_Lag_250km <- spatial.lag$fitted.values
plot.df <- as.data.frame(Fitted_Lag_250km)
plot.df$Actual <- CE_data$U_pc_2012
plot.df$CountryCode <- substring(CE_data$NUTS_ID, 1,2)
#
ggplot(plot.df, aes(Actual, Fitted_Lag_250km)) +
scale_x_continuous(limits = c(0,20), expand = c(0,0)) +
scale_y_continuous(limits = c(0,20), expand = c(0,0)) +
geom_segment(aes(x=0, xend=20, y=0, yend=20), linetype=2) +
geom_point(aes(color = CountryCode), size = 3) +
ggtitle("General unemployment rate in %") +
theme_bw()
#
#
#
#
## Quick exercise 2
##
## 1) Enhance the SLM model by allowing for spatial lag in regressors
## .. ie use SDM specicifaction
# spatial.Durbin <-
## 2) Use LR test to evaluate
## - spatial.Durbin against OLS-based model
## - spatial.Durbin against spatial.lag
## 3) Evaluate spatial dependency and heteroskedasticity in residuals
## 4) Plot Actual vs Fitted values
## .. review the code starting on line 106 to include output
## .. from the spatial.Durbin (not spatial.lag) model
## 5) Repeat the above steps while setting tau to 170 km.
##
##
#
#
#
#
#
#