-
Notifications
You must be signed in to change notification settings - Fork 2
/
SEM_IN_PYTHON.PY
147 lines (134 loc) · 9.03 KB
/
SEM_IN_PYTHON.PY
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import pandas as pd
import os
import datetime
import matplotlib.pyplot as plt
import numpy as np
from semopy import Model
from semopy import Optimizer
from semopy.inspector import inspect
import matplotlib.dates as mdates
os.chdir(r'D:\COVID-19\PNAS_SECOND')
myFmt = mdates.DateFormatter('%b-%d')
def sem_function(mod, Agg_Trips_tem, Start_date):
model = Model(mod)
model.load_dataset(Agg_Trips_tem)
opt = Optimizer(model)
objective_function_value = opt.optimize()
coeff_tem = inspect(opt)
coeff_tem['Date'] = Start_date
return coeff_tem
def sem_all_state(Agg_Trips_1, mod, xvar, xvar_laglog):
Agg_Trips_1['Is_ReopenState'] = True # If all states
# Moving-SEM
Agg_Trips_1 = Agg_Trips_1.sort_values(by=['CTFIPS', 'Date']).reset_index(drop=True)
Start_date = datetime.datetime(2020, 3, 10)
All_corr_Reopen = pd.DataFrame()
All_corr_Close = pd.DataFrame()
for jj in range(0, (max(Agg_Trips_1['Date']) - Start_date).days - 7):
print(jj)
Agg_Trips_tem = \
Agg_Trips_1[(Agg_Trips_1['Date'] <= Start_date + datetime.timedelta(time_window)) &
(Agg_Trips_1['Date'] > Start_date) & (Agg_Trips_1['New_cases'] > 1 / 1e3)
& (Agg_Trips_1[xvar] > 0.01)]
Agg_Trips_tem = Agg_Trips_tem.dropna().reset_index(drop=True)
Reopen_tem = Agg_Trips_tem[Agg_Trips_tem['Is_ReopenState']]
try:
coeff_tem0 = sem_function(mod, Reopen_tem, Start_date)
coeff_tem0['Date'] = Start_date
All_corr_Reopen = All_corr_Reopen.append(coeff_tem0)
except:
pass
Start_date = Start_date + datetime.timedelta(1)
All_corr_1_Reopen = All_corr_Reopen[
(All_corr_Reopen['lval'] == 'Log_New_cases') & (All_corr_Reopen['rval'] == xvar_laglog) & (
All_corr_Reopen['Date'] > datetime.datetime(2020, 3, 10)) & (All_corr_Reopen['P-value'] < 0.1)]
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(8, 5))
ax.errorbar(All_corr_1_Reopen['Date'], All_corr_1_Reopen['Value'], All_corr_1_Reopen['SE'], fmt='-',
color='#10375c', capsize=1, markersize=0, alpha=0.8)
plt.ylabel('Coefficient (moving average)')
plt.xlabel('Date')
plt.tight_layout()
return All_corr_Reopen
def sem_split_state(Agg_Trips_1, mod, xvar, Idea_Reopen_State, xvar_laglog, ntail=3000):
# PICK TOP CASES
MAX_CT = Agg_Trips_1[Agg_Trips_1['Date'] == max(Agg_Trips_1['Date'])][['CTFIPS', 'Agg_cases']]
MAX_CT = MAX_CT.sort_values(by='Agg_cases').reset_index(drop=True)
Agg_Trips_11 = Agg_Trips_1[Agg_Trips_1['CTFIPS'].isin(MAX_CT.tail(ntail)['CTFIPS'])]
# SPLIT THE STATE
Agg_Trips_11['Is_ReopenState'] = Agg_Trips_11['STFIPS'].isin(Idea_Reopen_State)
# Moving-SEM
Agg_Trips_11 = Agg_Trips_11.sort_values(by=['CTFIPS', 'Date']).reset_index(drop=True)
Start_date = datetime.datetime(2020, 3, 15)
All_corr_Reopen = pd.DataFrame()
All_corr_Close = pd.DataFrame()
for jj in range(0, (max(Agg_Trips_11['Date']) - Start_date).days - 7):
print(jj)
Agg_Trips_tem = \
Agg_Trips_11[(Agg_Trips_11['Date'] <= Start_date + datetime.timedelta(time_window)) &
(Agg_Trips_11['Date'] > Start_date) & (Agg_Trips_11['New_cases'] > 1 / 1e3)
& (Agg_Trips_11[xvar] > 0.01)]
Agg_Trips_tem = Agg_Trips_tem.dropna().reset_index(drop=True)
Reopen_tem = Agg_Trips_tem[Agg_Trips_tem['Is_ReopenState']]
Close_tem = Agg_Trips_tem[~Agg_Trips_tem['Is_ReopenState']]
try:
coeff_tem0 = sem_function(mod, Reopen_tem, Start_date)
coeff_tem1 = sem_function(mod, Close_tem, Start_date)
coeff_tem0['Date'] = Start_date
All_corr_Reopen = All_corr_Reopen.append(coeff_tem0)
coeff_tem1['Date'] = Start_date
All_corr_Close = All_corr_Close.append(coeff_tem1)
except:
pass
Start_date = Start_date + datetime.timedelta(1)
All_corr_1_Reopen = All_corr_Reopen[
(All_corr_Reopen['lval'] == 'Log_New_cases') & (All_corr_Reopen['rval'] == xvar_laglog) & (
All_corr_Reopen['Date'] > datetime.datetime(2020, 3, 10)) & (All_corr_Reopen['P-value'] < 0.1)]
All_corr_1_Close = All_corr_Close[
(All_corr_Close['lval'] == 'Log_New_cases') & (All_corr_Close['rval'] == xvar_laglog) & (
All_corr_Close['Date'] > datetime.datetime(2020, 3, 14)) & (All_corr_Close['P-value'] < 0.1)]
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(8, 5))
ax.errorbar(All_corr_1_Reopen['Date'], All_corr_1_Reopen['Value'], All_corr_1_Reopen['SE'], fmt='-',
color='#10375c', capsize=1, markersize=0, alpha=0.8)
ax.errorbar(All_corr_1_Close['Date'], All_corr_1_Close['Value'], All_corr_1_Close['SE'], fmt='-', color='#b83b5e',
capsize=1, markersize=0, alpha=0.8)
plt.legend(['"Reopened" counties', '"Locked-down" counties'], frameon=False)
plt.ylabel('Coefficient (moving average)')
plt.xlabel('Date')
plt.tight_layout()
return All_corr_Reopen, All_corr_Close
# Read input data
Agg_Trips_1 = pd.read_csv('All_XY_Features_To_R_County_Level_0731_toR.csv', index_col=0)
Agg_Trips_1['Date'] = pd.to_datetime(Agg_Trips_1['Date'])
Agg_Trips_1['CTFIPS'] = Agg_Trips_1['CTFIPS'].astype(str).apply(lambda x: x.zfill(5))
# ALL STATE, Log_InFlow_Weight
All_corr_Reopen = sem_all_state(Agg_Trips_1, """ Log_New_cases ~ Log_InFlow_Weight + Lag1_Log_New_cases + Is_Weekend + Population_density + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65
Log_InFlow_Weight ~ Log_National_Cases + Lag1_Log_InFlow_Weight + Is_Weekend + Population_density + Employment_density + PRCP + TMAX + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65 + Med_House_Income + Pct_Black + Pct_White
""", 'InFlow_Weight', 'Log_InFlow_Weight')
# ALL STATE, RISK_INFLOW
All_corr_Reopen = sem_all_state(Agg_Trips_1, """ Log_New_cases ~ Log_Risked_WInput + Lag1_Log_New_cases + Is_Weekend + Population_density + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65
Log_Risked_WInput ~ Log_National_Cases + Lag1_Log_Risked_WInput + Is_Weekend + Population_density + Employment_density + PRCP_NEW + TMAX + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65 + Med_House_Income + Pct_Black + Pct_White
""", 'Risked_WInput', 'Log_Risked_WInput')
# ALL STATE, LAG7_RISK_INFLOW
All_corr_Reopen = sem_all_state(Agg_Trips_1, """ Log_New_cases ~ Lag7_Log_Risked_WInput + Lag1_Log_New_cases + Is_Weekend + Population_density + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65
Log_Risked_WInput ~ Log_National_Cases + Lag1_Log_Risked_WInput + Is_Weekend + Population_density + Employment_density + PRCP_NEW + TMAX + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65 + Med_House_Income + Pct_Black + Pct_White
""", 'Risked_WInput', 'Lag7_Log_Risked_WInput')
# SPLIT STATE, Log_InFlow_Weight
# ['51', '37', '27', '49', '04', '48', '12', '28', '01', '06', '19']
# ['01', '04', '08', '13', '16', '17', '18', '19', '23', '27', '28', '35', '38', '40', '45', '46', '47', '48', '49']
# ['12', '06', '22', '13', '01', '17', '04', '47', '37', '45', '32', '51']
All_corr_Reopen, All_corr_Close = sem_split_state(Agg_Trips_1, """ Log_New_cases ~ Log_InFlow_Weight + Lag1_Log_New_cases + Is_Weekend + Population_density + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65
Log_InFlow_Weight ~ Log_National_Cases + Lag1_Log_InFlow_Weight + Is_Weekend + Population_density + Employment_density + PRCP + TMAX + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65 + Med_House_Income + Pct_Black + Pct_White
""", 'InFlow_Weight', ['12', '06', '22', '13', '01', '17', '04', '47', '37', '45', '32', '51'],
'Log_InFlow_Weight')
# SPLIT STATE, RISK_INFLOW
All_corr_Reopen, All_corr_Close = sem_split_state(Agg_Trips_1, """ Log_New_cases ~ Log_Risked_WInput + Lag1_Log_New_cases + Is_Weekend + Population_density + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65
Log_Risked_WInput ~ Log_National_Cases + Lag1_Log_Risked_WInput + Is_Weekend + Population_density + Employment_density + PRCP_NEW + TMAX + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65 + Med_House_Income + Pct_Black + Pct_White
""", 'Risked_WInput', ['51', '37', '27', '49', '04', '48', '12', '28', '01', '06', '19'], 'Log_Risked_WInput')
# SPLIT STATE, LAG7_RISK_INFLOW
All_corr_Reopen, All_corr_Close = sem_split_state(Agg_Trips_1, """ Log_New_cases ~ Lag7_Log_Risked_WInput + Lag1_Log_New_cases + Is_Weekend + Population_density + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65
Log_Risked_WInput ~ Log_National_Cases + Lag1_Log_Risked_WInput + Is_Weekend + Population_density + Employment_density + PRCP_NEW + TMAX + Pct_Age_0_24 + Pct_Age_25_40 + Pct_Age_40_65 + Med_House_Income + Pct_Black + Pct_White
""", 'Risked_WInput', ['51', '37', '27', '49', '04', '48', '12', '28', '01', '06', '19'],
'Lag7_Log_Risked_WInput')
# Other factors
All_corr_Reopen[All_corr_Reopen['P-value'] < 0.1].groupby(['lval', 'op', 'rval']).describe()['Value'][
['count', 'mean', '50%']]