forked from david-edu-morales/TAAP_py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
taap_plot_monthlyAvg.py
140 lines (108 loc) · 6.33 KB
/
taap_plot_monthlyAvg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# %%
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import pandas as pd
from sklearn import linear_model
import csv
import seaborn as sns
# Apply seaborn's theme and make 11x4 in the default figure size for later plots
sns.set(rc={"figure.figsize": (11, 4)})
# %%
# Shared configuration used by every cell below
# Climate station keys; each one maps to a 'data/<key>_clean-data.csv' file
keylist_mx = [
    26013,
    26057,
    26164,
]
# Variables to be resampled. Order matters: the plotting cell treats the LAST
# entry as the one measured in mm and every other entry as a temperature.
varsAvg_mx = ['tmax', 'tmin', 'evap']
# csv that collects the linear-regression trend of every station/variable/month
csvFile = 'data/historicalTrends_monthlyAvg.csv'
# Column names of that csv
headerList = ['key', 'variable', 'month', 'coef']
# Month abbreviations used as subplot titles (index 0 == January)
month_str = [
    'Jan', 'Feb', 'Mar',
    'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep',
    'Oct', 'Nov', 'Dec',
]
# Unicode degree sign for temperature axis labels and annotations
degree_sign = '\N{DEGREE SIGN}'
# %%
# *** MEXICAN CLIMATE STATIONS ***
# Load the cleaned data of every station into memory.
# Step 1: map each station key to the path of its clean-data csv
filenameDict = {
    station: 'data/' + str(station) + '_clean-data.csv'
    for station in keylist_mx
}
# Step 2: map each station key to its dataframe; the 'date' column is parsed
# as dates and becomes the index
dictCleanData = {
    station: pd.read_csv(path, index_col='date', parse_dates=True)
    for station, path in filenameDict.items()
}
# %%
# Build the monthly means (Tn, Tx, and ET) of every station from the climatological data
dictMonthlyAvg = {}                                 # station key -> monthly-mean dataframe
for key in keylist_mx:
    byVariable = dictCleanData[key].groupby('variable')
    # Monthly mean of 'measurement' per variable; result is indexed by (variable, date).
    # NOTE(review): the 'M' alias is deprecated in favour of 'ME' from pandas 2.2 on —
    # switch once the minimum supported pandas version allows it.
    monthlyMeans = byVariable.resample('M')[['measurement']].mean()
    # Keep only evap/tmax/tmin (per the original note this drops the resampled
    # precip data) and move 'variable' from the index back into a column
    stationAvg = (
        monthlyMeans
        .loc[['evap', 'tmax', 'tmin']]
        .reset_index(level='variable')
    )
    # Re-derive the calendar columns from the resampled date index
    stationAvg['month'] = stationAvg.index.month
    stationAvg['year'] = stationAvg.index.year
    dictMonthlyAvg[key] = stationAvg
# %%
# Export one monthly-average csv per station for the MCA step
for key in keylist_mx:
    outPath = 'data/' + str(key) + '_monthlyAvg.csv'
    dictMonthlyAvg[key].to_csv(outPath)
# %%
# Re-create the 12-month plots for each station/variable using the quality-controlled data.
# For every station and variable one 3x4 figure is saved (one subplot per calendar month)
# showing the monthly means of the most recent `windowYears` years plus a linear fit.
# The fitted change over the fitted span is appended to csvFile as
# (key, variable, month, coef) so it can be plotted on the monte carlo distribution.
windowYears = 40                                    # trend window: last forty years on record

# The csv stays open for the whole run and every row goes through the same writer.
# Previously the file was reopened in append mode per row and rows were hand-built
# with a leading '\n', which left a blank line after the header and no final newline.
# newline='' hands line-ending control to the csv module (no '\r\r\n' on Windows).
with open(csvFile, 'w', newline='') as trendFile:   # set mode to write w/ truncation
    dw = csv.DictWriter(trendFile, delimiter=',',
                        fieldnames=headerList,
                        lineterminator='\n')
    dw.writeheader()

    for key in keylist_mx:
        dfKey = dictMonthlyAvg[key]                 # working dataframe of this station
        for var in varsAvg_mx:
            # The last configured variable is reported in mm, all others in degrees C
            unit = 'mm' if var == varsAvg_mx[-1] else degree_sign+'C'

            fig = plt.figure(figsize=(24,16))
            fig.subplots_adjust(hspace=0.2, wspace=0.2)
            # Set figure title
            fig.suptitle("Monthly Mean for "+var+"\nClimate Station "+str(key), fontsize=30)

            for month in range(1,13):
                ax = fig.add_subplot(3,4,month)     # creates a 12-plot fig (3r x 4c)
                # Formatting that does not depend on the data
                ax.set_title(month_str[month-1], fontsize=20, fontweight='bold')
                ax.set_ylabel(unit)
                # Set number of x-axis tick marks to max (5) and only as integers
                ax.xaxis.set_major_locator(MaxNLocator(5, integer=True))

                # Select the rows of this calendar month and variable
                df = dfKey[(dfKey.index.month == month) & (dfKey.variable == var)]
                if df.empty:
                    # Nothing recorded for this month/variable: leave the subplot
                    # blank instead of failing on df.index.year[-1]
                    continue

                # Window = the last `windowYears` years ending at the final year on record
                end = df.index.year[-1]
                start = end - (windowYears - 1)
                window = df.loc[str(start):str(end)]
                ax.plot(window.index.year, window.measurement)   # observed monthly means

                # Make the linear regression on the non-missing observations
                database = window[['measurement','year']].dropna()
                if database.empty:
                    # Only missing values in the window: there is nothing to fit
                    continue
                # Reshape data into column vectors for use in LinReg builder
                x_data = database['year'].values.reshape(-1, 1)
                y_data = database['measurement'].values.reshape(-1, 1)
                timespan = x_data[-1,0] - x_data[0,0] + 1        # years covered by the fit
                reg = linear_model.LinearRegression().fit(x_data, y_data)
                coef = reg.coef_
                inter = reg.intercept_
                ax.plot(x_data, coef*x_data+inter)               # fitted line, y=mx+b

                # Total change over the fitted span (slope * number of years)
                trend = timespan*coef[0,0]
                # Save the observed trend to the csv; values are paired with headerList
                # in order, and str(trend) keeps the original text form of the number
                dw.writerow(dict(zip(headerList, (key, var, month, str(trend)))))

                # Annotate the subplot with the trend, e.g. '1.23mm/40yr'
                ax.text(.1, .8,
                        str(round(trend,2))+unit+'/'+str(timespan)+'yr',
                        transform=ax.transAxes,
                        fontsize=24,
                        color='red')

            fig.savefig('graphs/avgPlots/'+str(key)+'_'+var+'-avg')
            # Release the figure; otherwise every 24x16 in figure stays alive in pyplot
            plt.close(fig)
# %%