#!/usr/bin/env python
"""Convenience functions to grab meteorological data and convert it
to CABO format for use within WOFOST. So far, ERA5 is the only source.
"""
import json
import struct
import logging
import datetime as dt
from textwrap import dedent
from collections import namedtuple
from pathlib import Path
import tqdm
import numpy as np
from osgeo import gdal
from netCDF4 import Dataset, date2index
from concurrent.futures import ThreadPoolExecutor
from functools import partial
LOG = logging.getLogger(__name__)
ERAPARAMS = namedtuple(
"ERAPARAMS", ["ssrd", "mx2t", "mn2t", "tp", "u10", "v10", "d2m"]
)
def humidity_from_dewpoint(tdew):
    """Calculate the actual vapour pressure from the dewpoint temperature,
    using the FAO-56 (Tetens) formula. Note that this returns a vapour
    pressure in kPa, not a relative humidity.

    Parameters
    ----------
    tdew : float
        Dewpoint temperature in kelvin

    Returns
    -------
    float
        Actual vapour pressure (kPa)
    """
    tdew = tdew - 273.15  # Convert from kelvin to degrees Celsius
    tmp = (17.27 * tdew) / (tdew + 237.3)
    ea = 0.6108 * np.exp(tmp)
    return ea
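# A minimal usage sketch for humidity_from_dewpoint (illustrative values,
# not part of the original module): a dewpoint temperature of 283.15 K
# (10 degC) corresponds to an actual vapour pressure of about 1.23 kPa.
#
#     >>> round(humidity_from_dewpoint(283.15), 2)
#     1.23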
def retrieve_pixel_value(lon, lat, data_source):
    """Retrieve the value of the pixel containing a given location from a
    GDAL-friendly dataset. Note that the raster is assumed to be of type
    Int16.

    Parameters
    ----------
    lon : float
        Longitude in decimal degrees
    lat : float
        Latitude in decimal degrees
    data_source : str
        An existing GDAL-readable dataset. Can be remote.

    Returns
    -------
    int
        The value of the pixel.
    """
    dataset = gdal.Open(data_source)
    gt = dataset.GetGeoTransform()
    the_band = dataset.GetRasterBand(1)
    px = int((lon - gt[0]) / gt[1])  # x pixel
    py = int((lat - gt[3]) / gt[5])  # y pixel
    buf = the_band.ReadRaster(px, py, 1, 1, buf_type=gdal.GDT_Int16)
    elev = struct.unpack("h", buf)
    return elev[0]
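# A minimal usage sketch for retrieve_pixel_value (the DEM path below is
# hypothetical, not part of this module): fetch the Int16 pixel value, e.g.
# an elevation in metres, at 0.1 deg E, 51.5 deg N.
#
#     elev = retrieve_pixel_value(
#         0.1, 51.5, "/vsicurl/https://example.com/dem.vrt"
#     )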
def era_to_cabo(
site_name,
year,
lon,
lat,
elev,
cabo_file,
nc_file,
parnames,
size=0.25,
c1=-0.18,
c2=-0.55,
):
"""Convert ERA5 dataset to CABO format
Parameters
----------
site_name : str
A site name descriptor. Can be anything, I think
year : int
The year
lon : float
The longitude in decimal degrees
lat : float
The latitude in decimal degrees
cabo_file : Pathlib object
Path where the CABO file will be written
nc_file : Pathlib object
Path with the netcdf file
parnames : iter
A list of parameters of interest
size : float
ERA grid size, by default 0.25
"""
# Open netCDF file, and stuff parameters into useful
# data structure
ds = Dataset(str(nc_file))
variables = (ds.variables[var][:] for var in parnames)
pars = ERAPARAMS(*variables)
# Check corners
uplat = ds.variables["latitude"][:].max()
dnlat = ds.variables["latitude"][:].min()
uplon = ds.variables["longitude"][:].max()
dnlon = ds.variables["longitude"][:].min()
    # Find the grid cell indices nearest to the requested location. Note
    # that the ERA5 latitude axis runs from north to south (decreasing).
    x = int((lon - dnlon + size / 2) / size)
    y = int((lat - uplat - size / 2) / -size)
    times = ds.variables["time"]
# Preprocess data: calculate daily means/aggregates
# Get the right units.
rad = (
np.sum(
pars.ssrd.reshape(-1, 24, pars.ssrd.shape[1], pars.ssrd.shape[2]),
axis=1,
)
/ 1000.0
)
tmax = (
np.max(
pars.mx2t.reshape(-1, 24, pars.mx2t.shape[1], pars.mx2t.shape[2]),
axis=1,
)
- 273.15
)
tmin = (
np.min(
pars.mn2t.reshape(-1, 24, pars.mn2t.shape[1], pars.mn2t.shape[2]),
axis=1,
)
- 273.15
)
prec = (
np.sum(
pars.tp.reshape(-1, 24, pars.tp.shape[1], pars.tp.shape[2]), axis=1
)
* 1000.0
)
prec[prec < 0.01] = 0
wind_u = np.mean(
pars.u10.reshape(-1, 24, pars.u10.shape[1], pars.u10.shape[2]), axis=1
)
wind_v = np.mean(
pars.v10.reshape(-1, 24, pars.v10.shape[1], pars.v10.shape[2]), axis=1
)
wind = np.sqrt(np.square(wind_u) + np.square(wind_v))
hum = humidity_from_dewpoint(
np.mean(
pars.d2m.reshape(-1, 24, pars.d2m.shape[1], pars.d2m.shape[2]),
axis=1,
)
)
hdr_chunk = f"""\
*---------------------------------------------------
* Station: {site_name:s}
* Year: {year:d}
* Origin: ERA5-Reanalysis
* Columns & units
* ===================
* 1. station number
* 2. year
* 3. Day of Year
* 4. Irradiance (kJ·m-2·d-1)
* 5. Daily minimum temperature (degC)
* 6. Daily maximum temperature (degC)
* 7. Vapour pressure (kPa)
* 8. Mean wind speed (m·s-1)
* 9. Precipitation (mm·d-1)
** WCCDESCRIPTION={site_name:s}
** WCCFORMAT=2
** WCCYEARNR={year:d}
*------------------------------------------------------------*
{lon:.2f} {lat:.2f} {elev:.2f} {c1:.2f} {c2:.2f}
"""
hdr_chunk = dedent(hdr_chunk)
# Dump data file...
    station_number = 1
    with cabo_file.open("w") as fp:
        fp.write(hdr_chunk)
        for d in range(rad.shape[0]):
            fp.write(
                f"{station_number:d}\t{year:d}\t{d + 1:d}\t"
                + f"{round(rad[d, y, x]):5.1f}\t"
                + f"{round(tmin[d, y, x] * 10) / 10:5.1f}\t"
                + f"{round(tmax[d, y, x] * 10) / 10:5.1f}\t"
                + f"{round(hum[d, y, x] * 1000) / 1000:5.3f}\t"
                + f"{round(wind[d, y, x] * 10) / 10:4.1f}\t"
                + f"{round(prec[d, y, x] * 10) / 10:4.1f}\n"
            )
    LOG.info(f"Saved CABO file {str(cabo_file):s}.")
def grab_meteo_data(
lat,
lon,
year,
era_fname,
nc_dir,
site_name="default_site",
data_dir="./",
size=0.25,
c1=-0.18,
c2=-0.55,
station_number=1,
dem_file="/vsicurl/http://www2.geog.ucl.ac.uk/"
+ "~ucfafyi/eles/global_dem.vrt",
era_lat_chunk=1.0,
era_lon_chunk=1.0,
):
"""Grab meteorological data and set it up to use with WOFOST.
At present, we download the data from ERA5, but other sources may
be considered.
Parameters
----------
lat : float
Latitude in decimal degrees.
lon : float
Longitude in decimal degrees.
year: int
Year of interest
region: str
Region name
data_dir : str
The location where the ERA5 file will be downloaded to, and
also where the CABO file will be written to
size : float
The grid size in degrees
c1 : float
I think this is a parameter related to the Armstrong exponent
c2 : float
I think this is a parameter related to the Armstrong exponent
station_number : int
A random number needed for CABO file
dem_file : str
A GDAL-readable file with a DEM to fish out the elevation of the site.
Returns
-------
dict
A dictionary with the relevant CABO files indexed by year.
"""
    # Grab the elevation of the site from the DEM
    elevation = retrieve_pixel_value(lon, lat, dem_file)
    # These are the ERA5 parameters of interest
    parnames = ["ssrd", "mx2t", "mn2t", "tp", "u10", "v10", "d2m"]
    cabo_file = Path(data_dir) / f"{site_name:s}.{year:d}"
if not cabo_file.exists():
LOG.info(f"No CABO file for {year:d}...")
nc_file = Path(nc_dir) / (era_fname)
if not nc_file.exists():
LOG.info(f"No netCDF file for {year:d}...")
raise ValueError("No NETCDF file!")
LOG.info(f"Converting {str(nc_file):s} to CABO")
LOG.info("Converting units to daily etc.")
era_to_cabo(
site_name,
year,
lon,
lat,
elevation,
cabo_file,
nc_file,
parnames,
size=size,
)
return cabo_file
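# A minimal call sketch for grab_meteo_data (the nc_dir path below is
# hypothetical): build the CABO file for one site and one year from a
# pre-downloaded ERA5 netCDF file, returning the path to the CABO file.
#
#     cabo = grab_meteo_data(
#         51.5, 0.1, 2018, "era5_africa_2018.nc", "/path/to/era5",
#         site_name="my_site", data_dir="./data/meteo/my_site",
#     )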
if __name__ == "__main__":
    # Extract meteo inputs for all districts using centroid coordinates!
    with open("carto/Districts/centroid_regions.geojson", "r") as fp:
        districts = json.load(fp)
    coords = [
        feat["geometry"]["coordinates"] for feat in districts["features"]
    ]
    names = [
        feat["properties"]["RGN_NM2012"] for feat in districts["features"]
    ]
    locations = dict(zip(names, coords))
    years = np.arange(2010, 2019)
for location, (lon, lat) in tqdm.tqdm(locations.items()):
print(location, lon, lat)
loc_name = location.replace(" ", "_")
meteo_folder = Path(f"./data/meteo/{loc_name}")
meteo_folder.mkdir(parents=True, exist_ok=True)
wrapper = lambda year: grab_meteo_data(
lat,
lon,
year,
f"era5_africa_{year:d}.nc",
"/data/geospatial_08/ucfajlg/ERA5_meteo",
data_dir=meteo_folder.as_posix(),
site_name=loc_name,
)
        # Create a thread pool of two worker threads and process all years
        with ThreadPoolExecutor(max_workers=2) as executor:
            for _ in executor.map(wrapper, years):
                pass