diff --git a/.gitignore b/.gitignore index b241241..418c9ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +validation/data __pycache__/ diff --git a/bav_lib.py b/bav_lib.py index e06dbe3..0b4c468 100644 --- a/bav_lib.py +++ b/bav_lib.py @@ -10,11 +10,55 @@ import numpy as np import plotly.graph_objects as go import matplotlib.pyplot as plt -from matplotlib import cm import matplotlib as mpl from mpl_toolkits.axes_grid1 import make_axes_locatable import pandas as pd +from scipy.stats import gaussian_kde +from sklearn import linear_model +from sklearn.metrics import mean_squared_error, r2_score +import datetime +import matplotlib.dates as mdates +years = mdates.YearLocator() # every year +months = mdates.MonthLocator() # every month +years_fmt = mdates.DateFormatter('%Y') +def multi_plot(data_out, + sites = ['KAN_M', 'KAN_U'],sp1 = 4, sp2 = 2, + title = '', OutputFolder='figures/', + filename_out = 'plot'): + + f1, ax = plt.subplots(sp1,sp2,figsize=(15, 15)) + f1.subplots_adjust(hspace=0.2, wspace=0.1, + left = 0.08 , right = 0.95 , + bottom = 0.2 , top = 0.9) + count = -1 + for site in sites: + print(site) + count = count+1 + i,j = np.unravel_index(count, ax.shape) + tmp = data_out[data_out.station==site] + + ax[i,j].plot(tmp.date,tmp.PROMICE_alb,label='PROMICE') + ax[i,j].plot(tmp.date,tmp.BBA_emp,label='Empirical') + ax[i,j].plot(tmp.date,tmp.albedo_bb_planar_sw,label='SICE.fv5.2/pySICEv1.4') + ax[i,j].grid(True) + ax[i,j].set_title(site) + ax[i,j].set_xlim([datetime.date(2017, 1, 1), datetime.date(2020, 1, 1)]) + ax[i,j].xaxis.set_major_locator(years) + ax[i,j].xaxis.set_major_formatter(years_fmt) + ax[i,j].xaxis.set_minor_locator(months) + ax[i,j].set_xlabel("") + + if count1.1)] + R = R[~(R>1.1)] + + input_name= Rad_in+str(count) + + density_scatter(R,alb,ax[i, j],ss) + + R=R.values + alb=alb.values + R=R.reshape(-1,1) + alb=alb.reshape(-1,1) + lr = linear_model.LinearRegression() + lr.fit(R, alb) + print(Rad_in+str(count)) + print('Coefficients: \n', lr.coef_) + + preds = lr.predict(R) + + tmp = np.array([np.min(R), np.max(R)]) + tmp = tmp.reshape(-1,1) + ax[i, j].plot(tmp, lr.predict(tmp)) +# x_ticks = np.linspace(0.25,1,4) +# ax[i, j].set_xticklabels(x_ticks,fontsize=20) +# ax[i, j].set_yticklabels(x_ticks,fontsize=20) + + ax[i, j].text(0.08, 0.93, + '%s' % input_name, + fontsize=18, + color='black', + fontweight='bold', + horizontalalignment='left', + verticalalignment='top', + bbox=dict(boxstyle="square", + ec='lightskyblue', + alpha=0.8)) + ax[i, j].text(0.7, 0.5, + 'R2=%.3f\nRMSE=%.2f\nN=%.0f' % (r2_score(alb,preds), mean_squared_error(alb,preds),len(R)), + fontsize=13, + color='white', + fontweight='bold', + horizontalalignment='left', + verticalalignment='top', + bbox=dict(boxstyle="square", + ec='lightskyblue', + alpha=0.8)) + ax[i, j].set_xlim([0.01, 1.05]) + ax[i, j].set_ylim([0.01, 1.05]) + ax[i, j].grid(True) + fig.savefig('./output/linear_'+Rad_in+'oa.png',bbox_inches='tight') + diff --git a/bav_lib.py.bak b/bav_lib.py.bak new file mode 100644 index 0000000..e0111a8 --- /dev/null +++ b/bav_lib.py.bak @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Oct 17 14:03:28 2019 + +@author: bav +""" + +#%matplotlib qt + +import numpy as np +import plotly.graph_objects as go +import matplotlib.pyplot as plt +from matplotlib import cm +import matplotlib as mpl +from mpl_toolkits.axes_grid1 import make_axes_locatable +import pandas as pd + +def OutlookRaster(var,title): + l,b,r,t = var.bounds + res = var.res + x = np.arange(l,r, res[0]) + y = np.arange(t,b, 
-res[0])
+    z=var.read(1)
+    nan_col = ~np.all(np.isnan(z), axis=0)
+    z=z[:,nan_col]
+    x=x[nan_col]
+
+    nan_row = ~np.all(np.isnan(z), axis=1)
+    z=z[nan_row,:]
+    y=y[nan_row]
+
+    fig = go.Figure(
+            data=go.Heatmap(x=x,
+                            y=y,
+                            z=z,
+                            type = 'heatmap',
+                            colorscale='Jet',
+                            colorbar=dict(title=title),
+                            showscale=True))
+    fig.update_layout(
+        autosize=False,
+        width=500,
+        height=500)
+    fig.show()
+#    fig.write_image(title+".jpeg")
+    return x,y,z
+
+#%%
+def heatmap(var, title='', col_lim=(np.nan, np.nan) ,cmap_in='gnuplot'):
+    if np.isnan(col_lim[0]):
+        col_lim=(np.nanmin(var), np.nanmax(var))
+    z=var
+    nan_col = ~np.all(np.isnan(z), axis=0)
+    z=z[:,nan_col]
+
+    nan_row = ~np.all(np.isnan(z), axis=1)
+    z=z[nan_row,:]
+
+    cmap = plt.get_cmap(cmap_in)
+    cmap.set_bad(color='gray')
+
+#    fig,ax = plt.subplots()
+    im = plt.imshow(z, cmap=cmap, interpolation='nearest',vmin=col_lim[0], vmax=col_lim[1])
+    cb= plt.colorbar(im)
+    cb.ax.set_ylabel(title)
+#    fig.show()
+#    fig.write_image(title+".jpeg")
+    return z
+#%%
+def heatmap_discrete(var, title='', col_lim=(np.nan, np.nan) ,cmap_in='tab20_r'):
+    if np.isnan(col_lim[0]):
+        col_lim=(np.nanmin(var), np.nanmax(var))
+    z=var
+    nan_col = ~np.all(np.isnan(z), axis=0)
+    z=z[:,nan_col]
+
+    nan_row = ~np.all(np.isnan(z), axis=1)
+    z=z[nan_row,:]
+
+    cmap = plt.get_cmap(cmap_in)
+    cmap.set_bad(color='white')
+
+    bounds = np.unique(var)[np.logical_not(np.isnan(np.unique(var)))]
+    bounds = np.append(bounds, bounds[len(bounds)-1]+1)
+    norm = mpl.colors.BoundaryNorm(bounds, cmap.N+1)
+
+    fig,ax = plt.subplots(figsize=(10,15))
+    im = ax.imshow(z+1e-6, cmap=cmap, norm = norm, interpolation='Nearest')
+    divider = make_axes_locatable(ax)
+    cax = divider.append_axes("right", size="5%" , pad= 0.1)
+    cb = mpl.colorbar.ColorbarBase(ax=cax, cmap=cmap, norm=norm)
+
+    tic_locs =bounds[0:len(bounds)-1] - (bounds[0:len(bounds)-1]-bounds[1:len(bounds)])/2
+    cb.set_ticks(tic_locs)
+    cb.ax.set_yticklabels(bounds[0:len(bounds)-1])
+    cb.ax.set_ylabel(title)
+
+    fig.show()
+#    fig.write_image(title+".jpeg")
+    return fig,ax
+
+#%% Plot OLCI bands
+def plot_OLCI_bands(ax):
+# plots OLCI bands in the background
+
+    olci_bands = pd.read_excel (r'misc/olci_bands.xlsx',header=0,thousands=' ')
+    #Out[409]: Index(['Band', 'λ centre (nm)', 'Width (nm)', 'Function'], dtype='object')
+
+    ax.set_xlabel('Wavelength (nm)')
+    #ax.autoscale(enable=True, tight=True)
+    ax.bar(olci_bands['λ centre (nm)'], ax.get_ylim()[1], olci_bands['Width (nm)'], alpha=0.5, edgecolor ='darkblue', label='OLCI bands')
+
+    ax.set_xlim(350, 1050)
+
+    height_text = np.array((2, 2.3, 2.3, 2.3, 2.3, 2.3,
+                            2.3, 2, 2.4 ,
+                            1.7,2, 1.6, 2, 2.5,
+                            1.25, 1.7, 2, 1.7,2,2, 2))/3 *ax.get_ylim()[1]
+    for i in range(1,22):
+        ax.annotate(str(i),
+                    (olci_bands['λ centre (nm)'][i-1], height_text[i-1]),
+                    ha='center',
+                    bbox=dict(boxstyle="circle", fc="w"),
+                    fontsize =13)
+    return ax
+
+#%% Density scatter
+
+def density_scatter(x,y,ax,ss):
+    if isinstance(x, pd.core.series.Series)==False:
+        x = pd.DataFrame(x)
+        x = x[0]
+    if isinstance(y, pd.core.series.Series)==False:
+        y = pd.DataFrame(y)
+        y=y[y.columns[0]]
+
+    y=y[~np.isnan(x)]
+    x=x[~np.isnan(x)]
+    x=x[~np.isnan(y)]
+    y=y[~np.isnan(y)]
+
+    xy = np.vstack([x,y])
+    z = gaussian_kde(xy)(xy)
+    # Sort the points by density, so that the densest points are plotted last
+    idx = z.argsort()
+    x, y, z = x.iloc[idx], y.iloc[idx], z[idx]
+    ax.scatter(x, y, c=z, s=ss)
+    return ax
+# =============================================================================
+#
+# 
============================================================================= + +#%% Plotting top of atmosphere refletance +#%matplotlib qt +#%matplotlib inline +def mosaic_albedo_fit(df, Rad_in): + fig, ax = plt.subplots(7,3, sharex='col', sharey='row',figsize=(15, 15)) + fig.subplots_adjust(hspace=0, wspace=0) + if Rad_in=='Rt': + fig.text(0.5, 0.05, 'Top of atmosphere OLCI reflectance', ha='center', size = 20) + elif Rad_in=='Rb': + fig.text(0.5, 0.05, 'Bottom of atmosphere OLCI reflectance', ha='center', size = 20) + fig.text(0.05, 0.5, 'PROMICE albedo', va='center', rotation='vertical', size = 20) + + # axes are in a two-dimensional array, indexed by [row, col] + count=0 + ss=5 + + for i in range(7): + for j in range(3): + # Calculate the point density + count = count+1 + R=df[Rad_in+str(count)] + alb = df['PROMICE alb'] + alb=alb[~np.isnan(R)] + R=R[~np.isnan(R)] + alb = alb[~(R>1.1)] + R = R[~(R>1.1)] + + input_name= Rad_in+str(count) + + density_scatter(R,alb,ax[i, j],ss) + + R=R.values + alb=alb.values + R=R.reshape(-1,1) + alb=alb.reshape(-1,1) + lr = linear_model.LinearRegression() + lr.fit(R, alb) + print(Rad_in+str(count)) + print('Coefficients: \n', lr.coef_) + + preds = lr.predict(R) + + tmp = np.array([np.min(R), np.max(R)]) + tmp = tmp.reshape(-1,1) + ax[i, j].plot(tmp, lr.predict(tmp)) +# x_ticks = np.linspace(0.25,1,4) +# ax[i, j].set_xticklabels(x_ticks,fontsize=20) +# ax[i, j].set_yticklabels(x_ticks,fontsize=20) + + ax[i, j].text(0.08, 0.93, + '%s' % input_name, + fontsize=18, + color='black', + fontweight='bold', + horizontalalignment='left', + verticalalignment='top', + bbox=dict(boxstyle="square", + ec='lightskyblue', + alpha=0.8)) + ax[i, j].text(0.7, 0.5, + 'R2=%.3f\nRMSE=%.2f\nN=%.0f' % (r2_score(alb,preds), mean_squared_error(alb,preds),len(R)), + fontsize=13, + color='white', + fontweight='bold', + horizontalalignment='left', + verticalalignment='top', + bbox=dict(boxstyle="square", + ec='lightskyblue', + alpha=0.8)) + ax[i, j].set_xlim([0.01, 1.05]) + ax[i, j].set_ylim([0.01, 1.05]) + ax[i, j].grid(True) + fig.savefig('./output/linear_'+Rad_in+'oa.png',bbox_inches='tight') + + diff --git a/fortran/bav_lib.py b/fortran/bav_lib.py deleted file mode 100644 index c570e5b..0000000 --- a/fortran/bav_lib.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Oct 17 14:03:28 2019 - -@author: bav -""" - -#%matplotlib qt - -import numpy as np -import plotly.graph_objects as go -import matplotlib.pyplot as plt -from matplotlib import cm -import matplotlib as mpl -from mpl_toolkits.axes_grid1 import make_axes_locatable - -def OutlookRaster(var,title): - l,b,r,t = var.bounds - res = var.res - x = np.arange(l,r, res[0]) - y = np.arange(t,b, -res[0]) - z=var.read(1) - nan_col = ~np.all(np.isnan(z), axis=0) - z=z[:,nan_col] - x=x[nan_col] - - nan_row = ~np.all(np.isnan(z), axis=1) - z=z[nan_row,:] - y=y[nan_row] - - fig = go.Figure( - data=go.Heatmap(x=x, - y=y, - z=z, - type = 'heatmap', - colorscale='Jet', - colorbar=dict(title=title), - showscale=True)) - fig.update_layout( - autosize=False, - width=500, - height=500) - fig.show() -# fig.write_image(title+".jpeg") - return x,y,z - -#%% -def heatmap(var, title='', col_lim=(np.nan, np.nan) ,cmap_in='gnuplot'): - if np.isnan(col_lim[0]): - col_lim=(np.nanmin(var), np.nanmax(var)) - z=var - nan_col = ~np.all(np.isnan(z), axis=0) - z=z[:,nan_col] - - nan_row = ~np.all(np.isnan(z), axis=1) - z=z[nan_row,:] - - cmap = plt.get_cmap(cmap_in) - cmap.set_bad(color='gray') - -# fig,ax = plt.subplots() - im = 
plt.imshow(z, cmap=cmap, interpolation='nearest',vmin=col_lim[0], vmax=col_lim[1]) - cb= plt.colorbar(im) - cb.ax.set_ylabel(title) -# fig.show() -# fig.write_image(title+".jpeg") - return z -#%% -def heatmap_discrete(var, title='', col_lim=(np.nan, np.nan) ,cmap_in='gnuplot'): - if np.isnan(col_lim[0]): - col_lim=(np.nanmin(var), np.nanmax(var)) - z=var - nan_col = ~np.all(np.isnan(z), axis=0) - z=z[:,nan_col] - - nan_row = ~np.all(np.isnan(z), axis=1) - z=z[nan_row,:] - - cmap = plt.get_cmap(cmap_in) - cmap.set_bad(color='gray') - - bounds = np.unique(var)[np.logical_not(np.isnan(np.unique(var)))] - norm = mpl.colors.BoundaryNorm(bounds, cmap.N+1) - - fig,ax = plt.subplots(figsize=(10,15)) - im = ax.imshow(z+1e-6, cmap=cmap, norm = norm, interpolation='Nearest') - divider = make_axes_locatable(ax) - cax = divider.append_axes("right", size="5%" , pad= 0.1) - cb = mpl.colorbar.ColorbarBase(ax=cax, cmap=cmap, norm=norm) - - tic_locs =bounds[0:len(bounds)-1] - (bounds[0:len(bounds)-1]-bounds[1:len(bounds)])/2 - cb.set_ticks(tic_locs) - cb.ax.set_yticklabels(bounds[0:len(bounds)-1]) - cb.ax.set_ylabel(title) - - fig.show() -# fig.write_image(title+".jpeg") - return fig,ax - - #%% ================================================================= -def tmp(**kwargs): - for arg_name in kwargs: - return arg_name - - diff --git a/fortran/sice_f.py b/fortran/sice_f.py deleted file mode 100644 index 49b368c..0000000 --- a/fortran/sice_f.py +++ /dev/null @@ -1,160 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Oct 25 11:35:10 2019 - -This script reads a set of input tif files and outputs the csv needed to run sice.f - -You will need to update - InputFoder - the path to water_vod and ozone_vod - the output path - -@author: bav@geus.dk -""" - -import numpy as np -import rasterio as rio -import time -import os -import sys -import bav_lib as bl - -start_time = time.time() - -InputFolder = sys.argv[1] + '/' -# InputFolder = 'out/SICE_2020_py1.4/' - -print(os.getcwd()) - - -#%% turning geotiffs into sice input -print("Reading input ") - -Oa01 = rio.open(InputFolder+'r_TOA_01.tif') -meta = Oa01.meta - -def WriteOutput(var,var_name,in_folder): - # this functions write tif files based on a model file, here "Oa01" - # opens a file for writing - with rio.open(in_folder+var_name+'.tif', 'w+', **meta) as dst: - dst.write(var.astype('float32'),1) - -toa = np.tile(Oa01.read(1)*np.nan, (21,1,1)) - -for i in range(21): - dat = rio.open((InputFolder+'r_TOA_'+str(i+1).zfill(2)+'.tif')) - toa[i,:,:] = dat.read(1) - -ozone = rio.open(InputFolder+'O3.tif').read(1) -water = rio.open(InputFolder+'WV.tif').read(1) -sza = rio.open(InputFolder+'SZA.tif').read(1) -saa = rio.open(InputFolder+'SAA.tif').read(1) -vza = rio.open(InputFolder+'OZA.tif').read(1) -vaa = rio.open(InputFolder+'OAA.tif').read(1) -height = rio.open(InputFolder+'height.tif').read(1) - -sza[np.isnan(toa[0,:,:])] = np.nan -saa[np.isnan(toa[0,:,:])] = np.nan -vza[np.isnan(toa[0,:,:])] = np.nan -vaa[np.isnan(toa[0,:,:])] = np.nan - -# ns,alat,alon,sza,vza,saa,vaa,height, (toa(iks),iks=1,21),OZON,WATER -olci_toa = np.vstack((np.arange(1,len(sza.flatten())+1), - sza.flatten()*np.nan, - sza.flatten()*np.nan, - sza.flatten(), - vza.flatten(), - saa.flatten(), - vaa.flatten(), - height.flatten())) - -for i in range(21): - olci_toa = np.vstack((olci_toa, toa[i,:,:].flatten())) - -olci_toa = np.vstack((olci_toa, ozone.flatten())) -olci_toa = np.vstack((olci_toa, water.flatten())) - -olci_toa=olci_toa.T -olci_toa_save = olci_toa -ind_good_pixels = 
np.logical_not(np.isnan(toa[0,:,:]).flatten()) -olci_toa = olci_toa[ind_good_pixels,:] -olci_toa[np.isnan(olci_toa)] = 999 -OutputFolder= InputFolder+'fortran/' -#os.mkdir(OutputFolder) -np.savetxt(OutputFolder+'olci_toa_newformat.dat', X=olci_toa, delimiter='\t', \ - fmt='%i '+ '%10.5f '*6 + '%i '+ '%10.5f'*23) - -#%% You can now run sice.f -print('bash ./fortran/sice_f.sh -i '+OutputFolder) -os.system('bash ./fortran/sice_f.sh -i '+OutputFolder) - - -#%% Loading result from sice_debug.f -Oa01 = rio.open(InputFolder+'r_TOA_01.tif') -meta = Oa01.meta -with rio.Env(): - meta.update(compress='DEFLATE') -def output_sice_f(file_name,var_name,var_id): - sice_out = np.loadtxt(file_name) - ind_orig = np.arange(1,len(Oa01.read(1).flatten())+1) - var = ind_orig*np.nan - ind_pix = sice_out[:,0].astype(int) - var[ind_pix-1] = sice_out[:,var_id] - var_mat = np.reshape(var,np.shape(Oa01.read(1))) - var_mat[var_mat==999] = np.nan - with rio.open(OutputFolder+var_name+'.tif', 'w+', **meta) as dst: - dst.write(var_mat.astype('float32'),1) - -output_sice_f(OutputFolder+"bba.dat",'albedo_bb_planar_sw',4) -output_sice_f(OutputFolder+"bba.dat",'albedo_bb_spherical_sw',5) -output_sice_f(OutputFolder+"size.dat",'D',4) -output_sice_f(OutputFolder+"size.dat",'area',5) -output_sice_f(OutputFolder+"size.dat",'al',6) -output_sice_f(OutputFolder+"size.dat",'r0',7) -output_sice_f(OutputFolder+"bba_alex_reduced.dat",'diagnostic_retrieval',3) - -for i in range(21): - output_sice_f(OutputFolder+"spherical_albedo.dat",'albedo_spectral_spherical_'+str(i+1).zfill(2),4+i) - output_sice_f(OutputFolder+"planar_albedo.dat",'albedo_spectral_planar_'+str(i+1).zfill(2),4+i) - output_sice_f(OutputFolder+"boar.dat",'rBRR_'+str(i+1),4+i) - -# OUTPUT files: -# file= 'spherical_albedo.dat' ) ns,ndate(3),alat,alon,(answer(i),i=1,21),isnow -# file= 'planar_albedo.dat' ) ns,ndate(3),alat,alon,(rp(i),i=1,21),isnow -# file= 'boar.dat' ) ns,ndate(3),alat,alon,(refl(i),i=1,21),isnow -# file= 'size.dat' ) ns,ndate(3),alat,alon,D,area,al,r0, andsi,andbi,indexs,indexi,indexd,isnow -# file= 'impurity.dat' ) ns,ndate(3),alat,alon,ntype,conc,bf,bm,thv, toa(1),isnow -# file= 'bba.dat' ) ns,ndate(3),alat,alon,rp3,rs3, isnow -# file= 'bba_alex_reduced.dat') ns,ndate(3),rp3,isnow -# file= 'notsnow.dat') ns,ndate(3),icloud,iice -# file='retrieved_O3.dat') ns,alat,alon,BXXX,totadu,deltak,sza,vza,amf - -#%% plotting oulooks -try: - os.mkdir(OutputFolder+'plots') -except: - print('folder exist') - -import matplotlib.pyplot as plt - -# fig,ax=bl.heatmap_discrete(rio.open(OutputFolder+'diagnostic_retrieval.tif').read(1), -# 'diagnostic_retrieval ') -# ax.set_title(OutputFolder) -# fig.savefig(OutputFolder+'plots/diagnostic_retrieval.png',bbox_inches='tight') - -var_list = ('albedo_bb_planar_sw','albedo_bb_spherical_sw') -for i in range(len(var_list)): - var_1 = rio.open(OutputFolder+var_list[i]+'.tif').read(1) - plt.figure(figsize=(10,15)) - bl.heatmap(var_1,var_list[i], col_lim=(0, 1) ,cmap_in='jet') - plt.title(OutputFolder) - plt.savefig(OutputFolder+'plots/'+var_list[i]+'_diff.png',bbox_inches='tight') -plt.ioff() -for i in np.append(np.arange(11), np.arange(21)): - var_name = 'albedo_spectral_spherical_'+ str(i+1).zfill(2) - var_1 = rio.open(OutputFolder+var_name+'.tif').read(1) - plt.figure(figsize=(10,15)) - bl.heatmap(var_1,var_name, col_lim=(0, 1) ,cmap_in='jet') - plt.title(OutputFolder) - plt.savefig(OutputFolder+'plots/'+var_name+'.png',bbox_inches='tight') -plt.ion() \ No newline at end of file diff --git a/fortran/sice_f.sh b/fortran/sice_f.sh 
index 06e9066..be2fed5 100644 --- a/fortran/sice_f.sh +++ b/fortran/sice_f.sh @@ -12,8 +12,7 @@ MSG_ERR() { printf "${RED}ERROR: ${1}${NC}\n"; } timing() { if [[ $TIMING == 1 ]]; then MSG_OK "$(date)"; fi; } -while [[ $# -gt 0 ]] -do +while [[ $# -gt 0 ]]; do key="$1" case $key in @@ -39,7 +38,7 @@ fi DEST=${INPATH} timing - +MSG_OK ${DEST} # input for sice # ns,alat,alon,sza,vza,saa,vaa,height,(toa(iks),iks=1,21), ozone, water # The number of lines in this file MUST be equal to the number of lines in the file 'nlines.dat' diff --git a/sice.py b/sice.py index cdd2ff1..8a570d4 100644 --- a/sice.py +++ b/sice.py @@ -81,45 +81,75 @@ import rasterio as rio import time import sys +import os +import pandas as pd from constants import w, bai, sol1_clean, sol2, sol3_clean, sol1_pol, sol3_pol, asol np.seterr(invalid='ignore') start_time = time.process_time() - -InputFolder = sys.argv[1] + '/' - -#%% ========= input tif ================ -Oa01 = rio.open(InputFolder+'r_TOA_01.tif') -meta = Oa01.meta -with rio.Env(): - meta.update(compress='DEFLATE') - -def WriteOutput(var,var_name,in_folder): - # this functions write tif files based on a model file, here "Oa01" - # opens a file for writing - - with rio.open(in_folder+var_name+'.tif', 'w+', **meta) as dst: - dst.write(var.astype('float32'),1) +print(sys.argv[1]) + +#%% input text file +if os.path.isfile(sys.argv[1]): + InputFolder = os.path.dirname(os.path.dirname(sys.argv[1]))+'/' + print('\nText file input') + # data_in = pd.read_csv(sys.argv[1]) + data_in = pd.read_csv(sys.argv[1]) + toa = np.expand_dims(data_in[[c for c in data_in.columns if c.find('reflec')>=0]].to_numpy().transpose(), axis=2) -toa = np.tile(Oa01.read(1)*np.nan, (21,1,1)) - -for i in range(21): - dat = rio.open((InputFolder+'r_TOA_'+str(i+1).zfill(2)+'.tif')) - toa[i,:,:] = dat.read(1) + + ozone = np.expand_dims(data_in['total_ozone'], axis=1) + water = np.expand_dims(data_in['total_columnar_water_vapour'], axis=1) + sza = np.expand_dims(data_in['sza'], axis=1) + saa = np.expand_dims(data_in['saa'], axis=1) + vza = np.expand_dims(data_in['vza'], axis=1) + vaa = np.expand_dims(data_in['vaa'], axis=1) + height = np.expand_dims(data_in['altitude'], axis=1) + + sza[np.isnan(toa[0,:,:])] = np.nan + saa[np.isnan(toa[0,:,:])] = np.nan + vza[np.isnan(toa[0,:,:])] = np.nan + vaa[np.isnan(toa[0,:,:])] = np.nan + +#%% ========= input tif =============== +elif os.path.isdir(sys.argv[1]): + InputFolder = sys.argv[1] + '/' + print("\nTiff files input") + Oa01 = rio.open(InputFolder+'r_TOA_01.tif') + meta = Oa01.meta + with rio.Env(): + meta.update(compress='DEFLATE') + + def WriteOutput(var,var_name,in_folder): + # this functions write tif files based on a model file, here "Oa01" + # opens a file for writing -ozone = rio.open(InputFolder+'O3.tif').read(1) -water = rio.open(InputFolder+'WV.tif').read(1) -sza = rio.open(InputFolder+'SZA.tif').read(1) -saa = rio.open(InputFolder+'SAA.tif').read(1) -vza = rio.open(InputFolder+'OZA.tif').read(1) -vaa = rio.open(InputFolder+'OAA.tif').read(1) -height = rio.open(InputFolder+'height.tif').read(1) - -sza[np.isnan(toa[0,:,:])] = np.nan -saa[np.isnan(toa[0,:,:])] = np.nan -vza[np.isnan(toa[0,:,:])] = np.nan -vaa[np.isnan(toa[0,:,:])] = np.nan + with rio.open(in_folder+var_name+'.tif', 'w+', **meta) as dst: + dst.write(var.astype('float32'),1) + + toa = np.tile(Oa01.read(1)*np.nan, (21,1,1)) + + for i in range(21): + dat = rio.open((InputFolder+'r_TOA_'+str(i+1).zfill(2)+'.tif')) + toa[i,:,:] = dat.read(1) + + ozone = 
rio.open(InputFolder+'O3.tif').read(1)
+    water = rio.open(InputFolder+'WV.tif').read(1)
+    sza = rio.open(InputFolder+'SZA.tif').read(1)
+    saa = rio.open(InputFolder+'SAA.tif').read(1)
+    vza = rio.open(InputFolder+'OZA.tif').read(1)
+    vaa = rio.open(InputFolder+'OAA.tif').read(1)
+    height = rio.open(InputFolder+'height.tif').read(1)
+
+    sza[np.isnan(toa[0,:,:])] = np.nan
+    saa[np.isnan(toa[0,:,:])] = np.nan
+    vza[np.isnan(toa[0,:,:])] = np.nan
+    vaa[np.isnan(toa[0,:,:])] = np.nan
+
+else:
+    print("\nInput path is neither a file nor a directory")
+
 # %% water and ozone spectral optical density
 water_vod = genfromtxt('./tg_water_vod.dat', delimiter=' ')
 voda = water_vod[range(21),1]
@@ -320,20 +350,44 @@ def func_solv(albedo):
                           alb_sph[:, ind_pol], asol, sol1_pol, sol2, sol3_pol)
 
 #%% Output
-WriteOutput(BXXX, 'O3_SICE', InputFolder)
-WriteOutput(D, 'grain_diameter',InputFolder)
-WriteOutput(area, 'snow_specific_area', InputFolder)
-WriteOutput(al, 'al', InputFolder)
-WriteOutput(r0, 'r0',InputFolder)
-WriteOutput(isnow,'diagnostic_retrieval',InputFolder)
-WriteOutput(conc, 'conc',InputFolder)
-WriteOutput(rp3, 'albedo_bb_planar_sw',InputFolder)
-WriteOutput(rs3, 'albedo_bb_spherical_sw',InputFolder)
-
-for i in np.append(np.arange(11), np.arange(15,21)):
-# for i in np.arange(21):
-    WriteOutput(alb_sph[i,:,:], 'albedo_spectral_spherical_'+str(i+1).zfill(2), InputFolder)
-    WriteOutput(rp[i,:,:], 'albedo_spectral_planar_'+str(i+1).zfill(2), InputFolder)
-    WriteOutput(refl[i,:,:], 'rBRR_'+str(i+1).zfill(2), InputFolder)
+if os.path.isfile(sys.argv[1]):
+
+    print('\nText file output')
+    # data_in = pd.read_csv(sys.argv[1])
+    data_out = data_in.copy()
+    data_out['grain_diameter']=D
+    data_out['snow_specific_area']=area
+    data_out['al']=al
+    data_out['r0']=r0
+    data_out['diagnostic_retrieval']=isnow
+    data_out['conc']=conc
+    data_out['albedo_bb_planar_sw']=rp3
+    data_out['albedo_bb_spherical_sw']=rs3
+
+    for i in np.append(np.arange(11), np.arange(15,21)):
+        # for i in np.arange(21):
+        data_out['albedo_spectral_spherical_'+str(i+1).zfill(2)]=alb_sph[i,:,:]
+    for i in np.append(np.arange(11), np.arange(15,21)):
+        # rp holds the planar albedo and refl the BRR, matching the tif branch below
+        data_out['albedo_spectral_planar_'+str(i+1).zfill(2)]=rp[i,:,:]
+        data_out['rBRR_'+str(i+1).zfill(2)]=refl[i,:,:]
+
+    data_out.to_csv(sys.argv[1][:-4]+'_out.csv')
+# ========= input tif ===============
+elif os.path.isdir(sys.argv[1]):
+    WriteOutput(BXXX, 'O3_SICE', InputFolder)
+    WriteOutput(D, 'grain_diameter',InputFolder)
+    WriteOutput(area, 'snow_specific_area', InputFolder)
+    WriteOutput(al, 'al', InputFolder)
+    WriteOutput(r0, 'r0',InputFolder)
+    WriteOutput(isnow,'diagnostic_retrieval',InputFolder)
+    WriteOutput(conc, 'conc',InputFolder)
+    WriteOutput(rp3, 'albedo_bb_planar_sw',InputFolder)
+    WriteOutput(rs3, 'albedo_bb_spherical_sw',InputFolder)
+
+    for i in np.append(np.arange(11), np.arange(15,21)):
+        # for i in np.arange(21):
+        WriteOutput(alb_sph[i,:,:], 'albedo_spectral_spherical_'+str(i+1).zfill(2), InputFolder)
+        WriteOutput(rp[i,:,:], 'albedo_spectral_planar_'+str(i+1).zfill(2), InputFolder)
+        WriteOutput(refl[i,:,:], 'rBRR_'+str(i+1).zfill(2), InputFolder)
 
 print("End SICE.py %s --- %s CPU seconds ---" % (InputFolder, time.process_time() - start_time))
diff --git a/sice_f.py b/sice_f.py
new file mode 100644
index 0000000..fbff5a2
--- /dev/null
+++ b/sice_f.py
@@ -0,0 +1,249 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Oct 25 11:35:10 2019
+
+This script reads a set of input tif files and outputs the csv needed to run sice.f
+
+You will need to update
+    InputFolder
+    the path to water_vod and ozone_vod
+    the output path
+
+@author: bav@geus.dk
+"""
+
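+# Illustrative usage (a sketch inferred from the input handling below; the
+# paths are examples, not fixed defaults):
+#   python sice_f.py validation/data/S3_PROMICE.csv
+#       text mode: a csv with one row per observation, holding sza, saa, vza,
+#       vaa, altitude, total_ozone, total_columnar_water_vapour and
+#       *_reflectance columns
+#   python sice_f.py out/SICE_2020_py1.4
+#       tif mode: a folder holding r_TOA_01.tif ... r_TOA_21.tif, O3.tif,
+#       WV.tif, SZA.tif, SAA.tif, OZA.tif, OAA.tif and height.tif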
+import numpy as np +import rasterio as rio +import time +import os +import sys +import bav_lib as bl +import pandas as pd + +start_time = time.time() + +InputFolder = sys.argv[1] + '/' +# InputFolder = 'out/SICE_2020_py1.4/' + +print(os.getcwd()) + + +#%% turning geotiffs into sice input +print("Reading input ") +print(sys.argv[1]) + +#%% input text file +if os.path.isfile(sys.argv[1]): + InputFolder = os.path.dirname(os.path.dirname(sys.argv[1]))+'/' + print('\nText file input') + # data_in = pd.read_csv(sys.argv[1]) + data_in = pd.read_csv(sys.argv[1]) + toa = np.expand_dims(data_in[[c for c in data_in.columns if c.find('reflec')>=0]].to_numpy().transpose(), axis=2) + + + ozone = np.expand_dims(data_in['total_ozone'], axis=1) + water = np.expand_dims(data_in['total_columnar_water_vapour'], axis=1) + sza = np.expand_dims(data_in['sza'], axis=1) + saa = np.expand_dims(data_in['saa'], axis=1) + vza = np.expand_dims(data_in['vza'], axis=1) + vaa = np.expand_dims(data_in['vaa'], axis=1) + height = np.expand_dims(data_in['altitude'], axis=1) + + sza[np.isnan(toa[0,:,:])] = np.nan + saa[np.isnan(toa[0,:,:])] = np.nan + vza[np.isnan(toa[0,:,:])] = np.nan + vaa[np.isnan(toa[0,:,:])] = np.nan + +#%% ========= input tif =============== +elif os.path.isdir(sys.argv[1]): + print('\n tiff input') + InputFolder = sys.argv[1] + '/' + Oa01 = rio.open(InputFolder+'r_TOA_01.tif') + meta = Oa01.meta + + def WriteOutput(var,var_name,in_folder): + # this functions write tif files based on a model file, here "Oa01" + # opens a file for writing + with rio.open(in_folder+var_name+'.tif', 'w+', **meta) as dst: + dst.write(var.astype('float32'),1) + + toa = np.tile(Oa01.read(1)*np.nan, (21,1,1)) + + for i in range(21): + dat = rio.open((InputFolder+'r_TOA_'+str(i+1).zfill(2)+'.tif')) + toa[i,:,:] = dat.read(1) + + ozone = rio.open(InputFolder+'O3.tif').read(1) + water = rio.open(InputFolder+'WV.tif').read(1) + sza = rio.open(InputFolder+'SZA.tif').read(1) + saa = rio.open(InputFolder+'SAA.tif').read(1) + vza = rio.open(InputFolder+'OZA.tif').read(1) + vaa = rio.open(InputFolder+'OAA.tif').read(1) + height = rio.open(InputFolder+'height.tif').read(1) + + sza[np.isnan(toa[0,:,:])] = np.nan + saa[np.isnan(toa[0,:,:])] = np.nan + vza[np.isnan(toa[0,:,:])] = np.nan + vaa[np.isnan(toa[0,:,:])] = np.nan + +# ns,alat,alon,sza,vza,saa,vaa,height, (toa(iks),iks=1,21),OZON,WATER +olci_toa = np.vstack((np.arange(1,len(sza.flatten())+1), + sza.flatten()*np.nan, + sza.flatten()*np.nan, + sza.flatten(), + vza.flatten(), + saa.flatten(), + vaa.flatten(), + height.flatten())) + +for i in range(21): + olci_toa = np.vstack((olci_toa, toa[i,:,:].flatten())) + +olci_toa = np.vstack((olci_toa, ozone.flatten())) +olci_toa = np.vstack((olci_toa, water.flatten())) + +olci_toa=olci_toa.T +olci_toa_save = olci_toa +ind_good_pixels = np.logical_not(np.isnan(toa[0,:,:]).flatten()) +olci_toa = olci_toa[ind_good_pixels,:] +olci_toa[np.isnan(olci_toa)] = 999 +OutputFolder= InputFolder+'fortran/' +#os.mkdir(OutputFolder) +print('\nInput file saved: '+OutputFolder+'olci_toa_newformat.dat') +np.savetxt(OutputFolder+'olci_toa_newformat.dat', X=olci_toa, delimiter='\t', \ + fmt='%i '+ '%10.5f '*6 + '%i '+ '%10.5f'*23) + +#%% You can now run sice.f +# print('bash ./fortran/sice_f.sh -i '+OutputFolder) +# os.system('bash ./fortran/sice_f.sh -i '+OutputFolder) + + +cmnd = 'bash ./fortran/sice_f.sh -i '+OutputFolder +print(cmnd) +import subprocess + +try: + output = subprocess.check_output( + cmnd, stderr=subprocess.STDOUT, shell=True, 
universal_newlines=True) +except subprocess.CalledProcessError as exc: + print("Status : FAIL", exc.returncode, exc.output) +else: + print("Output: \n{}\n".format(output)) + +#%% Loading result from sice_debug.f +# OUTPUT files: +# file= 'spherical_albedo.dat' ) ns,ndate(3),alat,alon,(answer(i),i=1,21),isnow +# file= 'planar_albedo.dat' ) ns,ndate(3),alat,alon,(rp(i),i=1,21),isnow +# file= 'boar.dat' ) ns,ndate(3),alat,alon,(refl(i),i=1,21),isnow +# file= 'size.dat' ) ns,ndate(3),alat,alon,D,area,al,r0, andsi,andbi,indexs,indexi,indexd,isnow +# file= 'impurity.dat' ) ns,ndate(3),alat,alon,ntype,conc,bf,bm,thv, toa(1),isnow +# file= 'bba.dat' ) ns,ndate(3),alat,alon,rp3,rs3, isnow +# file= 'bba_alex_reduced.dat') ns,ndate(3),rp3,isnow +# file= 'notsnow.dat') ns,ndate(3),icloud,iice +# file='retrieved_O3.dat') ns,alat,alon,BXXX,totadu,deltak,sza,vza,amf + +if os.path.isfile(sys.argv[1]): + print('\nText file output') + + impurity = pd.read_csv(OutputFolder+'impurity.dat', + names=['ns','ndate','alat','alon','ntype','conc', + 'bf','bm','thv', 'toa(1)','isnow'], + sep=' ', skipinitialspace=True, header=None) + size = pd.read_csv(OutputFolder+'size.dat', + names=['ns','ndate','alat','alon','D','area','al', + 'r0', 'andsi','andbi','indexs','indexi','indexd','isnow'], + sep=' ', skipinitialspace=True, header=None) + bba = pd.read_csv(OutputFolder+'bba.dat', + names=['ns','ndate','alat','alon','rp3','rs3','isnow'], + sep=' ', skipinitialspace=True, header=None) + data_out = data_in + data_out['grain_diameter']=np.nan + data_out.loc[size.ns-1, ['grain_diameter']]=size.D + data_out['snow_specific_area']=np.nan + data_out.loc[size.ns-1,['snow_specific_area']]=size.area + data_out['al']=np.nan + data_out.loc[size.ns-1,['al']]=size.al + data_out['r0']=np.nan + data_out.loc[size.ns-1,['r0']]=size.r0 + data_out['diagnostic_retrieval']=np.nan + data_out.loc[size.ns-1,['diagnostic_retrieval']]=bba.isnow + data_out['conc']=np.nan + data_out.loc[size.ns-1,['conc']]=impurity.conc + data_out['albedo_bb_planar_sw']=np.nan + data_out.loc[size.ns-1,['albedo_bb_planar_sw']]=bba.rp3 + data_out['albedo_bb_spherical_sw']=np.nan + data_out.loc[size.ns-1,['albedo_bb_spherical_sw']]=bba.rs3 + spherical_albedo = pd.read_csv(OutputFolder+'spherical_albedo.dat', + sep=' ', skipinitialspace=True, header=None).values + planar_albedo = pd.read_csv(OutputFolder+'planar_albedo.dat', + sep=' ', skipinitialspace=True, header=None).values + + for i in np.append(np.arange(11), np.arange(15,21)): + # for i in np.arange(21): + data_out['albedo_spectral_spherical_'+str(i+1).zfill(2)]=np.nan + data_out.loc[size.ns-1,['albedo_spectral_spherical_'+str(i+1).zfill(2)]]=spherical_albedo[:,4+i] + + for i in np.append(np.arange(11), np.arange(15,21)): + data_out['rBRR_'+str(i+1).zfill(2)]=np.nan + data_out.loc[size.ns-1,['rBRR_'+str(i+1).zfill(2)]]=planar_albedo[:,4+i] + + data_out.to_csv(sys.argv[1][:-4]+'_fortran_out.csv') +# ========= input tif =============== +elif os.path.isdir(sys.argv[1]): + Oa01 = rio.open(InputFolder+'r_TOA_01.tif') + meta = Oa01.meta + with rio.Env(): + meta.update(compress='DEFLATE') + def output_sice_f(file_name,var_name,var_id): + sice_out = np.loadtxt(file_name) + ind_orig = np.arange(1,len(Oa01.read(1).flatten())+1) + var = ind_orig*np.nan + ind_pix = sice_out[:,0].astype(int) + var[ind_pix-1] = sice_out[:,var_id] + var_mat = np.reshape(var,np.shape(Oa01.read(1))) + var_mat[var_mat==999] = np.nan + with rio.open(OutputFolder+var_name+'.tif', 'w+', **meta) as dst: + dst.write(var_mat.astype('float32'),1) + + 
output_sice_f(OutputFolder+"bba.dat",'albedo_bb_planar_sw',4)
+    output_sice_f(OutputFolder+"bba.dat",'albedo_bb_spherical_sw',5)
+    output_sice_f(OutputFolder+"size.dat",'D',4)
+    output_sice_f(OutputFolder+"size.dat",'area',5)
+    output_sice_f(OutputFolder+"size.dat",'al',6)
+    output_sice_f(OutputFolder+"size.dat",'r0',7)
+    output_sice_f(OutputFolder+"bba_alex_reduced.dat",'diagnostic_retrieval',3)
+
+    for i in range(21):
+        output_sice_f(OutputFolder+"spherical_albedo.dat",'albedo_spectral_spherical_'+str(i+1).zfill(2),4+i)
+        output_sice_f(OutputFolder+"planar_albedo.dat",'albedo_spectral_planar_'+str(i+1).zfill(2),4+i)
+        output_sice_f(OutputFolder+"boar.dat",'rBRR_'+str(i+1).zfill(2),4+i)
+
+    #%% plotting outlooks
+    try:
+        os.mkdir(OutputFolder+'plots')
+    except FileExistsError:
+        print('folder exists')
+
+    import matplotlib.pyplot as plt
+
+    # fig,ax=bl.heatmap_discrete(rio.open(OutputFolder+'diagnostic_retrieval.tif').read(1),
+    #                            'diagnostic_retrieval ')
+    # ax.set_title(OutputFolder)
+    # fig.savefig(OutputFolder+'plots/diagnostic_retrieval.png',bbox_inches='tight')
+
+    var_list = ('albedo_bb_planar_sw','albedo_bb_spherical_sw')
+    for i in range(len(var_list)):
+        var_1 = rio.open(OutputFolder+var_list[i]+'.tif').read(1)
+        plt.figure(figsize=(10,15))
+        bl.heatmap(var_1,var_list[i], col_lim=(0, 1) ,cmap_in='jet')
+        plt.title(OutputFolder)
+        plt.savefig(OutputFolder+'plots/'+var_list[i]+'_diff.png',bbox_inches='tight')
+    plt.ioff()
+    for i in np.append(np.arange(11), np.arange(21)):
+        var_name = 'albedo_spectral_spherical_'+ str(i+1).zfill(2)
+        var_1 = rio.open(OutputFolder+var_name+'.tif').read(1)
+        plt.figure(figsize=(10,15))
+        bl.heatmap(var_1,var_name, col_lim=(0, 1) ,cmap_in='jet')
+        plt.title(OutputFolder)
+        plt.savefig(OutputFolder+'plots/'+var_name+'.png',bbox_inches='tight')
+    plt.ion()
\ No newline at end of file
diff --git a/val_PROMICE.py b/val_PROMICE.py
new file mode 100644
index 0000000..7c87a92
--- /dev/null
+++ b/val_PROMICE.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jul 24 08:58:21 2020
+Data processing for the evaluation of SICE against PROMICE measurements
+@author: bav
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import errno
+from pathlib import Path
+import bav_lib as bl
+import matplotlib.pyplot as plt
+
+path_to_file = 'validation/data/S3_PROMICE.csv'
+
+# Read merged csv
+data_all = pd.read_csv(path_to_file)
+
+#%% Running pySICE
+cmd = 'python sice_f.py '+path_to_file
+# in IPython/Spyder this can instead be run as: %run sice_f.py validation/data/S3_PROMICE.csv
+# %run sice.py validation/data/data_in.csv
+print(cmd)
+os.system(cmd)
+#%% Analysis
+#%matplotlib qt
+#%matplotlib inline
+data_out = pd.read_csv('validation/data/S3_PROMICE_fortran_out.csv')
+
+# Removing cloudy measurements
+cloud_thd = 0.3
+data_cloud = data_out[data_out['cloud']>cloud_thd]
+data_out=data_out[data_out['cloud']<=cloud_thd]
+print('\nRemoving '+str(data_cloud.shape[0]) +'/'+
+      str(data_cloud.shape[0]+data_out.shape[0]) + ' due to clouds')
+
+#%% Plot results ##
+fs = 15
+ss=5
+f1, ax = plt.subplots(1,3,figsize=(15, 7))
+f1.subplots_adjust(hspace=0.2, wspace=0.1,
+                   left = 0.08 , right = 0.95 ,
+                   bottom = 0.2 , top = 0.9)
+# ax[0] = bl.density_scatter(data_out['albedo_bb_planar_sw'], data_out['PROMICE_alb'],ax[0],ss)
+ax[0].scatter(data_out['albedo_bb_planar_sw'], data_out['PROMICE_alb'], c= "black",s=5)
+plt.tight_layout()
+ax[0].set_xlabel("Albedo from pySICEv1.4 (-)",fontsize=fs)
+ax[0].set_ylabel("PROMICE albedo (-)",fontsize=fs)
+
+ax[1] = bl.density_scatter(data_out['BBA_emp'], data_out['PROMICE_alb'],ax[1],ss)
+ax[1].scatter(data_out['BBA_emp'],
data_out['PROMICE_alb'],s=ss,c="black") +plt.tight_layout() +ax[1].set_xlabel("Empirical (-)",fontsize=fs) +ax[1].set_ylabel("PROMICE albedo (-)",fontsize=fs) + +var= 'rBRR_20' +# ax[2] = bl.density_scatter(data_out[var], data_out['PROMICE_alb'],ax[2],ss) +ax[2].scatter(data_out[var], data_out['PROMICE_alb'],s=ss,c="black") +plt.tight_layout() +ax[2].set_xlabel(var,fontsize=fs) +ax[2].set_ylabel("PROMICE albedo (-)",fontsize=fs) + +f1.savefig('validation/figures/scatter.png') + +#%% +data_out['date'] = pd.to_datetime(data_out[['year','month','day','hour','minute','second']]) +data_out.set_index(['date', 'station']) +data_out.sort_values(by=['station','date'],inplace=True) +data_out.head(5) +data_out.station.unique() + +bl.multi_plot(data_out, title = 'Albedo (-)', + sites =['KAN_L','KAN_M','KAN_U','KPC_L','KPC_U','SCO_L','SCO_U','EGP'], + OutputFolder = 'validation/figures/', filename_out='PROMICE_comp_1') +bl.multi_plot(data_out, title='Albedo (-)', + sites =['QAS_L','QAS_U','TAS_L','TAS_A','THU_L','THU_U','UPE_L','UPE_U'], + OutputFolder = 'validation/figures/', filename_out='PROMICE_comp_2') + diff --git a/validation/.ipynb_checkpoints/fetch_PROMICE_data-checkpoint.ipynb b/validation/.ipynb_checkpoints/fetch_PROMICE_data-checkpoint.ipynb new file mode 100644 index 0000000..5d74b66 --- /dev/null +++ b/validation/.ipynb_checkpoints/fetch_PROMICE_data-checkpoint.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fetch PROMICE data\n", + "This codeblock fetches down relevant weather data from https://promice.org/PromiceDataPortal/#Automaticweatherstations (unless the file is already available in the folder, then it skips). \n", + "This file has taken its source in the following thread: https://stackoverflow.com/questions/51455112/python-script-to-download-data-from-noaa\n", + "Script does take a while to run (30+ minutes)...." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "Collapsed": "false" + }, + "outputs": [], + "source": [ + "import errno\n", + "import os\n", + "import wget # conda install: https://anaconda.org/anaconda/pywget\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "path = \"data/PROMICE\" \n", + "\n", + "# Array information of stations available at PROMICE official site: https://promice.org/WeatherStations.html\n", + "PROMICE_stations = [('EGP',(75.6247,-35.9748), 2660), \n", + " ('KAN_B',(67.1252,-50.1832), 350), \n", + " ('KAN_L',(67.0955,-35.9748), 670), \n", + " ('KAN_M',(67.0670,-48.8355), 1270), \n", + " ('KAN_U',(67.0003,-47.0253), 1840), \n", + " ('KPC_L',(79.9108,-24.0828), 370),\n", + " ('KPC_U',(79.8347,-25.1662), 870), \n", + " ('MIT',(65.6922,-37.8280), 440), \n", + " ('NUK_K',(64.1623,-51.3587), 710), \n", + " ('NUK_L',(64.4822,-49.5358), 530),\n", + " ('NUK_U',(64.5108,-49.2692), 1120),\n", + " ('QAS_L',(61.0308,-46.8493), 280),\n", + " ('QAS_M',(61.0998,-46.8330), 630), \n", + " ('QAS_U',(61.1753,-46.8195), 900), \n", + " ('SCO_L',(72.2230,-26.8182), 460),\n", + " ('SCO_U',(72.3933,-27.2333), 970),\n", + " ('TAS_A',(65.7790,-38.8995), 890),\n", + " ('TAS_L',(65.6402,-38.8987), 250),\n", + " ('THU_L',(76.3998,-68.2665), 570),\n", + " ('THU_U',(76.4197,-68.1463), 760),\n", + " ('UPE_L',(72.8932,-54.2955), 220), \n", + " ('UPE_U',(72.8878,-53.5783), 940)]\n", + "\n", + "# Function for making directories if they do not exists. 
\n", + "def mkdir_p(path):\n", + " try:\n", + " os.makedirs(path)\n", + " return 'Path created.'\n", + " except OSError as exc:\n", + " if exc.errno == errno.EEXIST and os.path.isdir(path):\n", + " return 'Path already exists!'\n", + " else:\n", + " raise\n", + " \n", + "mkdir_p(path)\n", + "\n", + "\n", + "# Goes through each station and fetch down data online. Necessary manipulations and sorting are made.\n", + "for ws in PROMICE_stations:\n", + " url = f'https://promice.org/PromiceDataPortal/api/download/f24019f7-d586-4465-8181-d4965421e6eb/v03/hourly/csv/{ws[0]}_hour_v03.txt'\n", + " file_x = path + f'/{ws[0]}'\n", + " if Path(file_x + '.csv').is_file():\n", + " pass\n", + " else:\n", + " filename = wget.download(url, out=file_x + '.txt')\n", + " #https://stackoverflow.com/a/19759515\n", + " with open(file_x + '.txt') as infile, open(file_x + '_fixed.txt', 'w') as outfile:\n", + " for line in infile:\n", + " outfile.write(\" \".join(line.split()).replace(' ', ','))\n", + " outfile.write(\",\") # trailing comma shouldn't matter\n", + " outfile.write(\"\\n\")\n", + " df = pd.read_csv (file_x + '_fixed.txt')\n", + " df = df[['Year','MonthOfYear','DayOfYear','HourOfDay(UTC)','CloudCover','TiltToEast(d)','TiltToNorth(d)','Albedo_theta<70d']]\n", + " df = df[df.Year > 2015]\n", + " #df = df[df.MonthOfYear > 4]\n", + " #df = df[df.MonthOfYear < 9]\n", + " #df = df[df['HourOfDay(UTC)'] > 9] #\n", + " #df = df[df['HourOfDay(UTC)'] < 21]\n", + " df = df.round({'CloudCover': 2, 'TiltToEast(d)': 2, 'TiltToNorth(d)': 2, 'Albedo_theta<70d': 2})\n", + " #df = df[df['Albedo_theta<70d'] < 1.00]\n", + " #df = df[df['Albedo_theta<70d'] > 0.00]\n", + " df['station_name'] = ws[0]\n", + " df['latitude N'] = ws[1][0]\n", + " df['longitude W'] = ws[1][1]\n", + " df['elevation'] = float(ws[2])\n", + " df['TiltToEast(d)'].replace(-999.0, np.nan, inplace=True)\n", + " df['TiltToEast(d)'] = df['TiltToEast(d)'].interpolate(method='nearest', limit_direction='both')\n", + " df['TiltToNorth(d)'].replace(-999.0, np.nan, inplace=True)\n", + " df['TiltToNorth(d)'] = df['TiltToNorth(d)'].interpolate(method='nearest', limit_direction='both')\n", + "\n", + " df.to_csv(file_x + '.csv', index=None)\n", + "\n", + "# Removes all temporary files made:\n", + "filelist = [ f for f in os.listdir(path) if f.endswith(\".txt\") ]\n", + "for f in filelist:\n", + " os.remove(os.path.join(path, f))\n", + "\n", + "# Create one big file \"PROMICE.csv\" with all the stations data. 
\n", + "if Path(f'{path}/PROMICE.csv').is_file():\n", + " pass\n", + "else: \n", + " PROMICE = pd.DataFrame()\n", + " filelist = [ f for f in os.listdir(path) if f.endswith(\".csv\") ]\n", + " for f in filelist:\n", + " PROMICE = PROMICE.append(pd.read_csv(f'{path}/{f}'))\n", + " \n", + " PROMICE['TiltToEast(d)'] = PROMICE['TiltToEast(d)'].abs()\n", + " PROMICE['TiltToNorth(d)'] = PROMICE['TiltToNorth(d)'].abs()\n", + " PROMICE['tilt'] = PROMICE[['TiltToEast(d)','TiltToNorth(d)']].values.max(1)\n", + " PROMICE.drop(['TiltToNorth(d)','TiltToEast(d)'], axis=1, inplace=True)\n", + " PROMICE.rename(columns={'Year': 'year', \n", + " 'MonthOfYear': 'month',\n", + " 'DayOfYear':'dayofyear',\n", + " 'HourOfDay(UTC)':'hour',\n", + " 'CloudCover':'cloud',\n", + " 'station_name':'station',\n", + " 'Albedo_theta<70d':'PROMICE_alb'}, inplace=True)\n", + " PROMICE = PROMICE.round({'latitude N': 4, 'longitude W': 4})\n", + " PROMICE.to_csv(f'{path}/PROMICE.csv', index=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/validation/.ipynb_checkpoints/merge_PROMICE_and_S3_data-checkpoint.ipynb b/validation/.ipynb_checkpoints/merge_PROMICE_and_S3_data-checkpoint.ipynb new file mode 100644 index 0000000..5d73a25 --- /dev/null +++ b/validation/.ipynb_checkpoints/merge_PROMICE_and_S3_data-checkpoint.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Merge PROMICE with S3_extract data\n", + "Process the S3-data and merges it with PROMICE data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "s3_extract = pd.read_csv('./data/OLCI/S3_extract.csv') #Put in path to S3_extract data\n", + "full_size = s3_extract.shape[0]\n", + "s3_extract = s3_extract.drop_duplicates(keep='first') #dropping duplicates except first sample, if any.\n", + "print(f'Removed duplicates: {full_size-s3_extract.shape[0]}')\n", + "\n", + "# Specify columns to keep\n", + "s3_extract = s3_extract[['station', 'year', 'month', 'minute', 'dayofyear', 'hour', 'sza', 'vza', 'saa', 'vaa', 'slope', 'ndsi', 'ndbi', 'humidity', 'total_columnar_water_vapour', 'total_ozone',\n", + " 'Oa01_reflectance','Oa02_reflectance','Oa03_reflectance','Oa04_reflectance','Oa05_reflectance','Oa06_reflectance','Oa07_reflectance',\n", + " 'Oa08_reflectance','Oa09_reflectance', 'Oa10_reflectance','Oa11_reflectance', 'Oa12_reflectance','Oa13_reflectance','Oa14_reflectance',\n", + " 'Oa15_reflectance','Oa16_reflectance','Oa17_reflectance','Oa18_reflectance','Oa19_reflectance','Oa20_reflectance','Oa21_reflectance',\n", + " ]]\n", + "\n", + "# the PROMICE data is only given in hourly interval. 
Hence the hour have to be corrected in the s3_extract data.\n", + "#s3_extract['hour'] = s3_extract['hour'] - 1 if s3_extract['minute'].astype('int64') < 30 else (s3_extract['hour'] + 1) \n", + "\n", + "# Process data to the right format\n", + "s3_extract = s3_extract.round({\n", + " 'sza': 2,\n", + " 'vza': 2,\n", + " 'saa': 2,\n", + " 'vaa': 2,\n", + " 'slope': 2,\n", + " 'ndsi': 2, \n", + " 'ndbi': 2, \n", + " 'humidity': 2, \n", + " 'total_columnar_water_vapour': 2,\n", + " 'Oa01_reflectance': 2,\n", + " 'Oa02_reflectance': 2,\n", + " 'Oa03_reflectance': 2,\n", + " 'Oa04_reflectance': 2,\n", + " 'Oa05_reflectance': 2,\n", + " 'Oa06_reflectance': 2,\n", + " 'Oa07_reflectance': 2,\n", + " 'Oa08_reflectance': 2,\n", + " 'Oa09_reflectance': 2,\n", + " 'Oa10_reflectance': 2,\n", + " 'Oa11_reflectance': 2,\n", + " 'Oa12_reflectance': 2,\n", + " 'Oa13_reflectance': 2,\n", + " 'Oa14_reflectance': 2,\n", + " 'Oa15_reflectance': 2,\n", + " 'Oa16_reflectance': 2,\n", + " 'Oa17_reflectance': 2,\n", + " 'Oa18_reflectance': 2,\n", + " 'Oa19_reflectance': 2,\n", + " 'Oa20_reflectance': 2,\n", + " 'Oa21_reflectance': 2\n", + "})\n", + "\n", + "# Rename columns\n", + "s3_extract.rename(columns={'station':'station',\n", + " 'year': 'year', \n", + " 'month': 'month',\n", + " 'dayofyear': 'dayofyear',\n", + " 'hour': 'hour',\n", + " 'sza': 'sza',\n", + " 'vza': 'vza',\n", + " 'saa': 'saa', \n", + " 'vaa': 'vaa',\n", + " 'slope': 'slope',\n", + " 'ndsi': 'ndsi',\n", + " 'ndbi': 'ndbi',\n", + " 'humidity': 'humidity',\n", + " 'total_columnar_water_vapour': 'total_columnar_water_vapour',\n", + " 'total_ozone': 'total_ozone',\n", + " 'Oa01_reflectance': 'Oa01',\n", + " 'Oa02_reflectance': 'Oa02',\n", + " 'Oa03_reflectance': 'Oa03',\n", + " 'Oa04_reflectance': 'Oa04',\n", + " 'Oa05_reflectance': 'Oa05',\n", + " 'Oa06_reflectance': 'Oa06',\n", + " 'Oa07_reflectance': 'Oa07',\n", + " 'Oa08_reflectance': 'Oa08',\n", + " 'Oa09_reflectance': 'Oa09',\n", + " 'Oa10_reflectance': 'Oa10',\n", + " 'Oa11_reflectance': 'Oa11',\n", + " 'Oa12_reflectance': 'Oa12',\n", + " 'Oa13_reflectance': 'Oa13',\n", + " 'Oa14_reflectance': 'Oa14',\n", + " 'Oa15_reflectance': 'Oa15',\n", + " 'Oa16_reflectance': 'Oa16',\n", + " 'Oa17_reflectance': 'Oa17',\n", + " 'Oa18_reflectance': 'Oa18',\n", + " 'Oa19_reflectance': 'Oa19',\n", + " 'Oa20_reflectance': 'Oa20',\n", + " 'Oa21_reflectance': 'Oa21' \n", + " }, \n", + " inplace=True)\n", + "\n", + "# Merge images with PROMICE data\n", + "OLCI_with_PROMICE = pd.merge(left=s3_extract, right=pd.read_csv('data/PROMICE/PROMICE.csv'), \n", + " left_on=['year','month','dayofyear','hour','station'], \n", + " right_on=['year','month','dayofyear','hour', 'station'])\n", + "# Create merged csv\n", + "OLCI_with_PROMICE.to_csv(f'data/OLCI/S3_extract_with_PROMICE.csv', index=None) " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/validation/figures/PROMICE_comp_1.png b/validation/figures/PROMICE_comp_1.png new file mode 100644 index 0000000..1412ee1 Binary files /dev/null and b/validation/figures/PROMICE_comp_1.png 
differ diff --git a/validation/figures/PROMICE_comp_2.png b/validation/figures/PROMICE_comp_2.png new file mode 100644 index 0000000..5548a32 Binary files /dev/null and b/validation/figures/PROMICE_comp_2.png differ diff --git a/validation/figures/scatter.png b/validation/figures/scatter.png new file mode 100644 index 0000000..8e1be38 Binary files /dev/null and b/validation/figures/scatter.png differ diff --git a/validation/merge_S3_PROMICE.py b/validation/merge_S3_PROMICE.py new file mode 100644 index 0000000..a640990 --- /dev/null +++ b/validation/merge_S3_PROMICE.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Jul 29 07:01:37 2020 + +@author: bav +""" + +# -*- coding: utf-8 -*- +""" +Created on Fri Jul 24 08:58:21 2020 +Data processing for the evaluation of SICE against PROMICE measurements +@author: bav +""" + +import numpy as np +import pandas as pd +import os +import errno +from pathlib import Path +import wget # conda install: https://anaconda.org/anaconda/pywget + +#%% preparing input file +path = './data/' +filename = 'S3_28072020' +data = pd.read_csv(path+filename+'.csv'); + +cols = data.columns +cols = [c for c in cols if c.find('radiance')<0] +cols = [c for c in cols if c.find('solar')<0] +cols = [c for c in cols if c.find('temperature')<0] +cols = [c for c in cols if c.find('spectral')<0] +cols = [c for c in cols if c.find('rBRR')<0] +cols = [c for c in cols if c.find('albedo')<0] +cols = [c for c in cols if c.find('variance')<0] +cols = [c for c in cols if c.find('grain')<0] +cols = [c for c in cols if c.find('snow')<0] +cols = [c for c in cols if c.find('ndsi')<0] +cols = [c for c in cols if c.find('ndbi')<0] +cols = [c for c in cols if c.find('wind')<0] +cols = [c for c in cols if c.find('OAA')<0] +cols = [c for c in cols if c.find('SAA')<0] +cols = [c for c in cols if c.find('OZA')<0] +cols = [c for c in cols if c.find('SZA')<0] +cols = [c for c in cols if c.find('plat')<0] +cols = [c for c in cols if c.find('aspect')<0] +cols = [c for c in cols if c.find('_y')<0] +cols = [c for c in cols if c.find('Unname')<0] +data=data[cols] + +for c in cols: + if c.find('_x')>0: + data.rename(columns={c:c.replace('_x', '')}, inplace=True) + + +#%% Adding PROMICE observations +# Array information of stations available at PROMICE official site: https://promice.org/WeatherStations.html +PROMICE_stations = [('EGP',(75.6247,-35.9748), 2660), + ('KAN_B',(67.1252,-50.1832), 350), + ('KAN_L',(67.0955,-35.9748), 670), + ('KAN_M',(67.0670,-48.8355), 1270), + ('KAN_U',(67.0003,-47.0253), 1840), + ('KPC_L',(79.9108,-24.0828), 370), + ('KPC_U',(79.8347,-25.1662), 870), + ('MIT',(65.6922,-37.8280), 440), + ('NUK_K',(64.1623,-51.3587), 710), + ('NUK_L',(64.4822,-49.5358), 530), + ('NUK_U',(64.5108,-49.2692), 1120), + ('QAS_L',(61.0308,-46.8493), 280), + ('QAS_M',(61.0998,-46.8330), 630), + ('QAS_U',(61.1753,-46.8195), 900), + ('SCO_L',(72.2230,-26.8182), 460), + ('SCO_U',(72.3933,-27.2333), 970), + ('TAS_A',(65.7790,-38.8995), 890), + ('TAS_L',(65.6402,-38.8987), 250), + ('THU_L',(76.3998,-68.2665), 570), + ('THU_U',(76.4197,-68.1463), 760), + ('UPE_L',(72.8932,-54.2955), 220), + ('UPE_U',(72.8878,-53.5783), 940)] + +path_to_PROMICE = "./data/PROMICE" + +# Function for making directories if they do not exists. +def mkdir_p(path): + try: + os.makedirs(path) + return 'Path created.' + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(path): + return 'Path already exists!' 
+ else: + raise + +mkdir_p(path_to_PROMICE) + +# Goes through each station and fetch down data online. Necessary manipulations and sorting are made. +for ws in PROMICE_stations: + if Path(f'{path_to_PROMICE}/{ws[0]}_hour_v03.txt').is_file(): + print('\nPROMICE.csv file already exists.') + pass + else: + print(ws) + url = f'https://promice.org/PromiceDataPortal/api/download/f24019f7-d586-4465-8181-d4965421e6eb/v03/hourly/csv/{ws[0]}_hour_v03.txt' + filename = wget.download(url, out= path_to_PROMICE + f'/{ws[0]}_hour_v03.txt') + +if Path(f'{path_to_PROMICE}/PROMICE.csv').is_file(): + print('\nPROMICE.csv file already exists.') + pass +else: + # Create one big file "PROMICE.csv" with all the stations data. + # reading PROMICE file and joining them together + for ws in PROMICE_stations: + filepath = path_to_PROMICE + '/' + ws[0] + '_hour_v03.txt' + + df = pd.read_csv (filepath, delim_whitespace=True) + df = df[['Year','MonthOfYear','DayOfYear','HourOfDay(UTC)','CloudCover', + 'TiltToEast(d)','TiltToNorth(d)','Albedo_theta<70d']] + df = df[df.Year > 2015] + df['station_name'] = ws[0] + df['latitude N'] = ws[1][0] + df['longitude W'] = ws[1][1] + df['elevation'] = float(ws[2]) + df['TiltToEast(d)'].replace(-999.0, np.nan, inplace=True) + try : + df['TiltToEast(d)'] = df['TiltToEast(d)'].interpolate(method='nearest', limit_direction='both') + except: + print('no tilt at '+ws[0]) + df['TiltToNorth(d)'].replace(-999.0, np.nan, inplace=True) + try: + df['TiltToNorth(d)'] = df['TiltToNorth(d)'].interpolate(method='nearest', limit_direction='both') + except: + print('no tilt at '+ws[0]) + + df.to_csv(path_to_PROMICE + '/' + ws[0] + '.csv', index=None) + + PROMICE = pd.DataFrame() + filelist = [ f for f in os.listdir(path_to_PROMICE) if f.endswith(".csv") ] + for f in filelist: + print(f) + PROMICE = PROMICE.append(pd.read_csv(f'{path_to_PROMICE}/{f}')) + + PROMICE['TiltToEast(d)'] = PROMICE['TiltToEast(d)'].abs() + PROMICE['TiltToNorth(d)'] = PROMICE['TiltToNorth(d)'].abs() + PROMICE['tilt'] = PROMICE[['TiltToEast(d)','TiltToNorth(d)']].values.max(1) + PROMICE.drop(['TiltToNorth(d)','TiltToEast(d)'], axis=1, inplace=True) + PROMICE.rename(columns={'Year': 'year', + 'MonthOfYear': 'month', + 'DayOfYear':'dayofyear', + 'HourOfDay(UTC)':'hour', + 'CloudCover':'cloud', + 'station_name':'station', + 'Albedo_theta<70d':'PROMICE_alb'}, inplace=True) + PROMICE.to_csv(f'{path_to_PROMICE}/PROMICE.csv', index=None) + +#%% Merging +data_PROMICE = pd.read_csv(path_to_PROMICE+'/PROMICE.csv') +data.rename(columns={'site': 'station'}, inplace=True) + +full_size = data.shape[0] +data = data.drop_duplicates(keep='first') #dropping duplicates except first sample, if any. +print(f'Removed duplicates: {full_size-data.shape[0]}') + +# the PROMICE data is only given in hourly interval. Hence the hour have to be corrected in the s3_extract data. 
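+# Note on the disabled correction below: a bare Python `if/else` does not
+# broadcast over a pandas Series, so that line would raise if re-enabled.
+# A vectorised sketch (assuming the intent is rounding to the nearest hour,
+# and that this script's frame `data` carries a `minute` column):
+# data['hour'] = np.where(data['minute'].astype('int64') < 30,
+#                         data['hour'], data['hour'] + 1)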
+#s3_extract['hour'] = s3_extract['hour'] - 1 if s3_extract['minute'].astype('int64') < 30 else (s3_extract['hour'] + 1) + +# Merge images with PROMICE data +data_all = pd.merge(left=data, right=data_PROMICE, how='inner', + left_on=['year','month','dayofyear','hour','station'], + right_on=['year','month','dayofyear','hour', 'station']) +# data_all.drop(columns=['Unnamed: 0','Unnamed: 0.1'],inplace=True) +# keeping only good data +print(data_all.shape[0]) +data_all = data_all.replace(to_replace=-999,value=np.nan) +data_all = data_all[data_all['PROMICE_alb'].notnull()] +data_all = data_all[data_all['PROMICE_alb']<1] +data_all = data_all[data_all['PROMICE_alb']>0] +data_all = data_all[data_all['sza']<70] +print(data_all.shape[0]) +tmp =data_all.shape[0] + +data_all['BBA_emp'] = 0.945*(data_all["Oa01_reflectance"] + data_all["Oa06_reflectance"] + data_all["Oa17_reflectance"] + data_all["Oa21_reflectance"]) / 4.0+ 0.055 + +# Create merged csv +data_all.to_csv('./data/S3_PROMICE.csv', index=None) diff --git a/validation/old/.ipynb_checkpoints/fetch_PROMICE_data-checkpoint.ipynb b/validation/old/.ipynb_checkpoints/fetch_PROMICE_data-checkpoint.ipynb new file mode 100644 index 0000000..3d760b4 --- /dev/null +++ b/validation/old/.ipynb_checkpoints/fetch_PROMICE_data-checkpoint.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fetch PROMICE data\n", + "This codeblock fetches down relevant weather data from https://promice.org/PromiceDataPortal/#Automaticweatherstations (unless the file is already available in the folder, then it skips). \n", + "This file has taken its source in the following thread: https://stackoverflow.com/questions/51455112/python-script-to-download-data-from-noaa\n", + "Script does take a while to run (30+ minutes)...." 
\n", + "if Path(f'{path}/PROMICE.csv').is_file():\n", + " pass\n", + "else: \n", + " PROMICE = pd.DataFrame()\n", + " filelist = [ f for f in os.listdir(path) if f.endswith(\".csv\") ]\n", + " for f in filelist:\n", + " PROMICE = PROMICE.append(pd.read_csv(f'{path}/{f}'))\n", + " \n", + " PROMICE['TiltToEast(d)'] = PROMICE['TiltToEast(d)'].abs()\n", + " PROMICE['TiltToNorth(d)'] = PROMICE['TiltToNorth(d)'].abs()\n", + " PROMICE['tilt'] = PROMICE[['TiltToEast(d)','TiltToNorth(d)']].values.max(1)\n", + " PROMICE.drop(['TiltToNorth(d)','TiltToEast(d)'], axis=1, inplace=True)\n", + " PROMICE.rename(columns={'Year': 'year', \n", + " 'MonthOfYear': 'month',\n", + " 'DayOfYear':'dayofyear',\n", + " 'HourOfDay(UTC)':'hour',\n", + " 'CloudCover':'cloud',\n", + " 'station_name':'station',\n", + " 'Albedo_theta<70d':'PROMICE_alb'}, inplace=True)\n", + " PROMICE = PROMICE.round({'latitude N': 4, 'longitude W': 4})\n", + " PROMICE.to_csv(f'{path}/PROMICE.csv', index=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/validation/old/fetch_PROMICE_data.ipynb b/validation/old/fetch_PROMICE_data.ipynb new file mode 100644 index 0000000..a128ee2 --- /dev/null +++ b/validation/old/fetch_PROMICE_data.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fetch PROMICE data\n", + "This codeblock fetches down relevant weather data from https://promice.org/PromiceDataPortal/#Automaticweatherstations (unless the file is already available in the folder, then it skips). \n", + "This file has taken its source in the following thread: https://stackoverflow.com/questions/51455112/python-script-to-download-data-from-noaa\n", + "Script does take a while to run (30+ minutes)...." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "Collapsed": "false"
+   },
+   "outputs": [],
+   "source": [
+    "import errno\n",
+    "import os\n",
+    "import wget  # conda install: https://anaconda.org/anaconda/pywget\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "path = \"data/PROMICE\"\n",
+    "\n",
+    "# Station metadata (name, (latitude, longitude), elevation in m), from the PROMICE site: https://promice.org/WeatherStations.html\n",
+    "PROMICE_stations = [('EGP',(75.6247,-35.9748), 2660),\n",
+    "                    ('KAN_B',(67.1252,-50.1832), 350),\n",
+    "                    ('KAN_L',(67.0955,-35.9748), 670),\n",
+    "                    ('KAN_M',(67.0670,-48.8355), 1270),\n",
+    "                    ('KAN_U',(67.0003,-47.0253), 1840),\n",
+    "                    ('KPC_L',(79.9108,-24.0828), 370),\n",
+    "                    ('KPC_U',(79.8347,-25.1662), 870),\n",
+    "                    ('MIT',(65.6922,-37.8280), 440),\n",
+    "                    ('NUK_K',(64.1623,-51.3587), 710),\n",
+    "                    ('NUK_L',(64.4822,-49.5358), 530),\n",
+    "                    ('NUK_U',(64.5108,-49.2692), 1120),\n",
+    "                    ('QAS_L',(61.0308,-46.8493), 280),\n",
+    "                    ('QAS_M',(61.0998,-46.8330), 630),\n",
+    "                    ('QAS_U',(61.1753,-46.8195), 900),\n",
+    "                    ('SCO_L',(72.2230,-26.8182), 460),\n",
+    "                    ('SCO_U',(72.3933,-27.2333), 970),\n",
+    "                    ('TAS_A',(65.7790,-38.8995), 890),\n",
+    "                    ('TAS_L',(65.6402,-38.8987), 250),\n",
+    "                    ('THU_L',(76.3998,-68.2665), 570),\n",
+    "                    ('THU_U',(76.4197,-68.1463), 760),\n",
+    "                    ('UPE_L',(72.8932,-54.2955), 220),\n",
+    "                    ('UPE_U',(72.8878,-53.5783), 940)]\n",
+    "\n",
+    "# Create the output directory if it does not already exist.\n",
+    "def mkdir_p(path):\n",
+    "    try:\n",
+    "        os.makedirs(path)\n",
+    "        return 'Path created.'\n",
+    "    except OSError as exc:\n",
+    "        if exc.errno == errno.EEXIST and os.path.isdir(path):\n",
+    "            return 'Path already exists!'\n",
+    "        else:\n",
+    "            raise\n",
+    "\n",
+    "mkdir_p(path)\n",
+    "\n",
+    "# Go through each station, fetch the data online, and apply the necessary cleaning and formatting.\n",
+    "for ws in PROMICE_stations:\n",
+    "    url = f'https://promice.org/PromiceDataPortal/api/download/f24019f7-d586-4465-8181-d4965421e6eb/v03/hourly/csv/{ws[0]}_hour_v03.txt'\n",
+    "    file_x = path + f'/{ws[0]}'\n",
+    "    if not Path(file_x + '.csv').is_file():\n",
+    "        filename = wget.download(url, out=file_x + '.txt')\n",
+    "        # Normalize whitespace-delimited rows to comma-separated: https://stackoverflow.com/a/19759515\n",
+    "        with open(file_x + '.txt') as infile, open(file_x + '_fixed.txt', 'w') as outfile:\n",
+    "            for line in infile:\n",
+    "                outfile.write(\" \".join(line.split()).replace(' ', ','))\n",
+    "                outfile.write(\",\")  # trailing comma shouldn't matter\n",
+    "                outfile.write(\"\\n\")\n",
+    "        df = pd.read_csv(file_x + '_fixed.txt')\n",
+    "        df = df[['Year','MonthOfYear','DayOfYear','HourOfDay(UTC)','CloudCover','TiltToEast(d)','TiltToNorth(d)','Albedo_theta<70d']]\n",
+    "        df = df[df.Year > 2015]\n",
+    "        #df = df[df.MonthOfYear > 4]\n",
+    "        #df = df[df.MonthOfYear < 9]\n",
+    "        #df = df[df['HourOfDay(UTC)'] > 9]\n",
+    "        #df = df[df['HourOfDay(UTC)'] < 21]\n",
+    "        df = df.round({'CloudCover': 2, 'TiltToEast(d)': 2, 'TiltToNorth(d)': 2, 'Albedo_theta<70d': 2})\n",
+    "        #df = df[df['Albedo_theta<70d'] < 1.00]\n",
+    "        #df = df[df['Albedo_theta<70d'] > 0.00]\n",
+    "        df['station_name'] = ws[0]\n",
+    "        df['latitude N'] = ws[1][0]\n",
+    "        df['longitude W'] = ws[1][1]\n",
+    "        df['elevation'] = float(ws[2])\n",
+    "        # Gap-fill the tilt measurements (flagged -999) by nearest-neighbour interpolation\n",
+    "        df['TiltToEast(d)'].replace(-999.0, np.nan, inplace=True)\n",
+    "        df['TiltToEast(d)'] = df['TiltToEast(d)'].interpolate(method='nearest', limit_direction='both')\n",
+    "        df['TiltToNorth(d)'].replace(-999.0, np.nan, inplace=True)\n",
+    "        df['TiltToNorth(d)'] = df['TiltToNorth(d)'].interpolate(method='nearest', limit_direction='both')\n",
+    "\n",
+    "        df.to_csv(file_x + '.csv', index=None)\n",
+    "\n",
+    "# Remove the temporary .txt files:\n",
+    "filelist = [f for f in os.listdir(path) if f.endswith('.txt')]\n",
+    "for f in filelist:\n",
+    "    os.remove(os.path.join(path, f))\n",
+    "\n",
+    "# Create one big file \"PROMICE.csv\" with all the station data.\n",
+    "if not Path(f'{path}/PROMICE.csv').is_file():\n",
+    "    filelist = [f for f in os.listdir(path) if f.endswith('.csv')]\n",
+    "    PROMICE = pd.concat([pd.read_csv(f'{path}/{f}') for f in filelist])\n",
+    "\n",
+    "    PROMICE['TiltToEast(d)'] = PROMICE['TiltToEast(d)'].abs()\n",
+    "    PROMICE['TiltToNorth(d)'] = PROMICE['TiltToNorth(d)'].abs()\n",
+    "    PROMICE['tilt'] = PROMICE[['TiltToEast(d)','TiltToNorth(d)']].values.max(1)\n",
+    "    PROMICE.drop(['TiltToNorth(d)','TiltToEast(d)'], axis=1, inplace=True)\n",
+    "    PROMICE.rename(columns={'Year': 'year',\n",
+    "                            'MonthOfYear': 'month',\n",
+    "                            'DayOfYear': 'dayofyear',\n",
+    "                            'HourOfDay(UTC)': 'hour',\n",
+    "                            'CloudCover': 'cloud',\n",
+    "                            'station_name': 'station',\n",
+    "                            'Albedo_theta<70d': 'PROMICE_alb'}, inplace=True)\n",
+    "    PROMICE = PROMICE.round({'latitude N': 4, 'longitude W': 4})\n",
+    "    PROMICE.to_csv(f'{path}/PROMICE.csv', index=None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/validation/old/merge_PROMICE_and_S3_data.ipynb b/validation/old/merge_PROMICE_and_S3_data.ipynb
new file mode 100644
index 0000000..516b7cd
--- /dev/null
+++ b/validation/old/merge_PROMICE_and_S3_data.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Merge PROMICE with S3_extract data\n",
+    "Processes the S3 data and merges it with the PROMICE data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "s3_extract = pd.read_csv('./data/OLCI/S3_extract.csv')  # adjust to the path of the S3_extract data\n",
+    "full_size = s3_extract.shape[0]\n",
+    "s3_extract = s3_extract.drop_duplicates(keep='first')  # drop duplicates (keeping the first sample), if any\n",
+    "print(f'Removed duplicates: {full_size - s3_extract.shape[0]}')\n",
+    "\n",
+    "# Specify columns to keep\n",
+    "s3_extract = s3_extract[['station', 'year', 'month', 'minute', 'dayofyear', 'hour', 'sza', 'vza', 'saa', 'vaa', 'slope', 'ndsi', 'ndbi', 'humidity', 'total_columnar_water_vapour', 'total_ozone',\n",
+    "                         'Oa01_reflectance','Oa02_reflectance','Oa03_reflectance','Oa04_reflectance','Oa05_reflectance','Oa06_reflectance','Oa07_reflectance',\n",
+    "                         'Oa08_reflectance','Oa09_reflectance','Oa10_reflectance','Oa11_reflectance','Oa12_reflectance','Oa13_reflectance','Oa14_reflectance',\n",
+    "                         'Oa15_reflectance','Oa16_reflectance','Oa17_reflectance','Oa18_reflectance','Oa19_reflectance','Oa20_reflectance','Oa21_reflectance',\n",
+    "                        ]]\n",
+    "\n",
+    "# The PROMICE data are only given at hourly intervals, so the S3 overpass hour has to be rounded to the nearest hour.\n",
+    "# Disabled for now; if enabled, it must be vectorized, e.g.:\n",
+    "# s3_extract['hour'] = np.where(s3_extract['minute'].astype('int64') < 30, s3_extract['hour'], s3_extract['hour'] + 1)\n",
+    "\n",
+    "# Round angles and indices to 2 decimals, including all 21 OLCI reflectance bands\n",
+    "round_cols = ['sza', 'vza', 'saa', 'vaa', 'slope', 'ndsi', 'ndbi', 'humidity',\n",
+    "              'total_columnar_water_vapour'] + [f'Oa{i:02d}_reflectance' for i in range(1, 22)]\n",
+    "s3_extract = s3_extract.round({col: 2 for col in round_cols})\n",
+    "\n",
+    "# Shorten the reflectance column names (Oa01_reflectance -> Oa01, etc.); the other columns keep their names\n",
+    "s3_extract.rename(columns={f'Oa{i:02d}_reflectance': f'Oa{i:02d}' for i in range(1, 22)},\n",
+    "                  inplace=True)\n",
+    "\n",
+    "# Merge images with PROMICE data\n",
+    "OLCI_with_PROMICE = pd.merge(left=s3_extract, right=pd.read_csv('data/PROMICE/PROMICE.csv'),\n",
+    "                             on=['year', 'month', 'dayofyear', 'hour', 'station'])\n",
+    "# Create merged csv\n",
+    "OLCI_with_PROMICE.to_csv('data/OLCI/S3_extract_with_PROMICE.csv', index=None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
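The hour-alignment step is left commented out in both the script and the notebook above.
For reference, here is a standalone sketch of what a vectorized nearest-hour alignment
could look like; the column names follow the S3_extract convention used above, and the
month/year rollover at day boundaries is deliberately ignored for brevity, so treat it as
an illustration rather than part of the committed code:

    import numpy as np
    import pandas as pd

    def round_to_nearest_hour(df: pd.DataFrame) -> pd.DataFrame:
        # Round the satellite overpass time to the nearest full hour so rows can
        # be joined against the hourly PROMICE records.
        out = df.copy()
        minute = out['minute'].astype('int64')
        out['hour'] = np.where(minute < 30, out['hour'], out['hour'] + 1)
        # 23:30 and later rounds up into the next day
        rollover = out['hour'] == 24
        out.loc[rollover, 'hour'] = 0
        out.loc[rollover, 'dayofyear'] += 1  # note: month/year rollover not handled
        return out

    # s3_extract = round_to_nearest_hour(s3_extract)  # before the pd.merge above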