diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index 8eaf8fe7..4c848284 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -102,16 +102,16 @@ def toL2( # calculating relative humidity with regard to ice T_100 = _getTempK(T_0) - ds['rh_u_cor'] = correctHumidity(ds['rh_u'], ds['t_u'], + ds['rh_u_wrt_ice_or_water'] = adjustHumidity(ds['rh_u'], ds['t_u'], T_0, T_100, ews, ei0) if ds.attrs['number_of_booms']==2: - ds['rh_l_cor'] = correctHumidity(ds['rh_l'], ds['t_l'], + ds['rh_l_wrt_ice_or_water'] = adjustHumidity(ds['rh_l'], ds['t_l'], T_0, T_100, ews, ei0) if hasattr(ds,'t_i'): if ~ds['t_i'].isnull().all(): - ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'], + ds['rh_i_wrt_ice_or_water'] = adjustHumidity(ds['rh_i'], ds['t_i'], T_0, T_100, ews, ei0) # Determine cloud cover for on-ice stations @@ -419,9 +419,12 @@ def calcTilt(tilt_x, tilt_y, deg2rad): return phi_sensor_rad, theta_sensor_rad -def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO figure out if T replicate is needed - '''Correct relative humidity using Groff & Gratch method, where values are - set when freezing and remain the original values when not freezing +def adjustHumidity(rh, T, T_0, T_100, ews, ei0): #TODO figure out if T replicate is needed + '''Adjust relative humidity so that values are given with respect to + saturation over ice in subfreezing conditions, and with respect to + saturation over water (as given by the instrument) above the melting + point. Saturation vapour pressures are calculated following the + Goff & Gratch method. Parameters ---------- @@ -440,7 +443,7 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f Returns ------- - rh_cor : xarray.DataArray + rh_wrt_ice_or_water : xarray.DataArray Adjusted relative humidity ''' # Convert to hPa (Goff & Gratch) @@ -458,8 +461,8 @@ def correctHumidity(rh, T, T_0, T_100, ews, ei0): #TODO f freezing = (T < 0) & (T > -100).values # Set to Goff & Gratch values when freezing, otherwise just rh - rh_cor = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) - return rh_cor + rh_wrt_ice_or_water = rh.where(~freezing, other = rh*(e_s_wtr / e_s_ice)) + return rh_wrt_ice_or_water def correctPrecip(precip, wspd):
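For readers checking the rename locally, here is a minimal, self-contained sketch of what `adjustHumidity` now computes. It reuses the Goff & Gratch saturation expressions that appear verbatim in `calculate_specific_humidity` further down this diff; the sample temperatures and humidities are made up for illustration.

```python
import numpy as np
import xarray as xr

T_0, T_100 = 273.15, 373.15   # freezing and steam point (K)
ews, ei0 = 1013.246, 6.1071   # saturation vapour pressure at T_100 and T_0 (hPa)

# Made-up samples: two above freezing, two below
t = xr.DataArray([5.0, 0.5, -10.0, -25.0], dims='time')    # air temperature (C)
rh = xr.DataArray([80.0, 85.0, 70.0, 60.0], dims='time')   # RH over water (%)

# Goff & Gratch saturation vapour pressure over water (hPa)
e_s_wtr = 10**(-7.90298 * (T_100 / (t + T_0) - 1)
               + 5.02808 * np.log10(T_100 / (t + T_0))
               - 1.3816E-7 * (10**(11.344 * (1 - (t + T_0) / T_100)) - 1)
               + 8.1328E-3 * (10**(-3.49149 * (T_100 / (t + T_0) - 1)) - 1)
               + np.log10(ews))

# Goff & Gratch saturation vapour pressure over ice (hPa)
e_s_ice = 10**(-9.09718 * (T_0 / (t + T_0) - 1)
               - 3.56654 * np.log10(T_0 / (t + T_0))
               + 0.876793 * (1 - (t + T_0) / T_0)
               + np.log10(ei0))

# Rescale RH by the water/ice saturation ratio only where freezing
freezing = (t < 0) & (t > -100)
rh_wrt_ice_or_water = rh.where(~freezing, other=rh * (e_s_wtr / e_s_ice))
print(rh_wrt_ice_or_water.values)  # subfreezing samples increase, others unchanged
```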
diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 8cd0ea5c..d6d7859d 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -def toL3(L2, +def toL3(L2, data_adjustments_dir: Path, station_config={}, T_0=273.15): @@ -23,46 +23,46 @@ def toL3(L2, - smoothed and inter/extrapolated GPS coordinates - continuous surface height, ice surface height, snow height - thermistor depths - - + + Parameters ---------- L2 : xarray.Dataset L2 AWS data station_config : Dict - Dictionary containing the information necessary for the processing of + Dictionary containing the information necessary for the processing of L3 variables (relocation dates for coordinates processing, or thermistor string maintenance date for the thermistor depths) - T_0 : int + T_0 : float Freezing point temperature. Default is 273.15. ''' ds = L2 ds.attrs['level'] = 'L3' - T_100 = T_0+100 # Get steam point temperature as K - + T_100 = T_0+100 # Get steam point temperature as K + # Turbulent heat flux calculation if ('t_u' in ds.keys()) and \ ('p_u' in ds.keys()) and \ - ('rh_u_cor' in ds.keys()): + ('rh_u_wrt_ice_or_water' in ds.keys()): # Upper boom bulk calculation T_h_u = ds['t_u'].copy() # Copy for processing p_h_u = ds['p_u'].copy() - RH_cor_h_u = ds['rh_u_cor'].copy() - - q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, RH_cor_h_u) # Calculate specific humidity + rh_h_u_wrt_ice_or_water = ds['rh_u_wrt_ice_or_water'].copy() + + q_h_u = calculate_specific_humidity(T_0, T_100, T_h_u, p_h_u, rh_h_u_wrt_ice_or_water) # Calculate specific humidity if ('wspd_u' in ds.keys()) and \ ('t_surf' in ds.keys()) and \ ('z_boom_u' in ds.keys()): WS_h_u = ds['wspd_u'].copy() Tsurf_h = ds['t_surf'].copy() # T surf from derived upper boom product. TODO is this okay to use with lower boom parameters? - z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of Anemometer - z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer - - if not ds.attrs['bedrock']: + z_WS_u = ds['z_boom_u'].copy() + 0.4 # Get height of anemometer + z_T_u = ds['z_boom_u'].copy() - 0.1 # Get height of thermometer + + if not ds.attrs['bedrock']: SHF_h_u, LHF_h_u= calculate_tubulent_heat_fluxes(T_0, T_h_u, Tsurf_h, WS_h_u, # Calculate latent and sensible heat fluxes - z_WS_u, z_T_u, q_h_u, p_h_u) - + z_WS_u, z_T_u, q_h_u, p_h_u) + ds['dshf_u'] = (('time'), SHF_h_u.data) ds['dlhf_u'] = (('time'), LHF_h_u.data) else: @@ -71,80 +71,80 @@ def toL3(L2, q_h_u = 1000 * q_h_u # Convert sp.humid from kg/kg to g/kg ds['qh_u'] = (('time'), q_h_u.data) else: - logger.info('t_u, p_u or rh_u_cor missing, cannot calulate tubrulent heat fluxes') + logger.info('t_u, p_u or rh_u_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes') # Lower boom bulk calculation if ds.attrs['number_of_booms']==2: if ('t_l' in ds.keys()) and \ ('p_l' in ds.keys()) and \ - ('rh_l_cor' in ds.keys()): + ('rh_l_wrt_ice_or_water' in ds.keys()): T_h_l = ds['t_l'].copy() # Copy for processing - p_h_l = ds['p_l'].copy() - RH_cor_h_l = ds['rh_l_cor'].copy() + p_h_l = ds['p_l'].copy() + rh_h_l_wrt_ice_or_water = ds['rh_l_wrt_ice_or_water'].copy() - q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, RH_cor_h_l) # Calculate sp.humidity + q_h_l = calculate_specific_humidity(T_0, T_100, T_h_l, p_h_l, rh_h_l_wrt_ice_or_water) # Calculate sp.humidity if ('wspd_l' in ds.keys()) and \ ('t_surf' in ds.keys()) and \ ('z_boom_l' in ds.keys()): - z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of W - z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer - WS_h_l = ds['wspd_l'].copy() - if not ds.attrs['bedrock']: - SHF_h_l, LHF_h_l= calculate_tubulent_heat_fluxes(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes - z_WS_l, z_T_l, q_h_l, p_h_l) - + z_WS_l = ds['z_boom_l'].copy() + 0.4 # Get height of anemometer + z_T_l = ds['z_boom_l'].copy() - 0.1 # Get height of thermometer + WS_h_l = ds['wspd_l'].copy() + if not ds.attrs['bedrock']: + SHF_h_l, LHF_h_l= calculate_tubulent_heat_fluxes(T_0, T_h_l, Tsurf_h, WS_h_l, # Calculate latent and sensible heat fluxes + z_WS_l, z_T_l, q_h_l, p_h_l) + ds['dshf_l'] = (('time'), SHF_h_l.data) ds['dlhf_l'] = (('time'), LHF_h_l.data) else: logger.info('wspd_l, t_surf or z_boom_l missing, cannot calculate turbulent heat fluxes') - + q_h_l = 1000 * q_h_l # Convert sp.humid from kg/kg to g/kg ds['qh_l'] = (('time'),
q_h_l.data) else: - logger.info('t_l, p_l or rh_l_cor missing, cannot calulate tubrulent heat fluxes') + logger.info('t_l, p_l or rh_l_wrt_ice_or_water missing, cannot calculate turbulent heat fluxes') if len(station_config)==0: logger.warning('\n***\nThe station configuration file is missing or improperly passed to pypromice. Some processing steps might fail.\n***\n') - - # Smoothing and inter/extrapolation of GPS coordinates + + # Smoothing and inter/extrapolation of GPS coordinates for var in ['gps_lat', 'gps_lon', 'gps_alt']: ds[var.replace('gps_','')] = ('time', gps_coordinate_postprocessing(ds, var, station_config)) - + # processing continuous surface height, ice surface height, snow height try: ds = process_surface_height(ds, data_adjustments_dir, station_config) except Exception as e: logger.error("Error processing surface height at %s"%L2.attrs['station_id']) logging.error(e, exc_info=True) - + # making sure dataset has the attributes contained in the config files if 'project' in station_config.keys(): ds.attrs['project'] = station_config['project'] else: logger.error('No project info in station_config. Using \"PROMICE\".') ds.attrs['project'] = "PROMICE" - + if 'location_type' in station_config.keys(): ds.attrs['location_type'] = station_config['location_type'] else: logger.error('No location_type info in station_config. Using \"ice sheet\".') ds.attrs['location_type'] = "ice sheet" - + return ds def process_surface_height(ds, data_adjustments_dir, station_config={}): """ - Process surface height data for different site types and create + Process surface height data for different site types and create surface height variables. Parameters ---------- ds : xarray.Dataset The dataset containing various measurements and attributes including - 'site_type' which determines the type of site (e.g., 'ablation', - 'accumulation', 'bedrock') and other relevant data variables such as + 'site_type' which determines the type of site (e.g., 'ablation', + 'accumulation', 'bedrock') and other relevant data variables such as 'z_boom_u', 'z_stake', 'z_pt_cor', etc.
Returns @@ -164,18 +164,18 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): if ds.z_stake.notnull().any(): first_valid_index = ds.time.where((ds.z_stake + ds.z_boom_u).notnull(), drop=True).data[0] ds['z_surf_2'] = ds.z_surf_1.sel(time=first_valid_index) + ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] - + # Use corrected point data if available if 'z_pt_cor' in ds.data_vars: ds['z_ice_surf'] = ('time', ds['z_pt_cor'].data) - + else: # Calculate surface heights for other site types first_valid_index = ds.time.where(ds.z_boom_u.notnull(), drop=True).data[0] ds['z_surf_1'] = ds.z_boom_u.sel(time=first_valid_index) - ds['z_boom_u'] if 'z_stake' in ds.data_vars and ds.z_stake.notnull().any(): first_valid_index = ds.time.where(ds.z_stake.notnull(), drop=True).data[0] - ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] + ds['z_surf_2'] = ds.z_stake.sel(time=first_valid_index) - ds['z_stake'] if 'z_boom_l' in ds.data_vars: # need a combine first because KAN_U switches from having a z_stake # to having a z_boom_l @@ -191,7 +191,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): (ds['z_surf_combined'], ds['z_ice_surf'], ds['z_surf_1_adj'], ds['z_surf_2_adj']) = combine_surface_height(df_in, ds.attrs['site_type']) - + if ds.attrs['site_type'] == 'ablation': # Calculate rolling minimum for ice surface height and snow height @@ -217,22 +217,22 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): z_ice_surf = (ts_interpolated .rolling('1D', center=True, min_periods=1) .median()) - + z_ice_surf = z_ice_surf.loc[ds.time] # here we make sure that the periods where both z_stake and z_pt are # missing are also missing in z_ice_surf msk = ds['z_ice_surf'].notnull() | ds['z_surf_2_adj'].notnull() - z_ice_surf = z_ice_surf.where(msk) - + z_ice_surf = z_ice_surf.where(msk) + # taking running minimum to get ice z_ice_surf = z_ice_surf.cummin() # filling gaps only if they are less than a year long and if values on both # sides are less than 0.01 m apart - + # Forward and backward fill to identify bounds of gaps df_filled = z_ice_surf.fillna(method='ffill').fillna(method='bfill') - + # Identify gaps and their start and end dates gaps = pd.DataFrame(index=z_ice_surf[z_ice_surf.isna()].index) gaps['prev_value'] = df_filled.shift(1) @@ -241,17 +241,17 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): gaps['gap_end'] = gaps.index.to_series().shift(-1) gaps['gap_duration'] = (gaps['gap_end'] - gaps['gap_start']).dt.days gaps['value_diff'] = (gaps['next_value'] - gaps['prev_value']).abs() - + # Determine which gaps to fill mask = (gaps['gap_duration'] < 365) & (gaps['value_diff'] < 0.01) gaps_to_fill = gaps[mask].index - + # Fill gaps in the original Series z_ice_surf.loc[gaps_to_fill] = df_filled.loc[gaps_to_fill] - + # bringing the variable into the dataset ds['z_ice_surf'] = z_ice_surf - + ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf']) ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf']) ds['z_ice_surf'] = ds['z_ice_surf'].where(ds.snow_height.notnull()) @@ -274,43 +274,43 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): station_config) for var in df_out.columns: ds[var] = ('time', df_out[var].values) - + return ds def combine_surface_height(df, site_type, threshold_ablation = -0.0002): '''Combines the data from three sensors: the two sonic rangers and the pressure transducer, to recreate the surface height,
the ice surface height and the snow depth through the years. For the accumulation sites, it is - only the average of the two sonic rangers (after manual adjustments to - correct maintenance shifts). For the ablation sites, first an ablation + only the average of the two sonic rangers (after manual adjustments to + correct maintenance shifts). For the ablation sites, first an ablation period is estimated each year (either the period when z_pt_cor decreases - or JJA if no better estimate) then different adjustmnents are conducted + or JJA if no better estimate) then different adjustments are conducted to stitch the three time series together: z_ice_surface (adjusted from z_pt_cor) or if unavailable, z_surf_2 (adjusted from z_stake) are used in the ablation period while an average of z_surf_1 and z_surf_2 - are used otherwise, after they are being adjusted to z_ice_surf at the end + are used otherwise, after being adjusted to z_ice_surf at the end of the ablation season. - + Parameters ---------- df : pandas.dataframe Dataframe with datetime index and variables z_surf_1, z_surf_2 and z_ice_surf - site_type : str + site_type : str Either 'accumulation' or 'ablation' threshold_ablation : float Threshold to which a z_pt_cor hourly decrease is compared. If the hourly decrease is steeper than the threshold, ablation is assumed. ''' logger.info('Combining surface height') - + if 'z_surf_2' not in df.columns: logger.info('-> did not find z_surf_2') df["z_surf_2"] = df["z_surf_1"].values*np.nan - + if 'z_ice_surf' not in df.columns: logger.info('-> did not find z_ice_surf') df["z_ice_surf"] = df["z_surf_1"].values*np.nan - + if site_type in ['accumulation', 'bedrock']: logger.info('-> no z_pt or accumulation site: averaging z_surf_1 and z_surf_2') df["z_surf_1_adj"] = hampel(df["z_surf_1"].interpolate(limit=72)).values @@ -326,7 +326,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): df['z_surf_combined'] = df.z_surf_2_adj.values # df["z_surf_combined"] = hampel(df["z_surf_combined"].interpolate(limit=72)).values - return (df['z_surf_combined'], df["z_surf_combined"]*np.nan, + return (df['z_surf_combined'], df["z_surf_combined"]*np.nan, df["z_surf_1_adj"], df["z_surf_2_adj"]) else: @@ -340,7 +340,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): df["z_surf_2_adj"] = hampel(df["z_surf_2"].interpolate(limit=72), k=24, t0=5).values # defining ice ablation period from the decrease of a smoothed version of z_pt - # meaning when smoothed_z_pt.diff() < threshold_ablation + # meaning when smoothed_z_pt.diff() < threshold_ablation # first smoothing smoothed_PT = (df['z_ice_surf'] .resample('h') @@ -354,14 +354,14 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # smoothed_PT.loc[df.z_ice_surf.isnull()] = np.nan # logical index where ablation is detected - ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation, + ind_ablation = np.logical_and(smoothed_PT.diff().values < threshold_ablation, np.isin(smoothed_PT.diff().index.month, [6, 7, 8, 9])) # finding the beginning and end of each period with True idx = np.argwhere(np.diff(np.r_[False,ind_ablation, False])).reshape(-1, 2) idx[:, 1] -= 1 - +
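The `np.argwhere(np.diff(...))` idiom above is compact but easy to misread; here is a tiny, standalone illustration (with a made-up mask) of how it turns a boolean array into inclusive `[start, end]` index pairs for each run of `True` values:

```python
import numpy as np

mask = np.array([False, True, True, False, False, True, True, True, False])

# np.diff on booleans is an XOR, so True marks every False<->True transition;
# padding with False on both ends guarantees an even number of transitions.
idx = np.argwhere(np.diff(np.r_[False, mask, False])).reshape(-1, 2)
idx[:, 1] -= 1   # make the end index inclusive

print(idx)       # [[1 2] [5 7]] -> True runs at indices 1..2 and 5..7
```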
# fill small gaps in the ice ablation periods. for i in range(len(idx)-1): ind = idx[i] @@ -371,20 +371,20 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # season if df.index[ind_next[0]]-df.index[ind[1]] < […] exclusion_period = (df.index >= period_start) & (df.index < period_end) ind_ablation[exclusion_period] = False - + hs1=df["z_surf_1_adj"].interpolate(limit=24*2).copy() hs2=df["z_surf_2_adj"].interpolate(limit=24*2).copy() z=df["z_ice_surf_adj"].interpolate(limit=24*2).copy() @@ -397,9 +397,9 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if any(~np.isnan(hs2.iloc[:24*7])) & any(~np.isnan(hs1.iloc[:24*7])): hs2 = hs2 + hs1.iloc[:24*7].mean() - hs2.iloc[:24*7].mean() - + if any(~np.isnan(z.iloc[:24*7])): - # expressing ice surface height relative to its mean value in the + # expressing ice surface height relative to its mean value in the # first week of the record z = z - z.iloc[:24*7].mean() elif z.notnull().any(): @@ -414,16 +414,16 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): z.first_valid_index():(z.first_valid_index()+pd.to_timedelta('14D')) ].mean() + hs1.iloc[:24*7].mean() else: - # if there is more than a year (actually 251 days) between the + # if there is more than a year (actually 251 days) between the # initiation of the AWS and the installation of the pressure transducer # we remove the intercept in the pressure transducer data. - # Removing the intercept + # Removing the intercept # means that we consider the ice surface height at 0 when the AWS # is installed, and not when the pressure transducer is installed. Y = z.iloc[:].values.reshape(-1, 1) - X = z.iloc[~np.isnan(Y)].index.astype(np.int64).values.reshape(-1, 1) - Y = Y[~np.isnan(Y)] - linear_regressor = LinearRegression() + X = z.iloc[~np.isnan(Y)].index.astype(np.int64).values.reshape(-1, 1) + Y = Y[~np.isnan(Y)] + linear_regressor = LinearRegression() linear_regressor.fit(X, Y) Y_pred = linear_regressor.predict(z.index.astype(np.int64).values.reshape(-1, 1) ) z = z-Y_pred[0] @@ -444,7 +444,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): ind_abl_yr = np.logical_and(ind_yr, df.index.month.isin([6,7,8])) ind_ablation[ind_yr] = ind_abl_yr[ind_yr] logger.debug(str(y)+' no z_ice_surf, just using JJA') - + else: logger.debug(str(y)+ ' derived from z_ice_surf') @@ -494,7 +494,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if all(np.isnan(z_jja)) and any(~np.isnan(hs2_jja)): # if there is no PT for a given year, but there is some hs2 - # then z will be adjusted to hs2 next time it is available + # then z will be adjusted to hs2 next time it is available hs2_ref = 1 if all(np.isnan(z_winter)) and all(np.isnan(hs2_winter)): @@ -518,7 +518,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # in some instances, the PT data is available but no ablation # is recorded, then hs2 remains the reference during that time.
- # When eventually there is ablation, then we need to find the + # When eventually there is ablation, then we need to find the # first index in these preceding ablation-free years # the shift will be applied back from this point # first_index = z[:z[str(y)].first_valid_index()].isnull().iloc[::-1].idxmax() @@ -538,13 +538,13 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): else: logger.debug('adjusting z to hs2') first_index = hs2.iloc[ind_start[i]:].first_valid_index() - z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] + z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] else: - logger.debug('adjusting z to hs1') - z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] + logger.debug('adjusting z to hs2') + z[first_index:] = z[first_index:] - z[first_index] + hs2[first_index] hs2_ref = 0 # from now on PT is the reference - - + + else: # if z_pt is the reference and there is some ablation # then hs1 and hs2 are adjusted to z_pt @@ -560,7 +560,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): [first_index, hs2_year.first_valid_index()])) - # if PT, hs1 and hs2 are all nan until station is reactivated, then + # if PT, hs1 and hs2 are all nan until station is reactivated, then first_day_of_year = pd.to_datetime(str(y)+'-01-01') if len(z[first_day_of_year:first_index-pd.to_timedelta('1D')])>0: @@ -568,8 +568,8 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): hs1[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all() & \ hs2[first_day_of_year:first_index-pd.to_timedelta('1D')].isnull().all(): if (~np.isnan(np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')])) \ - and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))): - logger.debug(' ======= adjusting hs1 and hs2 to z_pt') + and ~np.isnan(np.nanmean(hs2[first_index:first_index+pd.to_timedelta('1D')]))): + logger.debug(' ======= adjusting hs1 and hs2 to z_pt') if ~np.isnan(np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) ): hs1[first_index:] = hs1[first_index:] \ - np.nanmean(hs1[first_index:first_index+pd.to_timedelta('1D')]) \ @@ -580,23 +580,23 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): + np.nanmean(z[first_index:first_index+pd.to_timedelta('1D')]) # adjustment taking place at the end of the ablation period - if (ind_end[i] != -999): + if (ind_end[i] != -999): # if y == 2023: # import pdb; pdb.set_trace() # if there's ablation and # if there are PT data available at the end of the melt season if z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*7)].notnull().any(): logger.debug('adjusting hs2 to z') - # then we adjust hs2 to the end-of-ablation z + # then we adjust hs2 to the end-of-ablation z # first trying at the end of melt season - if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])): + if ~np.isnan(np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)])): logger.debug('using end of melt season') hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \ np.nanmean(hs2.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) + \ np.nanmean(z.iloc[(ind_end[i]-24*7):(ind_end[i]+24*30)]) # if not possible, then trying the end of the following accumulation season elif (i+1 < len(ind_start)): - if ind_start[i+1]!=-999 and any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])): + if ind_start[i+1]!=-999 and
any(~np.isnan(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)])): logger.debug('using end of accumulation season') hs2.iloc[ind_end[i]:] = hs2.iloc[ind_end[i]:] - \ np.nanmean(hs2.iloc[(ind_start[i+1]-24*7):(ind_start[i+1]+24*7)]) + \ @@ -614,7 +614,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if any(~np.isnan(hs1_following_winter)): logger.debug('to hs1') # then we adjust hs1 to hs2 during the accumulation period - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan @@ -628,12 +628,12 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): hs2_following_winter = hs2[str(y)+'-09-01':str(y+1)+'-03-01'].copy() # adjusting hs1 to hs2 (no ablation case) if any(~np.isnan(hs1_following_winter)): - logger.debug('adjusting hs1') + logger.debug('adjusting hs1') # and if there are some hs2 during the accumulation period if any(~np.isnan(hs2_following_winter)): logger.debug('to hs2') # then we adjust hs1 to hs2 during the accumulation period - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available hs1_following_winter[np.isnan(hs2_following_winter)] = np.nan hs2_following_winter[np.isnan(hs1_following_winter)] = np.nan @@ -652,7 +652,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): if any(~np.isnan(hs2_following_winter)): logger.debug('to hs2, minimizing winter difference') # then we adjust hs1 to hs2 during the accumulation period - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available tmp1 = hs1.iloc[ind_end[i]:min(len(hs1),ind_end[i]+24*30*9)].copy() tmp2 = hs2.iloc[ind_end[i]:min(len(hs2),ind_end[i]+24*30*9)].copy() @@ -670,15 +670,15 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): # if no hs2, then use PT data available at the end of the melt season elif np.any(~np.isnan(z.iloc[(ind_end[i]-24*14):(ind_end[i]+24*7)])): logger.debug('to z') - # then we adjust hs2 to the end-of-ablation z + # then we adjust hs2 to the end-of-ablation z # first trying at the end of melt season - if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)])): + if ~np.isnan(np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)])): logger.debug('using end of melt season') hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \ np.nanmean(hs1.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)]) + \ np.nanmean(z.iloc[(ind_end[i]-24*14):(ind_end[i]+24*30)]) # if not possible, then trying the end of the following accumulation season - elif ind_start[i+1]!=-999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)])): + elif ind_start[i+1]!=-999 and any(~np.isnan(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]+ z.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)])): logger.debug('using end of accumulation season') hs1.iloc[ind_end[i]:] = hs1.iloc[ind_end[i]:] - \ np.nanmean(hs1.iloc[(ind_start[i+1]-24*14):(ind_start[i+1]+24*7)]) + \ @@ -686,7 +686,7 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): elif
any(~np.isnan(hs2_year)): logger.debug('to the last value of hs2') # then we adjust hs1 to hs2 during the accumulation period - # adjustment is done so that the mean hs1 and mean hs2 match + # adjustment is done so that the mean hs1 and mean hs2 match # for the period when both are available half_span = pd.to_timedelta('7D') tmp1 = hs1_year.loc[(hs2_year.last_valid_index()-half_span):(hs2_year.last_valid_index()+half_span)].copy() @@ -702,25 +702,25 @@ def combine_surface_height(df, site_type, threshold_ablation = -0.0002): df["z_surf_combined"] = np.nan # in winter, both SR1 and SR2 are used - df["z_surf_combined"] = df["z_surf_2_adj"].interpolate(limit=72).values - + df["z_surf_combined"] = df["z_surf_2_adj"].interpolate(limit=72).values + # in ablation season we use SR2 instead of the SR1&2 average # here two options: # 1) we ignore the SR1 and only use SR2 - # 2) we use SR1 when SR2 is not available (commented) + # 2) we use SR1 when SR2 is not available (commented) # the latter one can cause jumps when SR2 starts to be available a few days after SR1 data_update = df[["z_surf_1_adj", "z_surf_2_adj"]].mean(axis=1).values - - ind_update = ~ind_ablation + + ind_update = ~ind_ablation #ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update)) - df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] + df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] # in ablation season we use pressure transducer over all other options - data_update = df[ "z_ice_surf_adj"].interpolate(limit=72).values + data_update = df[ "z_ice_surf_adj"].interpolate(limit=72).values ind_update = np.logical_and(ind_ablation, ~np.isnan(data_update)) - df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] - + df.loc[ind_update,"z_surf_combined"] = data_update[ind_update] + logger.info('surface height combination finished') return df['z_surf_combined'], df["z_ice_surf_adj"], df["z_surf_1_adj"], df["z_surf_2_adj"] @@ -730,7 +730,7 @@ def hampel(vals_orig, k=7*24, t0=15): k: size of window (including the sample; 7 is equal to 3 on either side of value) ''' #Make copy so original not edited - vals=vals_orig.copy() + vals=vals_orig.copy() #Hampel Filter L= 1.4826 rolling_median=vals.rolling(k).median()
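The rest of the `hampel` body falls outside the hunk above. For orientation, a common formulation of the filter — and, as far as the visible fragments (`L= 1.4826`, rolling median) suggest, the shape of what the repo implements; treat the details as an assumption — looks like this:

```python
import numpy as np
import pandas as pd

def hampel_sketch(vals_orig, k=7 * 24, t0=15):
    # Flag points further than t0 scaled MADs from the rolling median
    vals = vals_orig.copy()              # keep the caller's series intact
    L = 1.4826                           # scale factor: MAD -> std for normal data
    rolling_median = vals.rolling(k).median()
    mad = lambda x: np.median(np.abs(x - np.median(x)))
    rolling_mad = vals.rolling(k).apply(mad, raw=True)
    outliers = np.abs(vals - rolling_median) >= t0 * L * rolling_mad
    vals[outliers] = np.nan              # drop outliers rather than replace them
    return vals

s = pd.Series([0.0, 0.1, 0.05, 9.9, 0.1, 0.0] * 60)
print(hampel_sketch(s, k=24, t0=5).isna().sum())  # spikes removed
```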
@@ -744,19 +744,19 @@ def get_thermistor_depth(df_in, site, station_config): - '''Calculates the depth of the thermistors through time based on their + '''Calculates the depth of the thermistors through time based on their installation depth (collected in a google sheet) and on the change of surface height: instruments getting buried under new snow or surfacing due to ablation. There is a potential for additional filtering of thermistor data for surfaced (or just noisy) thermistors, but that is currently deactivated because it is slow. - + Parameters ---------- df_in : pandas.DataFrame - dataframe containing the ice/firn temperature t_i_* as well as the + dataframe containing the ice/firn temperature t_i_* as well as the combined surface height z_surf_combined site : str - stid, so that maintenance date and sensor installation depths can be found + stid, so that maintenance date and sensor installation depths can be found in database station_config : dict potentially containing the key string_maintenance @@ -768,17 +768,17 @@ def get_thermistor_depth(df_in, site, station_config): # Add more entries as needed ] ''' - + temp_cols_name = ['t_i_'+str(i) for i in range(12) if 't_i_'+str(i) in df_in.columns] num_therm = len(temp_cols_name) - depth_cols_name = ['d_t_i_'+str(i) for i in range(1,num_therm+1)] - + depth_cols_name = ['d_t_i_'+str(i) for i in range(1,num_therm+1)] + if df_in['z_surf_combined'].isnull().all(): logger.info('No valid surface height at '+site+', cannot calculate thermistor depth') df_in[depth_cols_name + ['t_i_10m']] = np.nan else: logger.info('Calculating thermistor depth') - + # Convert maintenance_info to DataFrame for easier manipulation maintenance_string = pd.DataFrame( station_config.get("string_maintenance",[]), @@ -786,14 +786,14 @@ def get_thermistor_depth(df_in, site, station_config): ) maintenance_string["date"] = pd.to_datetime(maintenance_string["date"]) maintenance_string = maintenance_string.sort_values(by='date', ascending=True) - + if num_therm == 8: ini_depth = [1, 2, 3, 4, 5, 6, 7, 10] else: ini_depth = [0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5] df_in[depth_cols_name] = np.nan - + # filtering the surface height surface_height = df_in["z_surf_combined"].copy() ind_filter = surface_height.rolling(window=14, center=True).var() > 0.1 @@ -801,7 +801,7 @@ def get_thermistor_depth(df_in, site, station_config): surface_height[ind_filter] = np.nan df_in["z_surf_combined"] = surface_height.values z_surf_interp = df_in["z_surf_combined"].interpolate() - + # first initialization of the depths for i, col in enumerate(depth_cols_name): df_in[col] = ( @@ -809,18 +809,18 @@ def get_thermistor_depth(df_in, site, station_config): + z_surf_interp.values - z_surf_interp[z_surf_interp.first_valid_index()] ) - + # resetting depth at maintenance if len(maintenance_string.date) == 0: logger.info("No maintenance at "+site) - + for date in maintenance_string.date: if date > z_surf_interp.last_valid_index(): continue new_depth = maintenance_string.loc[ maintenance_string.date == date ].installation_depths.values[0] - + for i, col in enumerate(depth_cols_name[:len(new_depth)]): tmp = df_in[col].copy() tmp.loc[date:] = ( @@ -831,11 +831,11 @@ def get_thermistor_depth(df_in, site, station_config): ] ) df_in[col] = tmp.values - + # % Filtering thermistor data for i in range(len(temp_cols_name)): tmp = df_in[temp_cols_name[i]].copy() - + # variance filter # ind_filter = ( # df_in[temp_cols_name[i]] @@ -850,7 +850,7 @@ def get_thermistor_depth(df_in, site, station_config): # ind_filter.loc[np.isin(month, [5, 6, 7])] = False # if any(ind_filter): # tmp.loc[ind_filter] = np.nan - + # before and after maintenance adaptation filter if len(maintenance_string.date) > 0: for date in maintenance_string.date: @@ -866,7 +866,7 @@ def get_thermistor_depth(df_in, site, station_config): ) < np.timedelta64(7, "D") if any(ind_adapt): tmp.loc[ind_adapt] = np.nan - + # surfaced thermistor ind_pos = df_in[depth_cols_name[i]] < 0.1 if any(ind_pos): @@ -874,7 +874,7 @@ def get_thermistor_depth(df_in, site, station_config): #
copying the filtered values to the original table df_in[temp_cols_name[i]] = tmp.values - + # removing negative depth df_in.loc[df_in[depth_cols_name[i]]<0, depth_cols_name[i]] = np.nan logger.info("interpolating 10 m firn/ice temperature") df_in['t_i_10m'] = interpolate_temperature( df_in.index.values, df_in[depth_cols_name].values.astype(float), df_in[temp_cols_name].values.astype(float), kind="linear", min_diff_to_depth=1.5, ).set_index('date').values - + # filtering ind_pos = df_in["t_i_10m"] > 0.1 ind_low = df_in["t_i_10m"] < -70 @@ -897,12 +897,12 @@ def interpolate_temperature(dates, depth_cor, temp, depth=10, min_diff_to_depth=2, kind="quadratic"): - '''Calculates the depth of the thermistors through time based on their + '''Interpolates the firn/ice temperature at a given depth (10 m by default) from the depths and temperatures of the individual thermistors at each time step. - + Parameters ---------- dates : numpy.array
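The body of `interpolate_temperature` is outside the hunk, but the call site above ("interpolating 10 m firn/ice temperature", `kind="linear"`, `min_diff_to_depth=1.5`) suggests a per-timestep 1-D interpolation of temperature against thermistor depth. A hedged sketch of that idea for a single time step — the values and the exact gap handling are made up:

```python
import numpy as np
from scipy.interpolate import interp1d

# One time step: thermistor depths (m) and temperatures (deg C), made-up values
depths = np.array([2.0, 4.0, 6.0, 8.0, 10.5])
temps = np.array([-12.1, -11.0, -10.2, -9.8, -9.6])

# Only interpolate if some thermistor sits reasonably close to the target depth
target = 10.0
if np.nanmin(np.abs(depths - target)) < 1.5:      # mirrors min_diff_to_depth=1.5
    t_10m = float(interp1d(depths, temps, kind="linear")(target))
    print(t_10m)                                   # -9.64 for these values
```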
@@ -959,20 +959,20 @@ def gps_coordinate_postprocessing(ds, var, station_config={}): static_value = float(ds.attrs[coord_names[var_out]]) else: static_value = np.nan - + # if there are no gps observations, then we use the static value repeated # for each time stamp - if var not in ds.data_vars: + if var not in ds.data_vars: print('no',var,'at', ds.attrs['station_id']) return np.ones_like(ds['t_u'].data)*static_value - + if ds[var].isnull().all(): print('no',var,'at',ds.attrs['station_id']) return np.ones_like(ds['t_u'].data)*static_value - + # Extract station relocations from the config dict station_relocations = station_config.get("station_relocation", []) - + # Convert the ISO8601 strings to pandas datetime objects breaks = [pd.to_datetime(date_str) for date_str in station_relocations] if len(breaks)==0: @@ -981,16 +981,16 @@ def gps_coordinate_postprocessing(ds, var, station_config={}): logger.info('processing '+var+' with relocation on ' + ', '.join([br.strftime('%Y-%m-%dT%H:%M:%S') for br in breaks])) return piecewise_smoothing_and_interpolation(ds[var].to_series(), breaks) - + def piecewise_smoothing_and_interpolation(data_series, breaks): - '''Smoothes, inter- or extrapolate the GPS observations. The processing is - done piecewise so that each period between station relocations are done - separately (no smoothing of the jump due to relocation). Piecewise linear - regression is then used to smooth the available observations. Then this - smoothed curve is interpolated linearly over internal gaps. Eventually, this - interpolated curve is extrapolated linearly for timestamps before the first + '''Smooths, interpolates or extrapolates the GPS observations. The processing is + done piecewise so that each period between station relocations is handled + separately (no smoothing of the jump due to relocation). Piecewise linear + regression is then used to smooth the available observations. Then this + smoothed curve is interpolated linearly over internal gaps. Finally, this + interpolated curve is extrapolated linearly for timestamps before the first valid measurement and after the last valid measurement. - + Parameters ---------- data_series : pandas.Series GPS observations as a series breaks: list List of timestamps of station relocation. First and last item should be None so that they can be used in slice(breaks[i], breaks[i+1]) - + Returns ------- np.ndarray @@ -1009,44 +1009,44 @@ def piecewise_smoothing_and_interpolation(data_series, breaks): _inferred_series = [] for i in range(len(breaks) - 1): df = data_series.loc[slice(breaks[i], breaks[i+1])] - + # Drop NaN values and calculate the number of segments based on valid data df_valid = df.dropna() - if df_valid.shape[0] > 2: + if df_valid.shape[0] > 2: # Fit linear regression model to the valid data range x = pd.to_numeric(df_valid.index).values.reshape(-1, 1) y = df_valid.values.reshape(-1, 1) - + model = LinearRegression() model.fit(x, y) - + # Predict using the model for the entire segment range x_pred = pd.to_numeric(df.index).values.reshape(-1, 1) - + y_pred = model.predict(x_pred) df = pd.Series(y_pred.flatten(), index=df.index) # adds to list the predicted values for the current segment _inferred_series.append(df) - + df_all = pd.concat(_inferred_series) - + # Fill internal gaps with linear interpolation df_all = df_all.interpolate(method='linear', limit_area='inside') - + # Remove duplicate indices and return values as numpy array df_all = df_all[~df_all.index.duplicated(keep='last')] return df_all.values - -def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, - kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622, - gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7, - bb=0.75, cc=5., dd=0.35, R_d=287.05): - '''Calculate latent and sensible heat flux using the bulk calculation - method - + +def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, + kappa=0.4, WS_lim=1., z_0=0.001, g=9.82, es_0=6.1071, eps=0.622, + gamma=16., L_sub=2.83e6, L_dif_max=0.01, c_pd=1005., aa=0.7, + bb=0.75, cc=5., dd=0.35, R_d=287.05): + '''Calculate latent and sensible heat flux using the bulk calculation + method + Parameters ---------- - T_0 : int + T_0 : float Freezing point temperature T_h : xarray.DataArray Air temperature @@ -1065,55 +1065,55 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, p_h : xarray.DataArray Air pressure kappa : float - Von Karman constant (0.35-0.42). Default is 0.4. + Von Karman constant (0.35-0.42). Default is 0.4. WS_lim : float - Default is 1. + Minimum wind speed for turbulent flux calculation (m/s). Default is 1. z_0 : float - Aerodynamic surface roughness length for momention, assumed constant + Aerodynamic surface roughness length for momentum, assumed constant for all ice/snow surfaces. Default is 0.001. - g : int - Gravitational acceleration (m/s2). Default is 9.82. - es_0 : int - Saturation vapour pressure at the melting point (hPa). Default is 6.1071. - eps : int + g : float + Gravitational acceleration (m/s2). Default is 9.82. + es_0 : float + Saturation vapour pressure at the melting point (hPa). Default is 6.1071. + eps : float Ratio of molar masses of vapor and dry air (0.622). gamma : float Flux profile correction (Paulson & Dyer). Default is 16. - L_sub : int + L_sub : float Latent heat of sublimation (J/kg). Default is 2.83e6. - L_dif_max : int - Default is 0.01. + L_dif_max : float + Maximum relative change of the Obukhov length between iterations. Default is 0.01. c_pd : float Specific heat of dry air (J/kg/K). Default is 1005. - aa : int - Flux profile correction constants (Holtslag & De Bruin '88). Default is + aa : float + Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.7. - bb : int - Flux profile correction constants (Holtslag & De Bruin '88). Default is + bb : float + Flux profile correction constants (Holtslag & De Bruin '88).
Default is 0.75. cc : float - Flux profile correction constants (Holtslag & De Bruin '88). Default is + Flux profile correction constants (Holtslag & De Bruin '88). Default is 5. dd : float - Flux profile correction constants (Holtslag & De Bruin '88). Default is + Flux profile correction constants (Holtslag & De Bruin '88). Default is 0.35. - R_d : int + R_d : float Gas constant of dry air. Default is 287.05. - + Returns ------- SHF_h : xarray.DataArray Sensible heat flux LHF_h : xarray.DataArray Latent heat flux - ''' - rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density - nu = calculate_viscosity(T_h, T_0, rho_atm) # Calculate kinematic viscosity - + ''' + rho_atm = 100 * p_h / R_d / (T_h + T_0) # Calculate atmospheric density + nu = calculate_viscosity(T_h, T_0, rho_atm) # Calculate kinematic viscosity + SHF_h = xr.zeros_like(T_h) # Create empty xarrays LHF_h = xr.zeros_like(T_h) L = xr.full_like(T_h, 1E5) - + u_star = kappa * WS_h.where(WS_h>0) / np.log(z_WS / z_0) # Rough surfaces, from Smeets & Van den Broeke 2008 Re = u_star * z_0 / nu z_0h = u_star @@ -1126,12 +1126,12 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, * (1 - (Tsurf_h + T_0) / T_0) + np.log10(es_0)) q_surf = eps * es_ice_surf / (p_h - (1 - eps) * es_ice_surf) - theta = T_h + z_T *g / c_pd + theta = T_h + z_T *g / c_pd stable = (theta > Tsurf_h) & (WS_h > WS_lim) unstable = (theta < Tsurf_h) & (WS_h > WS_lim) #TODO: check if unstable = ~stable? And if not why not - #no_wind = (WS_h <= WS_lim) + #no_wind = (WS_h <= WS_lim) # Calculate stable stratification - for i in np.arange(0,31): + for i in np.arange(0,31): psi_m1 = -(aa* z_0/L[stable] + bb*( z_0/L[stable]-cc/dd)*np.exp(-dd* z_0/L[stable]) + bb*cc/dd) psi_m2 = -(aa*z_WS[stable]/L[stable] + bb*(z_WS[stable]/L[stable]-cc/dd)*np.exp(-dd*z_WS[stable]/L[stable]) + bb*cc/dd) psi_h1 = -(aa*z_0h[stable]/L[stable] + bb*(z_0h[stable]/L[stable]-cc/dd)*np.exp(-dd*z_0h[stable]/L[stable]) + bb*cc/dd) @@ -1139,8 +1139,8 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, u_star[stable] = kappa*WS_h[stable]/(np.log(z_WS[stable]/z_0)-psi_m2+psi_m1) Re[stable] = u_star[stable]*z_0/nu[stable] z_0h[stable] = z_0*np.exp(1.5-0.2*np.log(Re[stable])-0.11*(np.log(Re[stable]))**2) - - # If n_elements(where(z_0h[stable] < 1e-6)) get 1 then + + # If n_elements(where(z_0h[stable] < 1e-6)) get 1 then # z_0h[stable[where(z_0h[stable] < 1e-6)]] = 1e-6 z_0h[stable][z_0h[stable] < 1E-6] == 1E-6 th_star = kappa \ @@ -1156,12 +1156,12 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, * (1 + ((1-eps) / eps) * q_h[stable]) \ / (g * kappa * th_star * (1 + ((1-eps)/eps) * q_star)) L_dif = np.abs((L_prev-L[stable])/L_prev) - + # If n_elements(where(L_dif > L_dif_max)) eq 1 then break if np.all(L_dif <= L_dif_max): break - # Calculate unstable stratification + # Calculate unstable stratification if len(unstable) > 0: for i in np.arange(0,21): x1 = (1-gamma*z_0 /L[unstable])**0.25 @@ -1176,8 +1176,8 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, Re[unstable] = u_star[unstable]*z_0/nu[unstable] z_0h[unstable] = z_0 * np.exp(1.5 - 0.2 * np.log(Re[unstable]) - 0.11 \ * (np.log(Re[unstable]))**2) - - # If n_elements(where(z_0h[unstable] < 1e-6)) > 1 then + + # If n_elements(where(z_0h[unstable] < 1e-6)) > 1 then # z_0h[unstable[where(z_0h[unstable] < 1e-6)]] = 1e-6 z_0h[stable][z_0h[stable] < 1E-6] == 1E-6 th_star = kappa * (theta[unstable] - Tsurf_h[unstable])
\ * (1 + ((1-eps) / eps) * q_h[unstable]) \ / (g * kappa * th_star * ( 1 + ((1-eps) / eps) * q_star)) L_dif = abs((L_prev-L[unstable])/L_prev) - + # If n_elements(where(L_dif > L_dif_max)) eq 1 then break if np.all(L_dif <= L_dif_max): break @@ -1199,12 +1199,12 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, HF_nan = np.isnan(p_h) | np.isnan(T_h) | np.isnan(Tsurf_h) \ | np.isnan(q_h) | np.isnan(WS_h) | np.isnan(z_T) SHF_h[HF_nan] = np.nan - LHF_h[HF_nan] = np.nan + LHF_h[HF_nan] = np.nan return SHF_h, LHF_h -def calculate_viscosity(T_h, T_0, rho_atm): +def calculate_viscosity(T_h, T_0, rho_atm): '''Calculate kinematic viscosity of air - + Parameters ---------- T_h : xarray.DataArray Air temperature T_0 : float Freezing point temperature rho_atm : xarray.DataArray Atmospheric density - + Returns ------- xarray.DataArray @@ -1221,15 +1221,15 @@ def calculate_tubulent_heat_fluxes(T_0, T_h, Tsurf_h, WS_h, z_WS, z_T, q_h, p_h, ''' # Dynamic viscosity of air in Pa s (Sutherland's equation using C = 120 K) mu = 18.27e-6 * (291.15 + 120) / ((T_h + T_0) + 120) * ((T_h + T_0) / 291.15)**1.5 - + # Kinematic viscosity of air in m^2/s - return mu / rho_atm + return mu / rho_atm -def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_100=1013.246, eps=0.622): +def calculate_specific_humidity(T_0, T_100, T_h, p_h, rh_h_wrt_ice_or_water, es_0=6.1071, es_100=1013.246, eps=0.622): '''Calculate specific humidity Parameters ---------- - T_0 : float + T_0 : float Freezing point temperature. Default is 273.15. T_100 : float Steam point temperature in Kelvin @@ -1237,20 +1237,20 @@ def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_ Air temperature p_h : xarray.DataArray Air pressure - RH_cor_h : xarray.DataArray + rh_h_wrt_ice_or_water : xarray.DataArray Relative humidity with respect to ice or water es_0 : float Saturation vapour pressure at the melting point (hPa) es_100 : float Saturation vapour pressure at steam point temperature (hPa) - eps : int + eps : float Ratio of molar masses of vapor and dry air (0.622) - + Returns ------- xarray.DataArray Specific humidity data array - ''' + ''' # Saturation vapour pressure above 0 C (hPa) es_wtr = 10**(-7.90298 * (T_100 / (T_h + T_0) - 1) + 5.02808 * np.log10(T_100 / (T_h + T_0)) - 1.3816E-7 * (10**(11.344 * (1 - (T_h + T_0) / T_100)) - 1) @@ -1260,21 +1260,21 @@ def calculate_specific_humidity(T_0, T_100, T_h, p_h, RH_cor_h, es_0=6.1071, es_ es_ice = 10**(-9.09718 * (T_0 / (T_h + T_0) - 1) - 3.56654 * np.log10(T_0 / (T_h + T_0)) + 0.876793 * (1 - (T_h + T_0) / T_0) - + np.log10(es_0)) + + np.log10(es_0)) # Specific humidity at saturation (incorrect below melting point) - q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr) - + q_sat = eps * es_wtr / (p_h - (1 - eps) * es_wtr) + # Replace saturation specific humidity values below melting point - freezing = T_h < 0 + freezing = T_h < 0 q_sat[freezing] = eps * es_ice[freezing] / (p_h[freezing] - (1 - eps) * es_ice[freezing]) - + q_nan = np.isnan(T_h) | np.isnan(p_h) q_sat[q_nan] = np.nan # Convert to kg/kg - return RH_cor_h * q_sat / 100 - -if __name__ == "__main__": - # unittest.main() - pass + return rh_h_wrt_ice_or_water * q_sat / 100 + +if __name__ == "__main__": + # unittest.main() + pass
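As a quick sanity check on the units flowing through `calculate_specific_humidity`, here is its final step in isolation, with made-up numbers; `e_s` would normally come from the Goff & Gratch expressions shown in the hunk:

```python
eps = 0.622      # ratio of molar masses of water vapour and dry air
p_h = 800.0      # air pressure (hPa)
e_s = 1.2        # saturation vapour pressure at air temperature (hPa)
rh = 75.0        # relative humidity w.r.t. ice or water (%)

q_sat = eps * e_s / (p_h - (1 - eps) * e_s)   # saturation specific humidity (kg/kg)
q = rh * q_sat / 100                          # specific humidity (kg/kg), ~7e-4 here
print(q)
```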
diff --git a/src/pypromice/process/aws.py index 1d203ad4..e1285fee 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -94,7 +94,7 @@ def __init__( formats = {dataset.attrs["format"].lower() for dataset in self.L0} if "raw" in formats: self.format = "raw" - elif "STM" in formats: + elif "stm" in formats: self.format = "STM" elif "tx" in formats: self.format = "tx" diff --git a/src/pypromice/process/resample.py index 2280901e..598e6035 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -51,12 +51,13 @@ def resample_dataset(ds_h, t): # taking the 10 min data and using it as instantaneous values: is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) if (t == '60min') and is_10_minutes_timestamp.any(): - cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_wrt_ice_or_water', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + cols_origin = ['p_u', 't_u', 'rh_u', 'rh_u_wrt_ice_or_water', 'wspd_u', 'wdir_u','wspd_x_u','wspd_y_u'] timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index() timestamp_round_hour = df_d.index timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min) - for col in cols_to_update: + for col, col_org in zip(cols_to_update, cols_origin): if col not in df_d.columns: df_d[col] = np.nan else: @@ -67,7 +68,7 @@ def resample_dataset(ds_h, t): timestamp_to_update = timestamp_to_update[missing_instantaneous] df_d.loc[timestamp_to_update, col] = ds_h.reindex( time= timestamp_to_update - )[col.replace('_i','_u')].values + )[col_org].values if col == 'p_i': df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000 @@ -95,8 +96,8 @@ def resample_dataset(ds_h, t): df_d[var] = (p_vap.to_series().resample(t).mean() \ / es_wtr.to_series().resample(t).mean())*100 - if var+'_cor' in df_d.keys(): - df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \ + if var+'_wrt_ice_or_water' in df_d.keys(): + df_d[var+'_wrt_ice_or_water'] = (p_vap.to_series().resample(t).mean() \ / es_cor.to_series().resample(t).mean())*100 # passing each variable attribute to the resampled dataset
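The vapour-pressure averaging in the hunk above is worth spelling out: RH percentages are not averaged directly; instead, hourly vapour pressures are averaged and re-normalised by the averaged saturation pressure. A toy pandas version with made-up values and a daily target:

```python
import pandas as pd

idx = pd.date_range("2023-12-01", periods=6, freq="h")
rh = pd.Series([80.0, 82.0, 85.0, 70.0, 65.0, 60.0], index=idx)  # RH (%)
es = pd.Series([1.2, 1.1, 1.0, 1.4, 1.6, 1.8], index=idx)        # sat. vapour pressure (hPa)

p_vap = es * rh / 100                  # vapour pressure of each sample (hPa)
rh_daily = (p_vap.resample("D").mean() / es.resample("D").mean()) * 100
print(rh_daily)                        # one properly weighted daily RH value
```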
diff --git a/src/pypromice/process/value_clipping.py index f65cad52..dfb26b77 100644 --- a/src/pypromice/process/value_clipping.py +++ b/src/pypromice/process/value_clipping.py @@ -11,8 +11,6 @@ def clip_values( ): """ Clip values in dataset to defined "hi" and "lo" variables from dataframe. - There is a special treatment here for rh_u and rh_l variables, where values - are clipped and not assigned to NaN. This is for replication purposes Parameters ---------- diff --git a/src/pypromice/process/write.py index 77e24863..181ed045 100644 --- a/src/pypromice/process/write.py +++ b/src/pypromice/process/write.py @@ -100,14 +100,14 @@ def prepare_and_write( elif t == 86400: # removing instantaneous values from daily and monthly files for v in col_names: - if ("_i" in v) and ("_i_" not in v): + if v in ['p_i', 't_i', 'rh_i', 'wspd_i', 'wdir_i', 'wspd_x_i', 'wspd_y_i']: col_names.remove(v) out_csv = output_dir / f"{name}_day.csv" out_nc = output_dir / f"{name}_day.nc" else: # removing instantaneous values from daily and monthly files for v in col_names: - if ("_i" in v) and ("_i_" not in v): + if v in ['p_i', 't_i', 'rh_i', 'wspd_i', 'wdir_i', 'wspd_x_i', 'wspd_y_i']: col_names.remove(v) out_csv = output_dir / f"{name}_month.csv" out_nc = output_dir / f"{name}_month.nc" diff --git a/src/pypromice/resources/variable_aliases_GC-Net.csv index afac35f7..32a51c1b 100644 --- a/src/pypromice/resources/variable_aliases_GC-Net.csv +++ b/src/pypromice/resources/variable_aliases_GC-Net.csv @@ -5,10 +5,10 @@ p_l, t_u,TA2 t_l,TA1 rh_u,RH2 -rh_u_cor,RH2_cor +rh_u_wrt_ice_or_water,RH2_cor qh_u,Q2 rh_l,RH1 -rh_l_cor,RH1_cor +rh_l_wrt_ice_or_water,RH1_cor qh_l,Q1 wspd_u,VW2 wspd_l,VW1 diff --git a/src/pypromice/resources/variables.csv index ec6a3b15..5b3f7714 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -6,10 +6,10 @@ p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,6 t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",all,1,1,1,4 t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",two-boom,1,1,1,4 rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time,FALSE,,0,100,"",all,1,1,1,4 -rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,"",all,0,1,1,4 +rh_u_wrt_ice_or_water,relative_humidity_with_respect_to_ice_or_water,Relative humidity (upper boom) with respect to saturation over ice in subfreezing conditions and over water otherwise,%,modelResult,time,FALSE,L2 or later,0,150,"",all,0,1,1,4 qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,"",all,0,1,1,4 rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,"",two-boom,1,1,1,4 -rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,"",two-boom,0,1,1,4 +rh_l_wrt_ice_or_water,relative_humidity_with_respect_to_ice_or_water,Relative humidity (lower boom) with respect to saturation over ice in subfreezing conditions and over water otherwise,%,modelResult,time,FALSE,L2 or later,0,150,"",two-boom,0,1,1,4 qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_u wspd_x_u wspd_y_u,all,1,1,1,4 wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_l wspd_x_l wspd_y_l,two-boom,1,1,1,4 @@ -99,7 +99,7 @@ t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,phy p_i,air_pressure,Air pressure (instantaneous) minus
1000,hPa,physicalMeasurement,time,TRUE,,-350,100,,all,1,1,1,4 t_i,air_temperature,Air temperature (instantaneous),degrees_C,physicalMeasurement,time,TRUE,,-80,40,,all,1,1,1,4 rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time,TRUE,,0,150,"",all,1,1,1,4 -rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 +rh_i_wrt_ice_or_water,relative_humidity_with_respect_to_ice_or_water,Relative humidity (instantaneous) with respect to saturation over ice in subfreezing conditions and over water otherwise,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,"",all,0,1,1,4 diff --git a/tests/unit/bufr_export/tx_l3_test1.csv b/tests/unit/bufr_export/tx_l3_test1.csv index c9aac70c..c36e441d 100755 --- a/tests/unit/bufr_export/tx_l3_test1.csv +++ b/tests/unit/bufr_export/tx_l3_test1.csv @@ -1,4 +1,4 @@ -time,p_u,p_l,t_u,t_l,rh_u,rh_u_cor,qh_u,rh_l,rh_l_cor,qh_l,wspd_u,wspd_l,wdir_u,wdir_l,wspd_x_u,wspd_y_u,wspd_x_l,wspd_y_l,dsr,dsr_cor,usr,usr_cor,albedo,dlr,ulr,cc,t_surf,dlhf_u,dlhf_l,dshf_u,dshf_l,z_boom_u,z_boom_l,precip_u,precip_u_cor,precip_u_rate,precip_l,precip_l_cor,precip_l_rate,t_i_1,t_i_2,t_i_3,t_i_4,t_i_5,t_i_6,t_i_7,t_i_8,t_i_9,t_i_10,t_i_11,tilt_x,tilt_y,rot,gps_lat,gps_lon,gps_alt,gps_time,gps_hdop,batt_v,fan_dc_u,fan_dc_l,t_rad,p_i,t_i,rh_i,rh_i_cor,wspd_i,wdir_i,wspd_x_i,wspd_y_i,msg_i,msg_lat,msg_lon +time,p_u,p_l,t_u,t_l,rh_u,rh_u_wrt_ice_or_water,qh_u,rh_l,rh_l_wrt_ice_or_water,qh_l,wspd_u,wspd_l,wdir_u,wdir_l,wspd_x_u,wspd_y_u,wspd_x_l,wspd_y_l,dsr,dsr_cor,usr,usr_cor,albedo,dlr,ulr,cc,t_surf,dlhf_u,dlhf_l,dshf_u,dshf_l,z_boom_u,z_boom_l,precip_u,precip_u_cor,precip_u_rate,precip_l,precip_l_cor,precip_l_rate,t_i_1,t_i_2,t_i_3,t_i_4,t_i_5,t_i_6,t_i_7,t_i_8,t_i_9,t_i_10,t_i_11,tilt_x,tilt_y,rot,gps_lat,gps_lon,gps_alt,gps_time,gps_hdop,batt_v,fan_dc_u,fan_dc_l,t_rad,p_i,t_i,rh_i,rh_i_cor,wspd_i,wdir_i,wspd_x_i,wspd_y_i,msg_i,msg_lat,msg_lon 2023-12-01 00:00:00,784.5,785.8,-16.32,-16.3,77.87,91.2846,1.0583,80.1,93.8804,1.0887,16.33,15.27,113.2,122.8,15.0095,-6.4331,12.8355,-8.2719,-1.9282,0.0,0.5033,0.0,,182.6197,241.7566,0.3205,-17.134,-1.6006,1.6393,30.3492,31.1479,4.1967,2.946,129.4,136.5924,0.0,304.0,315.0223,0.0,,-8.33,-7.41,-6.43,-5.9,-5.64,-5.51,-6.01,,,-12.46,0.6404,0.8809,218.9,66.482488,-46.29424,2135.0,21.0,0.9,12.72,35.15,11.81,-16.44,-213.9,-16.4,82.0,96.2012,15.48,122.3,13.0847,-8.2718,0.0,66.5026,-46.33518 2023-12-01 01:00:00,784.1,785.5,-15.82,-15.92,76.17,88.8564,1.0798,78.75,91.956,1.1052,15.85,14.83,112.3,126.2,14.6646,-6.0144,11.9672,-8.7587,-2.0466,0.0,0.7549,0.0,,178.4223,242.0001,0.2567,-17.034,-0.5022,2.1859,43.1898,40.0931,4.2047,2.9472,129.4,136.5924,0.0,304.0,315.0223,0.0,,-8.33,-7.41,-6.43,-5.9,-5.64,-5.51,-6.01,,,-12.46,0.631,0.5809,225.6,66.482471,-46.29426,2124.0,10022.0,0.73,12.74,30.55,11.82,-16.06,-214.4,-16.2,79.4,92.9691,15.28,118.8,13.39,-7.3612,0.0,66.5026,-46.33518 2023-12-01 
02:00:00,784.1,785.5,-15.55,-15.6,76.88,89.4484,1.1146,79.15,92.1345,1.1408,15.55,14.57,117.1,129.6,13.8428,-7.0837,11.2264,-9.2873,-2.0272,0.0,0.6131,0.0,,182.2089,243.3796,0.2862,-16.6921,-0.5438,2.18,40.0149,38.6836,4.1981,2.9441,129.4,136.5924,0.0,304.0,315.0223,0.0,,-8.33,-7.41,-6.43,-5.9,-5.64,-5.51,-6.01,,,-12.46,0.3333,0.8761,231.1,66.482326,-46.294294,2121.0,20021.0,1.04,12.73,30.8,12.34,-15.74,-214.5,-15.6,79.0,91.9599,14.63,138.5,9.6941,-10.9572,0.0,66.50363,-46.20806 diff --git a/tests/unit/test_value_clippping.py b/tests/unit/test_value_clippping.py index 52f33234..8e90655d 100644 --- a/tests/unit/test_value_clippping.py +++ b/tests/unit/test_value_clippping.py @@ -198,32 +198,32 @@ def test_circular_dependencies(self): check_dtype=True, ) - def test_rh_corrected(self): + def test_rh_adjusted(self): variable_config = pd.DataFrame( columns=["field", "lo", "hi", "OOL"], data=[ - ["rh_u", 0, 150, "rh_u_cor"], - ["rh_u_cor", 0, 150, ""], + ["rh_u", 0, 150, "rh_u_wrt_ice_or_water"], + ["rh_u_wrt_ice_or_water", 0, 150, ""], ], ).set_index("field") rows_input = [] rows_expected = [] # All values are within the expected range - rows_input.append(dict(rh_u=42, rh_u_cor=43)) - rows_expected.append(dict(rh_u=42, rh_u_cor=43)) - # rh_u is below range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship - rows_input.append(dict(rh_u=-10, rh_u_cor=3)) - rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) - # rh_u is within range, but rh_u_cor is below range; rh_u_cor should be flagged - rows_input.append(dict(rh_u=54, rh_u_cor=-4)) - rows_expected.append(dict(rh_u=54, rh_u_cor=np.nan)) - # rh_u is above range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship - rows_input.append(dict(rh_u=160, rh_u_cor=120)) - rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) - # rh_u is within range, but rh_u_cor is above range; rh_u_cor should be flagged - rows_input.append(dict(rh_u=100, rh_u_cor=255)) - rows_expected.append(dict(rh_u=100, rh_u_cor=np.nan)) + rows_input.append(dict(rh_u=42, rh_u_wrt_ice_or_water=43)) + rows_expected.append(dict(rh_u=42, rh_u_wrt_ice_or_water=43)) + # rh_u is below range, but rh_u_wrt_ice_or_water is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=-10, rh_u_wrt_ice_or_water=3)) + rows_expected.append(dict(rh_u=np.nan, rh_u_wrt_ice_or_water=np.nan)) + # rh_u is within range, but rh_u_wrt_ice_or_water is below range; rh_u_wrt_ice_or_water should be flagged + rows_input.append(dict(rh_u=54, rh_u_wrt_ice_or_water=-4)) + rows_expected.append(dict(rh_u=54, rh_u_wrt_ice_or_water=np.nan)) + # rh_u is above range, but rh_u_wrt_ice_or_water is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=160, rh_u_wrt_ice_or_water=120)) + rows_expected.append(dict(rh_u=np.nan, rh_u_wrt_ice_or_water=np.nan)) + # rh_u is within range, but rh_u_wrt_ice_or_water is above range; rh_u_wrt_ice_or_water should be flagged + rows_input.append(dict(rh_u=100, rh_u_wrt_ice_or_water=255)) + rows_expected.append(dict(rh_u=100, rh_u_wrt_ice_or_water=np.nan)) # Prepare the data df_input = pd.DataFrame(rows_input, dtype=float)
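To make the OOL ("other out-of-limits") relationship exercised by `test_rh_adjusted` concrete, here is a standalone sketch of the propagation rule — not the package's `clip_values` implementation, just the behaviour the test encodes: a value outside its own [lo, hi] range becomes NaN, and a flagged parent also flags its dependent (OOL) variables.

```python
import numpy as np
import pandas as pd

def clip_with_ool(df, limits, ool):
    # NaN-out values outside [lo, hi]; a flagged parent also flags its OOL children
    for col, (lo, hi) in limits.items():
        bad = (df[col] < lo) | (df[col] > hi)
        df.loc[bad, col] = np.nan
        for child in ool.get(col, []):
            df.loc[bad, child] = np.nan
    return df

df = pd.DataFrame(
    {"rh_u": [42, -10, 54, 160, 100],
     "rh_u_wrt_ice_or_water": [43, 3, -4, 120, 255]},
    dtype=float,
)
limits = {"rh_u": (0, 150), "rh_u_wrt_ice_or_water": (0, 150)}
ool = {"rh_u": ["rh_u_wrt_ice_or_water"]}
print(clip_with_ool(df, limits, ool))  # reproduces rows_expected in the test above
```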