Skip to content

Commit

Permalink
Bugfix/passing adj dir to l3 processing plus attribute fix (#292)
Browse files Browse the repository at this point in the history
* passing adjustment_dir to L2toL3.py

* fixing attributes in join_l3

- station_attributes containing info from the merged dataset were lost when concatenating the datasets
- The key "source" is not present in the attributes of the old GC-Net files so `station_source = json.loads(station_attributes["source"])` was throwing an error

* give data_issues_path to get_l2tol3 in test_process

* using data_adjustments_dir as input in AWS.getL3

* adding path to dummy data_issues folder to process_test

* making sure data_issues_path is a Path in get_l2tol3
  • Loading branch information
BaptisteVandecrux authored Aug 20, 2024
1 parent 3357e62 commit b29958a
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 30 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/process_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
mkdir $GITHUB_WORKSPACE/out/L0toL2/
mkdir $GITHUB_WORKSPACE/data_issues
for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -c $GITHUB_WORKSPACE/aws-l0/tx/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/tx --issues $GITHUB_WORKSPACE/data_issues -o $GITHUB_WORKSPACE/out/L0toL2/
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -c $GITHUB_WORKSPACE/aws-l0/tx/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/tx --issues $GITHUB_WORKSPACE/data_issues -o $GITHUB_WORKSPACE/out/L0toL2/ --data_issues_path $GITHUB_WORKSPACE/data_issues
done
- name: Run L2 to L3 processing
env:
Expand All @@ -50,7 +50,7 @@ jobs:
mkdir $GITHUB_WORKSPACE/out/L2toL3/
for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do
echo ${i}_hour.nc
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/ --data_issues_path $GITHUB_WORKSPACE/data_issues
done
- name: Upload test output
uses: actions/upload-artifact@v3
Expand Down
12 changes: 8 additions & 4 deletions src/pypromice/process/L2toL3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@
from sklearn.linear_model import LinearRegression
from pypromice.qc.github_data_issues import adjustData
from scipy.interpolate import interp1d
from pathlib import Path
import logging

logger = logging.getLogger(__name__)

def toL3(L2, station_config={}, T_0=273.15):
def toL3(L2,
data_adjustments_dir: Path,
station_config={},
T_0=273.15):
'''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all
derived variables:
- Turbulent fluxes
Expand Down Expand Up @@ -109,7 +113,7 @@ def toL3(L2, station_config={}, T_0=273.15):

# processing continuous surface height, ice surface height, snow height
try:
ds = process_surface_height(ds, station_config)
ds = process_surface_height(ds, data_adjustments_dir, station_config)
except Exception as e:
logger.error("Error processing surface height at %s"%L2.attrs['station_id'])
logging.error(e, exc_info=True)
Expand All @@ -130,7 +134,7 @@ def toL3(L2, station_config={}, T_0=273.15):
return ds


def process_surface_height(ds, station_config={}):
def process_surface_height(ds, data_adjustments_dir, station_config={}):
"""
Process surface height data for different site types and create
surface height variables.
Expand Down Expand Up @@ -180,7 +184,7 @@ def process_surface_height(ds, station_config={}):
ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'])

# Adjust data for the created surface height variables
ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])
ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])

# Convert to dataframe and combine surface height variables
df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe()
Expand Down
2 changes: 1 addition & 1 deletion src/pypromice/process/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def getL3(self):
"""Perform L2 to L3 data processing, including resampling and metadata
and attribute population"""
logger.info("Level 3 processing...")
self.L3 = toL3(self.L2)
self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")

def writeArr(self, dataset, outpath, t=None):
"""Write L3 data to .nc and .csv hourly and daily files
Expand Down
31 changes: 20 additions & 11 deletions src/pypromice/process/get_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,27 @@ def get_l2(config_file, inpath, outpath, variables, metadata, data_issues_path:
# Define input path
station_name = config_file.split('/')[-1].split('.')[0]
station_path = os.path.join(inpath, station_name)

# checking that data_issues_path is valid
if data_issues_path is None:
data_issues_path = Path("../PROMICE-AWS-data-issues")
if data_issues_path.exists():
logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
else:
raise ValueError("data_issues_path is missing. Please provide a valid path to the data issues repository")

if os.path.exists(station_path):
aws = AWS(config_file, station_path, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata)
aws = AWS(config_file,
station_path,
data_issues_repository=data_issues_path,
var_file=variables,
meta_file=metadata)
else:
aws = AWS(config_file, inpath, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata)
aws = AWS(config_file,
inpath,
data_issues_repository=data_issues_path,
var_file=variables,
meta_file=metadata)

# Perform level 1 and 2 processing
aws.getL1()
Expand All @@ -58,21 +75,13 @@ def main():
stream=sys.stdout,
)

data_issues_path = args.data_issues_path
if data_issues_path is None:
data_issues_path = Path("../PROMICE-AWS-data-issues")
if data_issues_path.exists():
logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
else:
raise ValueError(f"data_issues_path is missing. Please provide a valid path to the data issues repository")

_ = get_l2(
args.config_file,
args.inpath,
args.outpath,
args.variables,
args.metadata,
data_issues_path=data_issues_path,
args.data_issues_path,
)


Expand Down
30 changes: 26 additions & 4 deletions src/pypromice/process/get_l2tol3.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ def parse_arguments_l2tol3(debug_args=None):
required=False, help='File path to variables look-up table')
parser.add_argument('-m', '--metadata', default=None, type=str,
required=False, help='File path to metadata')
parser.add_argument('--data_issues_path', '--issues', default=None, help="Path to data issues repository")


args = parser.parse_args(args=debug_args)
return args

def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata):
def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, data_issues_path: Path|str):
if isinstance(config_folder, str):
config_folder = Path(config_folder)

Expand Down Expand Up @@ -68,9 +70,21 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata):
"project": "PROMICE",
"location_type": "ice sheet",
}


    # checking that the adjustment directory is properly given
if data_issues_path is None:
data_issues_path = Path("../PROMICE-AWS-data-issues")
if data_issues_path.exists():
logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
else:
raise ValueError("data_issues_path is missing. Please provide a valid path to the data issues repository")
else:
data_issues_path = Path(data_issues_path)

data_adjustments_dir = data_issues_path / "adjustments"

# Perform Level 3 processing
l3 = toL3(l2, station_config)
l3 = toL3(l2, data_adjustments_dir, station_config)

# Write Level 3 dataset to file if output directory given
v = pypromice.resources.load_variables(variables)
Expand All @@ -83,7 +97,15 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata):

def main():
args = parse_arguments_l2tol3()
_ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata)



_ = get_l2tol3(args.config_folder,
args.inpath,
args.outpath,
args.variables,
args.metadata,
args.data_issues_path)

if __name__ == "__main__":
main()
21 changes: 14 additions & 7 deletions src/pypromice/process/join_l3.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series()
),
)


# saves attributes
attrs = l3_merged.attrs
# merging by time block
l3_merged = xr.concat(
(
Expand All @@ -504,6 +506,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
),
dim="time",
)

    # restoring attributes
l3_merged.attrs = attrs

# Assign site id
if not l3_merged:
Expand All @@ -519,13 +524,15 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
site_config_source_hash=get_commit_hash_and_check_dirty(config_folder),
gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet),
)

for stid, station_attributes in l3_merged.attrs["stations_attributes"].items():
station_source = json.loads(station_attributes["source"])
for k, v in station_source.items():
if k in site_source and site_source[k] != v:
site_source[k] = "multiple"
else:
site_source[k] = v
if "source" in station_attributes.keys():
station_source = json.loads(station_attributes["source"])
for k, v in station_source.items():
if k in site_source and site_source[k] != v:
site_source[k] = "multiple"
else:
site_source[k] = v
l3_merged.attrs["source"] = json.dumps(site_source)

v = pypromice.resources.load_variables(variables)
Expand Down
2 changes: 1 addition & 1 deletion src/pypromice/qc/github_data_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def _getDF(flag_file):
).dropna(how='all', axis='rows')
else:
df=None
logger.info(f"No {flag_file.split('/')[-2][:-1]} file to read.")
logger.info(f"No {flag_file} file to read.")
return df


Expand Down
1 change: 1 addition & 0 deletions tests/e2e/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def test_full_e2e(self):
outpath=output_l3.as_posix(),
variables=None,
metadata=None,
data_issues_path=data_issues_path,
)

# Part 4 Join L3: Merge Current data and historical GC-Net and convert to site
Expand Down

0 comments on commit b29958a

Please sign in to comment.