From 236de1174c022e429d7d97d4348ed4daa5df845a Mon Sep 17 00:00:00 2001 From: ouyangwenyu Date: Wed, 27 Mar 2024 16:34:47 +0800 Subject: [PATCH] prepare, train, evaluate and visualize for your own data --- hydromodel/datasets/data_preprocess.py | 9 ++-- hydromodel/models/model_config.py | 8 ++- hydromodel/trainers/evaluate.py | 32 ++++++----- scripts/calibrate_xaj.py | 8 ++- scripts/evaluate_xaj.py | 62 +++++++++++---------- scripts/post_process.py | 75 +++++++++++++++----------- 6 files changed, 112 insertions(+), 82 deletions(-) diff --git a/hydromodel/datasets/data_preprocess.py b/hydromodel/datasets/data_preprocess.py index 370d8a5..14ff720 100644 --- a/hydromodel/datasets/data_preprocess.py +++ b/hydromodel/datasets/data_preprocess.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-27 14:30:15 +LastEditTime: 2024-03-27 16:31:55 LastEditors: Wenyu Ouyang Description: preprocess data for models in hydro-model-xaj FilePath: \hydro-model-xaj\hydromodel\datasets\data_preprocess.py @@ -206,7 +206,8 @@ def process_and_save_data_as_nc( # 读取流域属性 basin_attr_file = os.path.join(folder_path, "basin_attributes.csv") - basin_attrs = pd.read_csv(basin_attr_file) + # id must be str + basin_attrs = pd.read_csv(basin_attr_file, dtype={ID_NAME: str}) # 创建属性数据集 ds_attrs = xr.Dataset.from_dataframe(basin_attrs.set_index(ID_NAME)) @@ -234,8 +235,8 @@ def process_and_save_data_as_nc( # 初始化用于保存单位的字典 units = {} - # 获取流域ID列表 - basin_ids = basin_attrs[ID_NAME].tolist() + # id must be str + basin_ids = basin_attrs[ID_NAME].astype(str).tolist() # 为每个流域读取并处理时序数据 for i, basin_id in enumerate(basin_ids): diff --git a/hydromodel/models/model_config.py b/hydromodel/models/model_config.py index eb301d1..ef242b2 100644 --- a/hydromodel/models/model_config.py +++ b/hydromodel/models/model_config.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-27 15:03:41 +LastEditTime: 2024-03-27 16:16:25 LastEditors: Wenyu Ouyang Description: some basic config for hydro-model-xaj models FilePath: \hydro-model-xaj\hydromodel\models\model_config.py @@ -25,10 +25,8 @@ def read_model_param_dict(file_path="param.yaml"): } for model, contents in data.items() } - except FileNotFoundError: - print( - f"File not found: {file_path}, we directly use the default MODEL_PARAM_DICT." - ) + except Exception as e: + print(f"Error: {e}, we directly use the default MODEL_PARAM_DICT.") return MODEL_PARAM_DICT diff --git a/hydromodel/trainers/evaluate.py b/hydromodel/trainers/evaluate.py index 63d02c3..67c0662 100644 --- a/hydromodel/trainers/evaluate.py +++ b/hydromodel/trainers/evaluate.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-27 11:18:09 +LastEditTime: 2024-03-27 16:18:48 LastEditors: Wenyu Ouyang Description: Plots for calibration and testing results FilePath: \hydro-model-xaj\hydromodel\trainers\evaluate.py @@ -24,23 +24,25 @@ get_basin_area, _get_pe_q_from_ts, ) -from hydromodel.models.model_config import MODEL_PARAM_DICT +from hydromodel.models.model_config import read_model_param_dict from hydromodel.models.model_dict import MODEL_DICT class Evaluator: - def __init__(self, cali_dir, weight_dir=None, eval_dir=None): + def __init__(self, cali_dir, param_dir=None, eval_dir=None): """_summary_ Parameters ---------- cali_dir : _type_ calibration directory + param_dir : str + parameters directory eval_dir : _type_ evaluation directory """ - if weight_dir is None: - weight_dir = cali_dir + if param_dir is None: + param_dir = cali_dir if eval_dir is None: eval_dir = cali_dir cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml")) @@ -49,9 +51,10 @@ def __init__(self, cali_dir, weight_dir=None, eval_dir=None): self.data_dir = cali_config["data_dir"] self.model_info = cali_config["model"] self.save_dir = eval_dir - self.params_dir = weight_dir - if not os.path.exists(weight_dir): - os.makedirs(weight_dir) + self.params_dir = param_dir + self.param_range_file = cali_config["param_range_file"] + if not os.path.exists(param_dir): + os.makedirs(param_dir) if not os.path.exists(eval_dir): os.makedirs(eval_dir) @@ -70,7 +73,7 @@ def predict(self, ds): """ model_info = self.model_info p_and_e, _ = _get_pe_q_from_ts(ds) - basins = ds["basin"].data + basins = ds["basin"].data.astype(str) params = _read_all_basin_params(basins, self.params_dir) qsim, _ = MODEL_DICT[model_info["name"]]( p_and_e, @@ -78,6 +81,7 @@ def predict(self, ds): # we set the warmup_length=0 but later we get results from warmup_length to the end to evaluate warmup_length=0, **model_info, + **{"param_range_file": self.param_range_file}, ) qsim, qobs = self._convert_streamflow_units(ds, qsim) return qsim, qobs @@ -94,7 +98,7 @@ def save_results(self, ds, qsim, qobs): qobs : _type_ _description_ """ - basins = ds["basin"].data + basins = ds["basin"].data.astype(str) self._summarize_parameters(basins) self._renormalize_params(basins) self._save_evaluate_results(qsim, qobs, ds) @@ -142,8 +146,9 @@ def _summarize_parameters(self, basin_ids): param_dir = self.params_dir model_name = self.model_info["name"] params = [] + model_param_dict = read_model_param_dict(self.param_range_file) for basin_id in basin_ids: - columns = MODEL_PARAM_DICT[model_name]["param_name"] + columns = model_param_dict[model_name]["param_name"] params_txt = pd.read_csv( os.path.join(param_dir, basin_id + "_calibrate_params.txt") ) @@ -159,11 +164,12 @@ def _renormalize_params(self, basin_ids): param_dir = self.params_dir model_name = self.model_info["name"] renormalization_params = [] + model_param_dict = read_model_param_dict(self.param_range_file) for basin_id in basin_ids: params = np.loadtxt( os.path.join(param_dir, basin_id + "_calibrate_params.txt") )[1:].reshape(1, -1) - param_ranges = MODEL_PARAM_DICT[model_name]["param_range"] + param_ranges = model_param_dict[model_name]["param_range"] xaj_params = [ (value[1] - value[0]) * params[:, i] + value[0] for i, (key, value) in enumerate(param_ranges.items()) @@ -172,7 +178,7 @@ def _renormalize_params(self, basin_ids): params_df = pd.DataFrame(xaj_params_.T) renormalization_params.append(params_df) renormalization_params_dfs = pd.concat(renormalization_params, axis=1) - renormalization_params_dfs.index = MODEL_PARAM_DICT[model_name]["param_name"] + renormalization_params_dfs.index = model_param_dict[model_name]["param_name"] renormalization_params_dfs.columns = basin_ids print(renormalization_params_dfs) params_csv_file = os.path.join(param_dir, "basins_denorm_params.csv") diff --git a/scripts/calibrate_xaj.py b/scripts/calibrate_xaj.py index a266015..c5865e6 100644 --- a/scripts/calibrate_xaj.py +++ b/scripts/calibrate_xaj.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-11-19 17:27:05 -LastEditTime: 2024-03-27 15:22:39 +LastEditTime: 2024-03-27 15:56:19 LastEditors: Wenyu Ouyang Description: the script to calibrate a model for CAMELS basin FilePath: \hydro-model-xaj\scripts\calibrate_xaj.py @@ -10,6 +10,7 @@ import json import argparse +import shutil import sys import os from pathlib import Path @@ -85,6 +86,11 @@ def calibrate(args): algorithm=algo_info, loss=loss_info, ) + + # Save the parameter range file to result directory + shutil.copy(param_range_file, where_save) + # update the param_range_file path + args.param_range_file = os.path.join(where_save, param_range_file.split(os.sep)[-1]) # Convert the arguments to a dictionary args_dict = vars(args) # Save the arguments to a YAML file diff --git a/scripts/evaluate_xaj.py b/scripts/evaluate_xaj.py index f974a27..1b35c05 100644 --- a/scripts/evaluate_xaj.py +++ b/scripts/evaluate_xaj.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2024-03-26 12:00:12 -LastEditTime: 2024-03-27 10:20:22 +LastEditTime: 2024-03-27 16:20:25 LastEditors: Wenyu Ouyang Description: evaluate a calibrated hydrological model FilePath: \hydro-model-xaj\scripts\evaluate_xaj.py @@ -26,7 +26,6 @@ def evaluate(args): cali_dir = Path(os.path.join(repo_path, "result", exp)) cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml")) kfold = cali_config["cv_fold"] - algo_info = cali_config["algorithm"] basins = cali_config["basin_id"] warmup = cali_config["warmup"] data_type = cali_config["data_type"] @@ -44,35 +43,42 @@ def evaluate(args): warmup, basins, ) - for fold in range(kfold): - print(f"Start to evaluate the {fold+1}-th fold") - fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}") - if algo_info["name"] == "SCE_UA": + if kfold <= 1: + print("Start to evaluate") + # evaluate both train and test period for all basins + train_data = train_and_test_data[0] + test_data = train_and_test_data[1] + param_dir = os.path.join(cali_dir, "sceua_xaj") + _evaluate(cali_dir, param_dir, train_data, test_data) + print("Finish evaluating") + else: + for fold in range(kfold): + print(f"Start to evaluate the {fold+1}-th fold") + fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}") # evaluate both train and test period for all basins train_data = train_and_test_data[fold][0] test_data = train_and_test_data[fold][1] - eval_train_dir = os.path.join(fold_dir, "train") - eval_test_dir = os.path.join(fold_dir, "test") - train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir) - test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir) - qsim_train, qobs_train = train_eval.predict(train_data) - qsim_test, qobs_test = test_eval.predict(test_data) - train_eval.save_results( - train_data, - qsim_train, - qobs_train, - ) - test_eval.save_results( - test_data, - qsim_test, - qobs_test, - ) - else: - raise NotImplementedError( - "We don't provide this calibrate method! Choose from 'SCE_UA' or 'GA'!" - ) + _evaluate(cali_dir, fold_dir, train_data, test_data) + print(f"Finish evaluating the {fold}-th fold") - print(f"Finish evaluating the {fold}-th fold") + +def _evaluate(cali_dir, param_dir, train_data, test_data): + eval_train_dir = os.path.join(param_dir, "train") + eval_test_dir = os.path.join(param_dir, "test") + train_eval = Evaluator(cali_dir, param_dir, eval_train_dir) + test_eval = Evaluator(cali_dir, param_dir, eval_test_dir) + qsim_train, qobs_train = train_eval.predict(train_data) + qsim_test, qobs_test = test_eval.predict(test_data) + train_eval.save_results( + train_data, + qsim_train, + qobs_train, + ) + test_eval.save_results( + test_data, + qsim_test, + qobs_test, + ) if __name__ == "__main__": @@ -83,7 +89,7 @@ def evaluate(args): "--exp", dest="exp", help="An exp is corresponding to a data plan from calibrate_xaj.py", - default="expcamels001", + default="expbiliuhe001", type=str, ) the_args = parser.parse_args() diff --git a/scripts/post_process.py b/scripts/post_process.py index e838797..c748cdd 100644 --- a/scripts/post_process.py +++ b/scripts/post_process.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-11-19 17:27:05 -LastEditTime: 2024-03-27 11:23:02 +LastEditTime: 2024-03-27 16:27:03 LastEditors: Wenyu Ouyang Description: the script to postprocess results FilePath: \hydro-model-xaj\scripts\post_process.py @@ -26,36 +26,49 @@ def visualize(args): kfold = cali_config["cv_fold"] basins = cali_config["basin_id"] warmup = cali_config["warmup"] - for fold in range(kfold): - print(f"Start to evaluate the {fold+1}-th fold") - fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}") - # evaluate both train and test period for all basins - eval_train_dir = os.path.join(fold_dir, "train") - eval_test_dir = os.path.join(fold_dir, "test") - train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir) - test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir) - ds_train = train_eval.load_results() - ds_test = test_eval.load_results() - for basin in basins: - save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png") - plot_sim_and_obs( - ds_train["time"].isel(time=slice(warmup, None)), - ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)), - ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)), - save_fig_train, - xlabel="Date", - ylabel=None, + if kfold <= 1: + print("Start to visualize the results") + param_dir = os.path.join(cali_dir, "sceua_xaj") + eval_train_dir = os.path.join(param_dir, "train") + eval_test_dir = os.path.join(param_dir, "test") + _visualize(cali_dir, basins, warmup, param_dir, eval_train_dir, eval_test_dir) + print("Finish visualizing the results") + else: + for fold in range(kfold): + print(f"Start to visualize the {fold+1}-th fold") + param_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}") + eval_train_dir = os.path.join(param_dir, "train") + eval_test_dir = os.path.join(param_dir, "test") + _visualize( + cali_dir, basins, warmup, param_dir, eval_train_dir, eval_test_dir ) - save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png") - plot_sim_and_obs( - ds_test["time"].isel(time=slice(warmup, None)), - ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)), - ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)), - save_fig_test, - xlabel="Date", - ylabel=None, - ) - print(f"Finish visualizing the {fold}-th fold") + print(f"Finish visualizing the {fold}-th fold") + + +def _visualize(cali_dir, basins, warmup, param_dir, eval_train_dir, eval_test_dir): + train_eval = Evaluator(cali_dir, param_dir, eval_train_dir) + test_eval = Evaluator(cali_dir, param_dir, eval_test_dir) + ds_train = train_eval.load_results() + ds_test = test_eval.load_results() + for basin in basins: + save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png") + plot_sim_and_obs( + ds_train["time"].isel(time=slice(warmup, None)), + ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)), + ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)), + save_fig_train, + xlabel="Date", + ylabel=None, + ) + save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png") + plot_sim_and_obs( + ds_test["time"].isel(time=slice(warmup, None)), + ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)), + ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)), + save_fig_test, + xlabel="Date", + ylabel=None, + ) if __name__ == "__main__": @@ -66,7 +79,7 @@ def visualize(args): "--exp", dest="exp", help="An exp is corresponding to one data setting", - default="expcamels001", + default="expbiliuhe001", type=str, ) the_args = parser.parse_args()