Skip to content

Commit

Permalink
prepare, train, evaluate and visualize for your own data
Browse files Browse the repository at this point in the history
  • Loading branch information
OuyangWenyu committed Mar 27, 2024
1 parent 0df4d76 commit 236de11
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 82 deletions.
9 changes: 5 additions & 4 deletions hydromodel/datasets/data_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2024-03-27 14:30:15
LastEditTime: 2024-03-27 16:31:55
LastEditors: Wenyu Ouyang
Description: preprocess data for models in hydro-model-xaj
FilePath: \hydro-model-xaj\hydromodel\datasets\data_preprocess.py
Expand Down Expand Up @@ -206,7 +206,8 @@ def process_and_save_data_as_nc(

# 读取流域属性
basin_attr_file = os.path.join(folder_path, "basin_attributes.csv")
basin_attrs = pd.read_csv(basin_attr_file)
# id must be str
basin_attrs = pd.read_csv(basin_attr_file, dtype={ID_NAME: str})

# 创建属性数据集
ds_attrs = xr.Dataset.from_dataframe(basin_attrs.set_index(ID_NAME))
Expand Down Expand Up @@ -234,8 +235,8 @@ def process_and_save_data_as_nc(
# 初始化用于保存单位的字典
units = {}

# 获取流域ID列表
basin_ids = basin_attrs[ID_NAME].tolist()
# id must be str
basin_ids = basin_attrs[ID_NAME].astype(str).tolist()

# 为每个流域读取并处理时序数据
for i, basin_id in enumerate(basin_ids):
Expand Down
8 changes: 3 additions & 5 deletions hydromodel/models/model_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2024-03-27 15:03:41
LastEditTime: 2024-03-27 16:16:25
LastEditors: Wenyu Ouyang
Description: some basic config for hydro-model-xaj models
FilePath: \hydro-model-xaj\hydromodel\models\model_config.py
Expand All @@ -25,10 +25,8 @@ def read_model_param_dict(file_path="param.yaml"):
}
for model, contents in data.items()
}
except FileNotFoundError:
print(
f"File not found: {file_path}, we directly use the default MODEL_PARAM_DICT."
)
except Exception as e:
print(f"Error: {e}, we directly use the default MODEL_PARAM_DICT.")
return MODEL_PARAM_DICT


Expand Down
32 changes: 19 additions & 13 deletions hydromodel/trainers/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2024-03-27 11:18:09
LastEditTime: 2024-03-27 16:18:48
LastEditors: Wenyu Ouyang
Description: Plots for calibration and testing results
FilePath: \hydro-model-xaj\hydromodel\trainers\evaluate.py
Expand All @@ -24,23 +24,25 @@
get_basin_area,
_get_pe_q_from_ts,
)
from hydromodel.models.model_config import MODEL_PARAM_DICT
from hydromodel.models.model_config import read_model_param_dict
from hydromodel.models.model_dict import MODEL_DICT


class Evaluator:
def __init__(self, cali_dir, weight_dir=None, eval_dir=None):
def __init__(self, cali_dir, param_dir=None, eval_dir=None):
"""_summary_
Parameters
----------
cali_dir : _type_
calibration directory
param_dir : str
parameters directory
eval_dir : _type_
evaluation directory
"""
if weight_dir is None:
weight_dir = cali_dir
if param_dir is None:
param_dir = cali_dir
if eval_dir is None:
eval_dir = cali_dir
cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml"))
Expand All @@ -49,9 +51,10 @@ def __init__(self, cali_dir, weight_dir=None, eval_dir=None):
self.data_dir = cali_config["data_dir"]
self.model_info = cali_config["model"]
self.save_dir = eval_dir
self.params_dir = weight_dir
if not os.path.exists(weight_dir):
os.makedirs(weight_dir)
self.params_dir = param_dir
self.param_range_file = cali_config["param_range_file"]
if not os.path.exists(param_dir):
os.makedirs(param_dir)
if not os.path.exists(eval_dir):
os.makedirs(eval_dir)

Expand All @@ -70,14 +73,15 @@ def predict(self, ds):
"""
model_info = self.model_info
p_and_e, _ = _get_pe_q_from_ts(ds)
basins = ds["basin"].data
basins = ds["basin"].data.astype(str)
params = _read_all_basin_params(basins, self.params_dir)
qsim, _ = MODEL_DICT[model_info["name"]](
p_and_e,
params,
# we set the warmup_length=0 but later we get results from warmup_length to the end to evaluate
warmup_length=0,
**model_info,
**{"param_range_file": self.param_range_file},
)
qsim, qobs = self._convert_streamflow_units(ds, qsim)
return qsim, qobs
Expand All @@ -94,7 +98,7 @@ def save_results(self, ds, qsim, qobs):
qobs : _type_
_description_
"""
basins = ds["basin"].data
basins = ds["basin"].data.astype(str)
self._summarize_parameters(basins)
self._renormalize_params(basins)
self._save_evaluate_results(qsim, qobs, ds)
Expand Down Expand Up @@ -142,8 +146,9 @@ def _summarize_parameters(self, basin_ids):
param_dir = self.params_dir
model_name = self.model_info["name"]
params = []
model_param_dict = read_model_param_dict(self.param_range_file)
for basin_id in basin_ids:
columns = MODEL_PARAM_DICT[model_name]["param_name"]
columns = model_param_dict[model_name]["param_name"]
params_txt = pd.read_csv(
os.path.join(param_dir, basin_id + "_calibrate_params.txt")
)
Expand All @@ -159,11 +164,12 @@ def _renormalize_params(self, basin_ids):
param_dir = self.params_dir
model_name = self.model_info["name"]
renormalization_params = []
model_param_dict = read_model_param_dict(self.param_range_file)
for basin_id in basin_ids:
params = np.loadtxt(
os.path.join(param_dir, basin_id + "_calibrate_params.txt")
)[1:].reshape(1, -1)
param_ranges = MODEL_PARAM_DICT[model_name]["param_range"]
param_ranges = model_param_dict[model_name]["param_range"]
xaj_params = [
(value[1] - value[0]) * params[:, i] + value[0]
for i, (key, value) in enumerate(param_ranges.items())
Expand All @@ -172,7 +178,7 @@ def _renormalize_params(self, basin_ids):
params_df = pd.DataFrame(xaj_params_.T)
renormalization_params.append(params_df)
renormalization_params_dfs = pd.concat(renormalization_params, axis=1)
renormalization_params_dfs.index = MODEL_PARAM_DICT[model_name]["param_name"]
renormalization_params_dfs.index = model_param_dict[model_name]["param_name"]
renormalization_params_dfs.columns = basin_ids
print(renormalization_params_dfs)
params_csv_file = os.path.join(param_dir, "basins_denorm_params.csv")
Expand Down
8 changes: 7 additions & 1 deletion scripts/calibrate_xaj.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-11-19 17:27:05
LastEditTime: 2024-03-27 15:22:39
LastEditTime: 2024-03-27 15:56:19
LastEditors: Wenyu Ouyang
Description: the script to calibrate a model for CAMELS basin
FilePath: \hydro-model-xaj\scripts\calibrate_xaj.py
Expand All @@ -10,6 +10,7 @@

import json
import argparse
import shutil
import sys
import os
from pathlib import Path
Expand Down Expand Up @@ -85,6 +86,11 @@ def calibrate(args):
algorithm=algo_info,
loss=loss_info,
)

# Save the parameter range file to result directory
shutil.copy(param_range_file, where_save)
# update the param_range_file path
args.param_range_file = os.path.join(where_save, param_range_file.split(os.sep)[-1])
# Convert the arguments to a dictionary
args_dict = vars(args)
# Save the arguments to a YAML file
Expand Down
62 changes: 34 additions & 28 deletions scripts/evaluate_xaj.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2024-03-26 12:00:12
LastEditTime: 2024-03-27 10:20:22
LastEditTime: 2024-03-27 16:20:25
LastEditors: Wenyu Ouyang
Description: evaluate a calibrated hydrological model
FilePath: \hydro-model-xaj\scripts\evaluate_xaj.py
Expand All @@ -26,7 +26,6 @@ def evaluate(args):
cali_dir = Path(os.path.join(repo_path, "result", exp))
cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml"))
kfold = cali_config["cv_fold"]
algo_info = cali_config["algorithm"]
basins = cali_config["basin_id"]
warmup = cali_config["warmup"]
data_type = cali_config["data_type"]
Expand All @@ -44,35 +43,42 @@ def evaluate(args):
warmup,
basins,
)
for fold in range(kfold):
print(f"Start to evaluate the {fold+1}-th fold")
fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
if algo_info["name"] == "SCE_UA":
if kfold <= 1:
print("Start to evaluate")
# evaluate both train and test period for all basins
train_data = train_and_test_data[0]
test_data = train_and_test_data[1]
param_dir = os.path.join(cali_dir, "sceua_xaj")
_evaluate(cali_dir, param_dir, train_data, test_data)
print("Finish evaluating")
else:
for fold in range(kfold):
print(f"Start to evaluate the {fold+1}-th fold")
fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
# evaluate both train and test period for all basins
train_data = train_and_test_data[fold][0]
test_data = train_and_test_data[fold][1]
eval_train_dir = os.path.join(fold_dir, "train")
eval_test_dir = os.path.join(fold_dir, "test")
train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir)
test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir)
qsim_train, qobs_train = train_eval.predict(train_data)
qsim_test, qobs_test = test_eval.predict(test_data)
train_eval.save_results(
train_data,
qsim_train,
qobs_train,
)
test_eval.save_results(
test_data,
qsim_test,
qobs_test,
)
else:
raise NotImplementedError(
"We don't provide this calibrate method! Choose from 'SCE_UA' or 'GA'!"
)
_evaluate(cali_dir, fold_dir, train_data, test_data)
print(f"Finish evaluating the {fold}-th fold")

print(f"Finish evaluating the {fold}-th fold")

def _evaluate(cali_dir, param_dir, train_data, test_data):
eval_train_dir = os.path.join(param_dir, "train")
eval_test_dir = os.path.join(param_dir, "test")
train_eval = Evaluator(cali_dir, param_dir, eval_train_dir)
test_eval = Evaluator(cali_dir, param_dir, eval_test_dir)
qsim_train, qobs_train = train_eval.predict(train_data)
qsim_test, qobs_test = test_eval.predict(test_data)
train_eval.save_results(
train_data,
qsim_train,
qobs_train,
)
test_eval.save_results(
test_data,
qsim_test,
qobs_test,
)


if __name__ == "__main__":
Expand All @@ -83,7 +89,7 @@ def evaluate(args):
"--exp",
dest="exp",
help="An exp is corresponding to a data plan from calibrate_xaj.py",
default="expcamels001",
default="expbiliuhe001",
type=str,
)
the_args = parser.parse_args()
Expand Down
75 changes: 44 additions & 31 deletions scripts/post_process.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-11-19 17:27:05
LastEditTime: 2024-03-27 11:23:02
LastEditTime: 2024-03-27 16:27:03
LastEditors: Wenyu Ouyang
Description: the script to postprocess results
FilePath: \hydro-model-xaj\scripts\post_process.py
Expand All @@ -26,36 +26,49 @@ def visualize(args):
kfold = cali_config["cv_fold"]
basins = cali_config["basin_id"]
warmup = cali_config["warmup"]
for fold in range(kfold):
print(f"Start to evaluate the {fold+1}-th fold")
fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
# evaluate both train and test period for all basins
eval_train_dir = os.path.join(fold_dir, "train")
eval_test_dir = os.path.join(fold_dir, "test")
train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir)
test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir)
ds_train = train_eval.load_results()
ds_test = test_eval.load_results()
for basin in basins:
save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png")
plot_sim_and_obs(
ds_train["time"].isel(time=slice(warmup, None)),
ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
save_fig_train,
xlabel="Date",
ylabel=None,
if kfold <= 1:
print("Start to visualize the results")
param_dir = os.path.join(cali_dir, "sceua_xaj")
eval_train_dir = os.path.join(param_dir, "train")
eval_test_dir = os.path.join(param_dir, "test")
_visualize(cali_dir, basins, warmup, param_dir, eval_train_dir, eval_test_dir)
print("Finish visualizing the results")
else:
for fold in range(kfold):
print(f"Start to visualize the {fold+1}-th fold")
param_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
eval_train_dir = os.path.join(param_dir, "train")
eval_test_dir = os.path.join(param_dir, "test")
_visualize(
cali_dir, basins, warmup, param_dir, eval_train_dir, eval_test_dir
)
save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png")
plot_sim_and_obs(
ds_test["time"].isel(time=slice(warmup, None)),
ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
save_fig_test,
xlabel="Date",
ylabel=None,
)
print(f"Finish visualizing the {fold}-th fold")
print(f"Finish visualizing the {fold}-th fold")


def _visualize(cali_dir, basins, warmup, param_dir, eval_train_dir, eval_test_dir):
train_eval = Evaluator(cali_dir, param_dir, eval_train_dir)
test_eval = Evaluator(cali_dir, param_dir, eval_test_dir)
ds_train = train_eval.load_results()
ds_test = test_eval.load_results()
for basin in basins:
save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png")
plot_sim_and_obs(
ds_train["time"].isel(time=slice(warmup, None)),
ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
save_fig_train,
xlabel="Date",
ylabel=None,
)
save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png")
plot_sim_and_obs(
ds_test["time"].isel(time=slice(warmup, None)),
ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
save_fig_test,
xlabel="Date",
ylabel=None,
)


if __name__ == "__main__":
Expand All @@ -66,7 +79,7 @@ def visualize(args):
"--exp",
dest="exp",
help="An exp is corresponding to one data setting",
default="expcamels001",
default="expbiliuhe001",
type=str,
)
the_args = parser.parse_args()
Expand Down

0 comments on commit 236de11

Please sign in to comment.