diff --git a/src/rlai/gpi/monte_carlo/iteration.py b/src/rlai/gpi/monte_carlo/iteration.py index cc721431..bbc54998 100644 --- a/src/rlai/gpi/monte_carlo/iteration.py +++ b/src/rlai/gpi/monte_carlo/iteration.py @@ -142,6 +142,7 @@ def iterate_value_q_pi( checkpoint_path_with_index = insert_index_into_path(checkpoint_path, i) final_checkpoint_path = checkpoint_path_with_index + os.makedirs(os.path.dirname(final_checkpoint_path), exist_ok=True) with open(checkpoint_path_with_index, 'wb') as checkpoint_file: pickle.dump(resume_args, checkpoint_file) diff --git a/src/rlai/gpi/temporal_difference/iteration.py b/src/rlai/gpi/temporal_difference/iteration.py index 83ff068f..992f2004 100644 --- a/src/rlai/gpi/temporal_difference/iteration.py +++ b/src/rlai/gpi/temporal_difference/iteration.py @@ -178,6 +178,7 @@ def iterate_value_q_pi( checkpoint_path_with_index = insert_index_into_path(checkpoint_path, i) final_checkpoint_path = checkpoint_path_with_index + os.makedirs(os.path.dirname(final_checkpoint_path), exist_ok=True) with open(checkpoint_path_with_index, 'wb') as checkpoint_file: pickle.dump(resume_args, checkpoint_file) diff --git a/src/rlai/policy_gradient/monte_carlo/reinforce.py b/src/rlai/policy_gradient/monte_carlo/reinforce.py index 27c2e9cd..57d32c76 100644 --- a/src/rlai/policy_gradient/monte_carlo/reinforce.py +++ b/src/rlai/policy_gradient/monte_carlo/reinforce.py @@ -182,6 +182,7 @@ def improve( checkpoint_path_with_index = insert_index_into_path(checkpoint_path, episodes_finished) final_checkpoint_path = checkpoint_path_with_index + os.makedirs(os.path.dirname(final_checkpoint_path), exist_ok=True) with open(checkpoint_path_with_index, 'wb') as checkpoint_file: pickle.dump(resume_args, checkpoint_file)