diff --git a/src/rlai/policy_gradient/monte_carlo/reinforce.py b/src/rlai/policy_gradient/monte_carlo/reinforce.py index dc59976..7d62a31 100644 --- a/src/rlai/policy_gradient/monte_carlo/reinforce.py +++ b/src/rlai/policy_gradient/monte_carlo/reinforce.py @@ -390,9 +390,6 @@ def improve( plt.close() - if pdf is not None: - pdf.close() - # plot per-episode metrics if len(environment.metric_episode_value) > 0: for metric in environment.metric_episode_value: @@ -414,6 +411,9 @@ def improve( plt.close() + if pdf is not None: + pdf.close() + num_fallback_iterations = 0 if ( training_pool is not None and