WIP add option for not fitting intercept in beta-distribution policy.
MatthewGerber committed Dec 25, 2023
1 parent 35ac59b commit 874576b
Showing 3 changed files with 16 additions and 11 deletions.
2 changes: 1 addition & 1 deletion run_configurations/MountainCar continuous train FA.run.xml
@@ -14,7 +14,7 @@
     <option name="ADD_SOURCE_ROOTS" value="true" />
     <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
     <option name="SCRIPT_NAME" value="$PROJECT_DIR$/src/rlai/runners/trainer.py" />
-    <option name="PARAMETERS" value="--random-seed 12345 --agent rlai.policy_gradient.ParameterizedMdpAgent --gamma 0.99 --environment rlai.core.environments.gymnasium.Gym --gym-id MountainCarContinuous-v0 --render-every-nth-episode 50 --video-directory ~/Desktop/mountaincar_continuous_videos --T 1000 --plot-environment --train-function rlai.policy_gradient.monte_carlo.reinforce.improve --num-episodes 1000 --plot-state-value True --v-S rlai.state_value.function_approximation.ApproximateStateValueEstimator --feature-extractor rlai.core.environments.gymnasium.ContinuousMountainCarFeatureExtractor --function-approximation-model rlai.models.sklearn.SKLearnSGD --loss squared_error --sgd-alpha 0.0 --learning-rate constant --eta0 0.001 --policy rlai.policy_gradient.policies.continuous_action.ContinuousActionBetaDistributionPolicy --policy-feature-extractor rlai.core.environments.gymnasium.ContinuousMountainCarFeatureExtractor --alpha 0.001 --update-upon-every-visit True --plot-policy --save-agent-path ~/Desktop/mountaincar_continuous_agent.pickle --log DEBUG" />
+    <option name="PARAMETERS" value="--random-seed 12345 --agent rlai.policy_gradient.ParameterizedMdpAgent --gamma 0.99 --environment rlai.core.environments.gymnasium.Gym --gym-id MountainCarContinuous-v0 --render-every-nth-episode 50 --video-directory ~/Desktop/mountaincar_continuous_videos --T 1000 --plot-environment --train-function rlai.policy_gradient.monte_carlo.reinforce.improve --num-episodes 1000 --plot-state-value True --v-S rlai.state_value.function_approximation.ApproximateStateValueEstimator --feature-extractor rlai.core.environments.gymnasium.ContinuousMountainCarFeatureExtractor --function-approximation-model rlai.models.sklearn.SKLearnSGD --no-intercept --loss squared_error --sgd-alpha 0.0 --learning-rate constant --eta0 0.001 --policy rlai.policy_gradient.policies.continuous_action.ContinuousActionBetaDistributionPolicy --policy-feature-extractor rlai.core.environments.gymnasium.ContinuousMountainCarFeatureExtractor --alpha 0.001 --update-upon-every-visit True --plot-policy --save-agent-path ~/Desktop/mountaincar_continuous_agent.pickle --log DEBUG" />
     <option name="SHOW_COMMAND_LINE" value="false" />
     <option name="EMULATE_TERMINAL" value="false" />
     <option name="MODULE_MODE" value="false" />
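The two long PARAMETERS values above differ only in the new flag passed to the function-approximation model:

--function-approximation-model rlai.models.sklearn.SKLearnSGD --no-intercept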
4 changes: 2 additions & 2 deletions src/rlai/core/environments/gymnasium.py
@@ -1206,10 +1206,10 @@ def extract(
         """

         # extract raw feature values
-        scaled_feature_matrix = super().extract(state, refit_scaler)
+        scaled_feature_vector = np.append(super().extract(state, refit_scaler), 1.0)
         interacted_feature_vector = self.state_category_interacter.interact(
             np.array([state.observation]),
-            np.array([scaled_feature_matrix])
+            np.array([scaled_feature_vector])
         )[0]

         return interacted_feature_vector
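The net effect of this change is that the intercept term now lives inside the feature extractor: a constant 1.0 is appended to the scaled feature vector before the state-category interaction, so downstream models no longer need to prepend one themselves. A minimal sketch of just the append step, with the interacter and state plumbing omitted and made-up feature values:

import numpy as np

def append_intercept(scaled_features: np.ndarray) -> np.ndarray:
    # append a constant 1.0 so a linear model's bias is learned as one more coefficient
    return np.append(scaled_features, 1.0)

scaled = np.array([0.25, -0.5])   # e.g. scaled position and velocity
print(append_intercept(scaled))   # [ 0.25 -0.5   1.  ]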
21 changes: 13 additions & 8 deletions src/rlai/policy_gradient/policies/continuous_action.py
@@ -537,8 +537,10 @@ def __commit_updates__(
         ])

         # prepend an intercept column
-        intercept_state_feature_matrix = np.ones(shape=np.add(state_feature_matrix.shape, (0, 1)))
-        intercept_state_feature_matrix[:, 1:] = state_feature_matrix
+        if self.fit_intercept:
+            intercept_state_feature_matrix = np.ones(shape=np.add(state_feature_matrix.shape, (0, 1)))
+            intercept_state_feature_matrix[:, 1:] = state_feature_matrix
+            state_feature_matrix = intercept_state_feature_matrix

         # invert action values back to [0.0, 1.0] (the domain of the beta distribution), creating one row per action
         # taken and one column per action dimension.
@@ -563,7 +565,7 @@
            ) = self.get_action_density_gradients_vmap(
                action_i_theta_a,
                action_i_theta_b,
-               intercept_state_feature_matrix,
+               state_feature_matrix,
                action_i_values
            )

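A self-contained sketch of the conditional intercept logic above, assuming a fit_intercept flag equivalent to the attribute this commit introduces:

import numpy as np

def maybe_prepend_intercept_column(X: np.ndarray, fit_intercept: bool) -> np.ndarray:
    # when fitting an intercept, prepend a column of ones to the feature matrix
    if not fit_intercept:
        return X
    X_with_intercept = np.ones(shape=np.add(X.shape, (0, 1)))
    X_with_intercept[:, 1:] = X
    return X_with_intercept

X = np.array([[0.25, -0.5],
              [0.75, 0.1]])
print(maybe_prepend_intercept_column(X, True))   # columns: [1, x1, x2]
print(maybe_prepend_intercept_column(X, False))  # X unchanged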
@@ -762,23 +764,26 @@ def __getitem__(

         self.set_action(state)

-        intercept_state_features = np.append([1.0], self.feature_extractor.extract(state, True))
+        state_feature_vector = self.feature_extractor.extract(state, True)

+        if self.fit_intercept:
+            state_feature_vector = np.append([1.0], state_feature_vector)
+
         # initialize coefficients for each action's a-shape parameter
         if self.action_theta_a is None:
             self.action_theta_a = np.zeros(
-                shape=(self.environment.get_action_space_dimensionality(), intercept_state_features.shape[0])
+                shape=(self.environment.get_action_space_dimensionality(), state_feature_vector.shape[0])
             )

         # initialize coefficients for each action's b-shape parameter
         if self.action_theta_b is None:
             self.action_theta_b = np.zeros(
-                shape=(self.environment.get_action_space_dimensionality(), intercept_state_features.shape[0])
+                shape=(self.environment.get_action_space_dimensionality(), state_feature_vector.shape[0])
             )

         # calculate the modeled shape parameters of each action dimension
-        action_a = np.exp(self.action_theta_a.dot(intercept_state_features))
-        action_b = np.exp(self.action_theta_b.dot(intercept_state_features))
+        action_a = np.exp(self.action_theta_a.dot(state_feature_vector))
+        action_b = np.exp(self.action_theta_b.dot(state_feature_vector))

         try:
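For context, the policy models each action dimension's beta-distribution shape parameters as exponentiated linear functions of the state features, a = exp(theta_a . x) and b = exp(theta_b . x), with the intercept feature included only when the intercept is fit. A hedged, standalone sketch of that computation; the dimensions and feature values are illustrative, and scipy's beta distribution stands in for the policy's own sampling machinery:

import numpy as np
from scipy.stats import beta

fit_intercept = True
x = np.array([0.25, -0.5])        # scaled state features
if fit_intercept:
    x = np.append([1.0], x)       # prepend the intercept feature

action_dims = 1                   # MountainCarContinuous has a 1-d action space
theta_a = np.zeros((action_dims, x.shape[0]))  # zero-initialized, as in the commit
theta_b = np.zeros((action_dims, x.shape[0]))

a = np.exp(theta_a.dot(x))        # exp keeps the shape parameters positive
b = np.exp(theta_b.dot(x))        # zero thetas give a = b = 1, i.e. the uniform beta(1, 1)

sample = beta.rvs(a, b)           # one draw per action dimension, in [0.0, 1.0]
print(a, b, sample)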
