
Commit

Cleaned up reacher Td3 and removed reacher DDPG due to slow/poor convergence
lajd committed Sep 27, 2020
1 parent 16ef5db commit 96ad6b1
Showing 21 changed files with 113 additions and 729 deletions.
31 changes: 26 additions & 5 deletions agents/models/components/mlp.py
@@ -17,38 +17,57 @@ def __init__(
output_layer_initialization_fn: Optional[Callable] = None,
with_batchnorm: bool = False
):
"""
:param layer_sizes: Size for each linear layer
:param activation_function: Activation between layers
:param output_function: Any output torch.nn.Module to be applied at the head
:param dropout: Dropout for linear layers
:param seed: Random seed
:param hidden_layer_initialization_fn: How to initialize hidden linear layers
:param output_layer_initialization_fn: How to initialize the last layer of the MLP defined by layer_sizes
:param with_batchnorm: Apply batchnorm between linear layers
Order is always (input_bn)->FC->BN->Activation->Dropout->FC
"""
super().__init__()

if len(layer_sizes) < 2:
raise ValueError("Must provide at least 2 layer sizes")
if seed:
self.set_seed(seed)

mlp_layers = torch.nn.ModuleList([])

# Input BN (batchnorm applied to the network inputs)
if with_batchnorm:
mlp_layers.append(torch.nn.BatchNorm1d(layer_sizes[0]))

# HL 1
first_layer = torch.nn.Linear(layer_sizes[0], layer_sizes[1])
if hidden_layer_initialization_fn:
first_layer.weight.data.uniform_(*hidden_layer_initialization_fn(first_layer))

mlp_layers.append(first_layer)

if len(layer_sizes) == 2:
mlp_layers.append(activation_function)

# HL 2-N
previous_output = layer_sizes[1]
for n_out in layer_sizes[2:]:
# BN
if with_batchnorm:
mlp_layers.append(torch.nn.BatchNorm1d(previous_output))

# Activation
mlp_layers.append(activation_function)

# Dropout
if dropout:
mlp_layers.append(torch.nn.Dropout(dropout))

# Next FC
next_layer = torch.nn.Linear(previous_output, n_out)
if hidden_layer_initialization_fn:
next_layer.weight.data.uniform_(*hidden_layer_initialization_fn(next_layer))

mlp_layers.append(next_layer)

previous_output = n_out
@@ -57,9 +76,11 @@ def __init__(
mlp_layers[-1].weight.data.uniform_(*output_layer_initialization_fn(mlp_layers[-1]))
mlp_layers[-1].bias.data.uniform_(*output_layer_initialization_fn(mlp_layers[-1]))

# Apply output function -- Can be an Activation or a module
if output_function:
mlp_layers.append(output_function)

# Stack
self.mlp_layers = torch.nn.Sequential(*mlp_layers)

def forward(self, x: torch.FloatTensor) -> torch.Tensor:
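
For reference, a minimal usage sketch of the cleaned-up MLP component follows. The class name MLP, the import path usage, and the fan-in initialization rule are assumptions for illustration; the keyword arguments mirror the docstring above, and the layer order follows (input_bn)->FC->BN->Activation->Dropout->FC.

import numpy as np
import torch
from agents.models.components.mlp import MLP  # class name assumed

def fan_in_init(layer: torch.nn.Linear):
    # Hypothetical (low, high) bounds for uniform_ initialization based on fan-in
    bound = 1.0 / np.sqrt(layer.weight.data.size(1))
    return -bound, bound

mlp = MLP(
    layer_sizes=(33, 256, 128, 4),             # input dim, two hidden layers, output dim
    activation_function=torch.nn.LeakyReLU(),
    output_function=torch.nn.Tanh(),           # e.g. bound continuous actions to [-1, 1]
    dropout=0.1,
    seed=123,
    hidden_layer_initialization_fn=fan_in_init,
    output_layer_initialization_fn=lambda layer: (-3e-3, 3e-3),
    with_batchnorm=True,
)

states = torch.randn(64, 33)                   # batch of 64 observations
actions = mlp(states)                          # -> shape (64, 4)
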
1 change: 1 addition & 0 deletions agents/policies/ddpg_policy.py
@@ -74,6 +74,7 @@ def get_actions_():
action = np.clip(action, self.action_range[0], self.action_range[1])
else:
raise ValueError('Must provide either epsilon_scheduler or noise')

return Action(value=action)

def get_random_action(self, *args) -> Action:
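
The hunk above ends the noisy-action branch by clipping to the policy's action range. A hedged, standalone sketch of that pattern is shown below; the actor output, noise sample, and bounds here are placeholders rather than the actual ddpg_policy.py members.

import numpy as np

def noisy_clipped_action(actor_output: np.ndarray,
                         noise_sample: np.ndarray,
                         action_range=(-1.0, 1.0)) -> np.ndarray:
    # Add exploration noise to the deterministic actor output, then clip to the valid range
    action = actor_output + noise_sample
    return np.clip(action, action_range[0], action_range[1])

# Example: 4-dimensional continuous action with Gaussian exploration noise
a = noisy_clipped_action(np.array([0.9, -0.2, 0.5, -1.1]),
                         np.random.normal(0.0, 0.1, size=4))
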
5 changes: 3 additions & 2 deletions agents/ppo_agent.py
@@ -198,8 +198,8 @@ def process_trajectory(self):
values = torch.cat(values).detach()
states = torch.cat(states)
actions = torch.cat(actions)
joint_states = torch.cat(joint_states) if len(joint_states) > 1 else None
joint_actions = torch.cat(joint_actions) if len(joint_actions) > 1 else None
joint_states = torch.cat(joint_states) if joint_states[0] is not None else joint_states
joint_actions = torch.cat(joint_actions) if joint_actions[0] is not None else joint_actions

advantage = returns - values

@@ -239,6 +239,7 @@ def step_episode(self, episode: int, *args, **kwargs):
self.process_trajectory()
if len(self.current_trajectory_memory) >= self.batch_size * self.min_batches_for_training:
for _ in range(self.num_learning_updates):
print('learning')
for sampled_states, sampled_actions, sampled_log_probs, sampled_returns, sampled_advantages, _, _ in self.current_trajectory_memory.sample(self.batch_size):
self._learn(sampled_log_probs, sampled_states, sampled_actions, sampled_advantages, sampled_returns)
self.current_trajectory_memory.reset()
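
The guard on joint_states/joint_actions now keys off whether joint information was actually collected rather than off trajectory length. A minimal sketch of the assumed semantics (maybe_cat is a hypothetical helper, not part of ppo_agent.py):

import torch

def maybe_cat(tensors):
    # Concatenate only when joint observations were recorded; in the
    # single-agent case every entry is None and the list is passed through unchanged.
    return torch.cat(tensors) if tensors[0] is not None else tensors

single_agent = [None, None, None]                      # no joint states collected
multi_agent = [torch.ones(2, 4), torch.zeros(2, 4)]    # joint states per time step

assert maybe_cat(single_agent) is single_agent         # left untouched
assert maybe_cat(multi_agent).shape == (4, 4)          # concatenated along dim 0
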
14 changes: 14 additions & 0 deletions simulation/utils.py
@@ -25,6 +25,20 @@ def default_step_episode_agents_fn(brain_set: BrainSet, episode_number: int):
agent.step_episode(episode_number)


def single_agent_step_agents_fn(brain_set: BrainSet, next_brain_environment: dict, t: int):
for brain_name, brain_environment in next_brain_environment.items():
agent = brain_set[brain_name].agents[0]
brain_agent_experience = Experience(
state=brain_environment['states'],
action=brain_environment['actions'][0],
reward=brain_environment['rewards'],
next_state=brain_environment['next_states'],
done=torch.LongTensor(brain_environment['dones']),
t_step=t,
)
agent.step(brain_agent_experience)


def default_preprocess_brain_actions_for_env_fn(brain_actions: Dict[str, List[Action]]) -> Dict[str, List[Action]]:

assert len(brain_actions) > 0 and isinstance(list(brain_actions.values())[0][0], Action), brain_actions
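
The new single_agent_step_agents_fn packages one brain's transition into an Experience and steps that brain's single agent. A hedged sketch of the per-brain payload it reads is below; the brain name, state dimension, and reward value are placeholders, and in the real code the 'actions' entries are Action objects rather than None.

import torch

next_brain_environment = {
    'ReacherBrain': {                      # hypothetical brain name
        'states': torch.randn(1, 33),      # current observation(s)
        'actions': [None],                 # Action object for the single agent in the real code
        'rewards': [0.1],
        'next_states': torch.randn(1, 33),
        'dones': [0],                      # wrapped in torch.LongTensor by the helper
    }
}

# single_agent_step_agents_fn(brain_set, next_brain_environment, t=0)  # requires a real BrainSet
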

This file was deleted.

4 changes: 0 additions & 4 deletions tasks/crawler/solutions/ddpg/__init__.py

This file was deleted.

15 changes: 0 additions & 15 deletions tasks/crawler/solutions/ddpg/eval_td3.py

This file was deleted.

228 changes: 0 additions & 228 deletions tasks/crawler/solutions/ddpg/train_td3.py

This file was deleted.
