[Feature] Linearise reward transform (#2681)
Co-authored-by: Louis Faury <louis.faury@helsing.ai>
Co-authored-by: Vincent Moens <vincentmoens@gmail.com>
3 people authored Jan 13, 2025
1 parent ed656a1 commit ff1ff7e
Showing 6 changed files with 453 additions and 3 deletions.
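
For context, the LineariseRewards transform added in this commit scalarises a multi-objective reward by taking a weighted sum of its components. A minimal usage sketch, based only on the constructor arguments exercised in the tests below (in_keys, out_keys, weights); base_env is a stand-in for any multi-objective environment:

import torch
from torchrl.envs import TransformedEnv
from torchrl.envs.transforms import LineariseRewards

# Write the weighted sum of a 3-component reward to a new "scalar_reward" entry.
# Omitting `weights` falls back to unit weights, i.e. a plain sum of the components.
transform = LineariseRewards(
    in_keys=[("reward",)],
    out_keys=[("scalar_reward",)],
    weights=torch.tensor([1.0, 2.0, 0.5]),
)
env = TransformedEnv(base_env, transform)
rollout = env.rollout(10)  # rollout["next", "scalar_reward"] holds the scalarised reward
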
1 change: 1 addition & 0 deletions docs/source/reference/envs.rst
@@ -829,6 +829,7 @@ to be able to create this other composition:
GrayScale
InitTracker
KLRewardTransform
LineariseRewards
NoopResetEnv
ObservationNorm
ObservationTransform
331 changes: 330 additions & 1 deletion test/test_transforms.py
@@ -84,6 +84,7 @@
from torchrl._utils import _replace_last, prod
from torchrl.data import (
Bounded,
BoundedContinuous,
Categorical,
Composite,
LazyTensorStorage,
@@ -92,6 +93,7 @@
TensorSpec,
TensorStorage,
Unbounded,
UnboundedContinuous,
)
from torchrl.envs import (
ActionMask,
@@ -117,6 +119,7 @@
GrayScale,
gSDENoise,
InitTracker,
LineariseRewards,
MultiStepTransform,
NoopResetEnv,
ObservationNorm,
@@ -412,7 +415,7 @@ def test_transform_rb(self, rbclass):
assert ((sample["reward"] == 0) | (sample["reward"] == 1)).all()

def test_transform_inverse(self):
raise pytest.skip("No inverse for BinerizedReward")
raise pytest.skip("No inverse for BinarizedReward")


class TestClipTransform(TransformBase):
@@ -12403,6 +12406,332 @@ def test_transform_inverse(self):
pytest.skip("Tested elsewhere")


class TestLineariseRewards(TransformBase):
def test_weight_shape_error(self):
with pytest.raises(
ValueError, match="Expected weights to be a unidimensional tensor"
):
LineariseRewards(in_keys=("reward",), weights=torch.ones(size=(2, 4)))

def test_weight_sign_error(self):
with pytest.raises(ValueError, match="Expected all weights to be >0"):
LineariseRewards(in_keys=("reward",), weights=-torch.ones(size=(2,)))

def test_discrete_spec_error(self):
with pytest.raises(
NotImplementedError,
match="Aggregation of rewards that take discrete values is not supported.",
):
transform = LineariseRewards(in_keys=("reward",))
reward_spec = Categorical(n=2)
transform.transform_reward_spec(reward_spec)

@pytest.mark.parametrize(
"reward_spec",
[
UnboundedContinuous(shape=3),
BoundedContinuous(0, 1, shape=2),
],
)
def test_single_trans_env_check(self, reward_spec: TensorSpec):
env = TransformedEnv(
ContinuousActionVecMockEnv(reward_spec=reward_spec),
LineariseRewards(in_keys=["reward"]), # will use default weights
)
check_env_specs(env)

@pytest.mark.parametrize(
"reward_spec",
[
UnboundedContinuous(shape=3),
BoundedContinuous(0, 1, shape=2),
],
)
def test_serial_trans_env_check(self, reward_spec: TensorSpec):
def make_env():
return TransformedEnv(
ContinuousActionVecMockEnv(reward_spec=reward_spec),
LineariseRewards(in_keys=["reward"]), # will use default weights
)

env = SerialEnv(2, make_env)
check_env_specs(env)

@pytest.mark.parametrize(
"reward_spec",
[
UnboundedContinuous(shape=3),
BoundedContinuous(0, 1, shape=2),
],
)
def test_parallel_trans_env_check(
self, maybe_fork_ParallelEnv, reward_spec: TensorSpec
):
def make_env():
return TransformedEnv(
ContinuousActionVecMockEnv(reward_spec=reward_spec),
LineariseRewards(in_keys=["reward"]), # will use default weights
)

env = maybe_fork_ParallelEnv(2, make_env)
try:
check_env_specs(env)
finally:
try:
env.close()
except RuntimeError:
pass

@pytest.mark.parametrize(
"reward_spec",
[
UnboundedContinuous(shape=3),
BoundedContinuous(0, 1, shape=2),
],
)
def test_trans_serial_env_check(self, reward_spec: TensorSpec):
def make_env():
return ContinuousActionVecMockEnv(reward_spec=reward_spec)

env = TransformedEnv(
SerialEnv(2, make_env), LineariseRewards(in_keys=["reward"])
)
check_env_specs(env)

@pytest.mark.parametrize(
"reward_spec",
[
UnboundedContinuous(shape=3),
BoundedContinuous(0, 1, shape=2),
],
)
def test_trans_parallel_env_check(
self, maybe_fork_ParallelEnv, reward_spec: TensorSpec
):
def make_env():
return ContinuousActionVecMockEnv(reward_spec=reward_spec)

env = TransformedEnv(
maybe_fork_ParallelEnv(2, make_env),
LineariseRewards(in_keys=["reward"]),
)
try:
check_env_specs(env)
finally:
try:
env.close()
except RuntimeError:
pass

@pytest.mark.parametrize("reward_key", [("reward",), ("agents", "reward")])
@pytest.mark.parametrize(
"num_rewards, weights",
[
(1, None),
(3, None),
(2, [1.0, 2.0]),
],
)
def test_transform_no_env(self, reward_key, num_rewards, weights):
out_keys = reward_key[:-1] + ("scalar_reward",)
t = LineariseRewards(in_keys=[reward_key], out_keys=[out_keys], weights=weights)
td = TensorDict({reward_key: torch.randn(num_rewards)}, [])
t._call(td)

weights = torch.ones(num_rewards) if weights is None else torch.tensor(weights)
expected = sum(
w * r
for w, r in zip(
weights,
td[reward_key],
)
)
torch.testing.assert_close(td[out_keys], expected)

@pytest.mark.parametrize("reward_key", [("reward",), ("agents", "reward")])
@pytest.mark.parametrize(
"num_rewards, weights",
[
(1, None),
(3, None),
(2, [1.0, 2.0]),
],
)
def test_transform_compose(self, reward_key, num_rewards, weights):
out_keys = reward_key[:-1] + ("scalar_reward",)
t = Compose(
LineariseRewards(in_keys=[reward_key], out_keys=[out_keys], weights=weights)
)
td = TensorDict({reward_key: torch.randn(num_rewards)}, [])
t._call(td)

weights = torch.ones(num_rewards) if weights is None else torch.tensor(weights)
expected = sum(
w * r
for w, r in zip(
weights,
td[reward_key],
)
)
torch.testing.assert_close(td[out_keys], expected)

class _DummyMultiObjectiveEnv(EnvBase):
"""A dummy multi-objective environment."""

def __init__(self, num_rewards: int) -> None:
super().__init__()
self._num_rewards = num_rewards

self.observation_spec = Composite(
observation=UnboundedContinuous((*self.batch_size, 3))
)
self.action_spec = Categorical(2, (*self.batch_size, 1), dtype=torch.bool)
self.done_spec = Categorical(2, (*self.batch_size, 1), dtype=torch.bool)
self.full_done_spec["truncated"] = self.full_done_spec["terminated"].clone()
self.reward_spec = UnboundedContinuous(shape=(*self.batch_size, num_rewards))

def _reset(self, tensordict: TensorDict) -> TensorDict:
return self.observation_spec.sample()

def _step(self, tensordict: TensorDict) -> TensorDict:
done, terminated = False, False
reward = torch.randn((self._num_rewards,))

return TensorDict(
{
"observation": self.observation_spec["observation"].sample(),
"done": done,
"terminated": terminated,
"reward": reward,
}
)

def _set_seed(self) -> None:
pass

@pytest.mark.parametrize(
"num_rewards, weights",
[
(1, None),
(3, None),
(2, [1.0, 2.0]),
],
)
def test_transform_env(self, num_rewards, weights):
weights = weights if weights is not None else [1.0 for _ in range(num_rewards)]

transform = LineariseRewards(
in_keys=("reward",), out_keys=("scalar_reward",), weights=weights
)
env = TransformedEnv(self._DummyMultiObjectiveEnv(num_rewards), transform)
rollout = env.rollout(10)
scalar_reward = rollout.get(("next", "scalar_reward"))
assert scalar_reward.shape[-1] == 1

expected = sum(
w * r
for w, r in zip(weights, rollout.get(("next", "reward")).split(1, dim=-1))
)
torch.testing.assert_close(scalar_reward, expected)

@pytest.mark.parametrize(
"num_rewards, weights",
[
(1, None),
(3, None),
(2, [1.0, 2.0]),
],
)
def test_transform_model(self, num_rewards, weights):
weights = weights if weights is not None else [1.0 for _ in range(num_rewards)]
transform = LineariseRewards(
in_keys=("reward",), out_keys=("scalar_reward",), weights=weights
)

model = nn.Sequential(transform, nn.Identity())
td = TensorDict({"reward": torch.randn(num_rewards)}, [])
model(td)

expected = sum(w * r for w, r in zip(weights, td["reward"]))
torch.testing.assert_close(td["scalar_reward"], expected)

@pytest.mark.parametrize("rbclass", [ReplayBuffer, TensorDictReplayBuffer])
def test_transform_rb(self, rbclass):
num_rewards = 3
weights = None
transform = LineariseRewards(
in_keys=("reward",), out_keys=("scalar_reward",), weights=weights
)

rb = rbclass(storage=LazyTensorStorage(10))
td = TensorDict({"reward": torch.randn(num_rewards)}, []).expand(10)
rb.append_transform(transform)
rb.extend(td)

td = rb.sample(2)
torch.testing.assert_close(td["scalar_reward"], td["reward"].sum(-1))

def test_transform_inverse(self):
raise pytest.skip("No inverse for LineariseReward")

@pytest.mark.parametrize(
"weights, reward_spec, expected_spec",
[
(None, UnboundedContinuous(shape=3), UnboundedContinuous(shape=1)),
(
None,
BoundedContinuous(0, 1, shape=3),
BoundedContinuous(0, 3, shape=1),
),
(
None,
BoundedContinuous(low=[-1.0, -2.0], high=[1.0, 2.0]),
BoundedContinuous(low=-3.0, high=3.0, shape=1),
),
(
[1.0, 0.0],
BoundedContinuous(
low=[-1.0, -2.0],
high=[1.0, 2.0],
shape=2,
),
BoundedContinuous(low=-1.0, high=1.0, shape=1),
),
],
)
def test_reward_spec(
self,
weights,
reward_spec: TensorSpec,
expected_spec: TensorSpec,
) -> None:
transform = LineariseRewards(in_keys=("reward",), weights=weights)
assert transform.transform_reward_spec(reward_spec) == expected_spec

def test_composite_reward_spec(self) -> None:
weights = None
reward_spec = Composite(
agent_0=Composite(
reward=BoundedContinuous(low=[0, 0, 0], high=[1, 1, 1], shape=3)
),
agent_1=Composite(
reward=BoundedContinuous(
low=[-1, -1, -1],
high=[1, 1, 1],
shape=3,
)
),
)
expected_reward_spec = Composite(
agent_0=Composite(reward=BoundedContinuous(low=0, high=3, shape=1)),
agent_1=Composite(reward=BoundedContinuous(low=-3, high=3, shape=1)),
)
transform = LineariseRewards(
in_keys=[("agent_0", "reward"), ("agent_1", "reward")], weights=weights
)
assert transform.transform_reward_spec(reward_spec) == expected_reward_spec


if __name__ == "__main__":
args, unknown = argparse.ArgumentParser().parse_known_args()
pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
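
The test_reward_spec cases above also pin down the expected bound arithmetic: with non-negative weights w, a bounded reward spec with per-component bounds low/high collapses to a one-dimensional spec bounded by sum(w * low) and sum(w * high). A minimal sketch of that arithmetic, reproducing the [1.0, 0.0] case from the parametrization (illustrative only, not the library implementation):

import torch

weights = torch.tensor([1.0, 0.0])
low = torch.tensor([-1.0, -2.0])
high = torch.tensor([1.0, 2.0])

scalar_low = (weights * low).sum()    # tensor(-1.), the expected BoundedContinuous low
scalar_high = (weights * high).sum()  # tensor(1.), the expected BoundedContinuous high
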
1 change: 1 addition & 0 deletions torchrl/data/__init__.py
@@ -72,6 +72,7 @@
Binary,
BinaryDiscreteTensorSpec,
Bounded,
BoundedContinuous,
BoundedTensorSpec,
Categorical,
Composite,
1 change: 1 addition & 0 deletions torchrl/envs/__init__.py
@@ -69,6 +69,7 @@
gSDENoise,
InitTracker,
KLRewardTransform,
LineariseRewards,
MultiStepTransform,
NoopResetEnv,
ObservationNorm,
1 change: 1 addition & 0 deletions torchrl/envs/transforms/__init__.py
@@ -32,6 +32,7 @@
GrayScale,
gSDENoise,
InitTracker,
LineariseRewards,
NoopResetEnv,
ObservationNorm,
ObservationTransform,
