-
Notifications
You must be signed in to change notification settings - Fork 259
/
Copy pathtoyctf-random.py
87 lines (64 loc) · 1.9 KB
/
toyctf-random.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# ---
# jupyter:
# jupytext:
# formats: py:percent,ipynb
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.4
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---
# %%
# %% [markdown] magic_args="[markdown]"
# Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.
#
# # Random agent playing the Capture The Flag toy environment
# %%
import sys
import logging
import gymnasium as gym
# Send log records of level INFO and above to standard output,
# each line prefixed with the record's level name.
logging.basicConfig(
    format="%(levelname)s: %(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)
# %matplotlib inline
# %% [markdown]
# ### CyberBattle simulation
# - **Environment**: a network of nodes with assigned vulnerabilities/functionalities, value, and firewall configuration
# - **Action space**: local attack | remote attack | authenticated connection
# - **Observation**: effects of action on environment
# %%
from typing import cast
from cyberbattle._env.cyberbattle_env import CyberBattleEnv
# Build the toy Capture-The-Flag environment from its registered Gymnasium id.
# NOTE(review): the id is presumably registered as a side effect of importing
# the cyberbattle package - confirm.
_gym_env = gym.make("CyberBattleToyCtf-v0")
# typing.cast performs no runtime conversion or check; it only tells the type
# checker to treat the object as a CyberBattleEnv so that environment-specific
# members used below (e.g. sample_valid_action) are accepted.
gym_env = cast(CyberBattleEnv, _gym_env)
# %%
# Notebook display only: as the last expression of a cell, the value is echoed
# by Jupyter. When the file is run as a plain script nothing is printed.
gym_env.environment
# %%
# Display the action space exposed by the environment.
gym_env.action_space
# %%
# Draw one random sample from the action space.
# NOTE(review): presumably not guaranteed to be a valid action, unlike
# sample_valid_action() used by the agent loop below - confirm.
gym_env.action_space.sample()
# %% [markdown]
# ## A random agent
# %%
# Play a single episode with an agent that, at every step, asks the environment
# for a randomly sampled valid action and applies it.
for i_episode in range(1):
    observation, _ = gym_env.reset()

    # Sum of the rewards returned by every step of this episode.
    total_reward = 0

    # Hard cap on the number of steps so the episode always ends.
    for t in range(5600):
        action = gym_env.sample_valid_action()

        # The fourth value returned by step() is discarded; only `done`
        # is used to end the episode.
        observation, reward, done, _, info = gym_env.step(action)

        total_reward += reward

        # Only report (and render) the steps that actually earned a reward.
        if reward > 0:
            # f-prefix added: without it the literal text "{action}" was
            # printed instead of the action that was taken.
            print(f"####### rewarded action: {action}")
            print(f"total_reward={total_reward} reward={reward}")
            gym_env.render()

        if done:
            print(f"Episode finished after {t + 1} timesteps")
            break

    # NOTE(review): final render is assumed to run once per episode, after the
    # step loop ends - confirm against the intended notebook layout.
    gym_env.render()

gym_env.close()
print("simulation ended")
# %% [markdown]
# ### End of simulation
# %%