-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSequenceGuessingEnv .cs
81 lines (68 loc) · 1.97 KB
/
SequenceGuessingEnv .cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
using System;
using System.Threading.Tasks;
using OneOf;
using RLMatrix;
/// <summary>
/// Toy reinforcement-learning environment in which the agent must choose the
/// "stop" action on the step whose number matches a randomly drawn target length.
/// </summary>
/// <remarks>
/// The observation is a two-element array <c>[state, stepCounter]</c>.
/// At the start of an episode <c>state</c> holds the target length; once the step
/// counter exceeds <see cref="HintVisibleSteps"/> on a non-stop step, <c>state</c> is
/// overwritten with the step counter, so the target is no longer observable.
/// A finished episode is restarted automatically by the next call to
/// <see cref="GetCurrentState"/> or <see cref="Step"/>.
/// </remarks>
public class SequenceGuessingEnv : IEnvironmentAsync<float[]>
{
    /// <summary>Step budget assigned to <see cref="maxSteps"/> on every (re)initialisation.</summary>
    private const int DefaultMaxSteps = 50;

    /// <summary>Number of values in the observation returned by <see cref="GetCurrentState"/>.</summary>
    private const int ObservationLength = 2;

    /// <summary>Size reported for the single entry of <see cref="actionSize"/>.</summary>
    private const int ActionCount = 5;

    /// <summary>Action id that ends the episode and scores the guess; every other id continues.</summary>
    private const int StopAction = 1;

    /// <summary>Inclusive lower bound of the random target length.</summary>
    private const int MinTargetLength = 1;

    /// <summary>Exclusive upper bound of the random target length (targets are 1..46).</summary>
    private const int MaxTargetLengthExclusive = 47;

    /// <summary>While the step counter is at or below this value, <c>state</c> keeps showing the target.</summary>
    private const int HintVisibleSteps = 3;

    /// <summary>Reward for stopping exactly on the target step.</summary>
    private const int PerfectStopReward = 100;

    /// <summary>Reward deducted per step of distance between the final step and the target.</summary>
    private const int PenaltyPerStepOff = 20;

    /// <summary>Reward returned for every non-terminal step.</summary>
    private const float ContinueReward = 0.1f;

    /// <summary>Current step number of the episode; starts at 1 and is incremented by each <see cref="Step"/>.</summary>
    public int stepCounter { get; set; }

    /// <summary>Step count at which a still-running episode is terminated. Reset to 50 on every initialisation.</summary>
    public int maxSteps { get; set; }

    /// <summary>True once the current episode has ended (stop action taken or step budget exhausted).</summary>
    public bool isDone { get; set; }

    /// <summary>Observation size; set to 2, matching the array built in <see cref="GetCurrentState"/>.</summary>
    public OneOf<int, (int, int)> stateSize { get; set; }

    /// <summary>
    /// Action space description; set to a single entry of 5.
    /// NOTE(review): presumably one discrete action head with 5 choices per the RLMatrix convention — confirm.
    /// Only action id 1 is treated specially by <see cref="Step"/>.
    /// </summary>
    public int[] actionSize { get; set; }

    // First observation component: the target length early in the episode, the step counter later on.
    private float state;

    // Target step number drawn for the current episode.
    private int randomLength;

    private readonly Random random = new Random();

    /// <summary>Creates the environment and starts the first episode.</summary>
    public SequenceGuessingEnv()
    {
        InitialiseAsync();
    }

    /// <summary>
    /// Returns the current observation <c>[state, stepCounter]</c>, starting a new
    /// episode first if the previous one has finished.
    /// </summary>
    /// <returns>A completed task holding a freshly allocated two-element observation array.</returns>
    public Task<float[]> GetCurrentState()
    {
        if (isDone)
        {
            // Reset() only wraps this synchronous call, so invoke it directly
            // instead of blocking on a Task with Wait().
            InitialiseAsync();
        }

        return Task.FromResult(new float[] { state, stepCounter });
    }

    /// <summary>
    /// Starts a new episode: restores the default sizes and step budget, draws a new
    /// target length and exposes it through the first observation component.
    /// Despite its name this method is synchronous; the name is kept for compatibility.
    /// </summary>
    public void InitialiseAsync()
    {
        maxSteps = DefaultMaxSteps;
        isDone = false;
        stateSize = ObservationLength;
        actionSize = new int[] { ActionCount };
        randomLength = random.Next(MinTargetLength, MaxTargetLengthExclusive);
        stepCounter = 1;
        state = randomLength;
    }

    /// <summary>Starts a new episode.</summary>
    /// <returns>An already completed task.</returns>
    public Task Reset()
    {
        InitialiseAsync();
        return Task.CompletedTask;
    }

    /// <summary>
    /// Advances the episode by one step using the first entry of <paramref name="actionsIds"/>.
    /// </summary>
    /// <param name="actionsIds">
    /// Chosen action ids; only index 0 is read. Id 1 stops the episode and scores the guess,
    /// any other id continues.
    /// </param>
    /// <returns>
    /// A completed task holding <c>(reward, done)</c>:
    /// on stop, <c>100 - 20 * |stepCounter - target|</c>;
    /// when the step budget runs out, <c>-20 * |stepCounter - target|</c>;
    /// otherwise <c>0.1</c> with <c>done</c> false.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="actionsIds"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="actionsIds"/> is empty.</exception>
    public Task<(float, bool)> Step(int[] actionsIds)
    {
        // Validate before touching any state so a bad call cannot corrupt the episode.
        if (actionsIds is null)
        {
            throw new ArgumentNullException(nameof(actionsIds));
        }

        if (actionsIds.Length == 0)
        {
            throw new ArgumentException("At least one action id is required.", nameof(actionsIds));
        }

        if (isDone)
        {
            // Stepping a finished episode transparently starts a new one.
            InitialiseAsync();
        }

        stepCounter++;

        if (actionsIds[0] == StopAction)
        {
            int stopReward = PerfectStopReward - PenaltyPerStepOff * Math.Abs(stepCounter - randomLength);
            return Task.FromResult(FinishEpisode(stopReward));
        }

        if (stepCounter > HintVisibleSteps)
        {
            // From here on the observation no longer reveals the target.
            state = stepCounter;
        }

        if (stepCounter >= maxSteps)
        {
            int timeoutReward = -PenaltyPerStepOff * Math.Abs(stepCounter - randomLength);
            return Task.FromResult(FinishEpisode(timeoutReward));
        }

        return Task.FromResult((ContinueReward, isDone));
    }

    /// <summary>Logs the final reward, marks the episode as done and builds the terminal step result.</summary>
    private (float, bool) FinishEpisode(int reward)
    {
        Console.WriteLine($"Finished with reward {reward} at {stepCounter} steps");
        isDone = true;
        return ((float)reward, isDone);
    }
}