-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpublic_tests.py
79 lines (60 loc) · 3.46 KB
/
public_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from tensorflow.keras.activations import relu, linear
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
def test_network(target):
num_actions = 4
state_size = 8
i = 0
assert len(target.layers) == 3, f"Wrong number of layers. Expected 3 but got {len(target.layers)}"
assert list(target.input_shape) == [None, state_size], \
f"Wrong input shape. Expected [None, 400] but got {list(target.input_shape)}"
expected = [[Dense, [None, 64], relu],
[Dense, [None, 64], relu],
[Dense, [None, num_actions], linear]]
for layer in target.layers:
assert type(layer) == expected[i][0], \
f"Wrong type in layer {i}. Expected {expected[i][0]} but got {type(layer)}"
assert list(layer.output.shape) == expected[i][1], \
f"Wrong number of units in layer {i}. Expected {expected[i][1]} but got {list(layer.output.shape)}"
assert layer.activation == expected[i][2], \
f"Wrong activation in layer {i}. Expected {expected[i][2]} but got {layer.activation}"
i = i + 1
print("\033[92mAll tests passed!")
def test_optimizer(target, ALPHA):
assert type(target) == Adam, f"Wrong optimizer. Expected: {Adam}, got: {target}"
assert np.isclose(target.learning_rate.numpy(), ALPHA), f"Wrong alpha. Expected: {ALPHA}, got: {target.learning_rate.numpy()}"
print("\033[92mAll tests passed!")
def test_compute_loss(target):
num_actions = 4
def target_q_network_random(inputs):
return np.float32(np.random.rand(inputs.shape[0],num_actions))
def q_network_random(inputs):
return np.float32(np.random.rand(inputs.shape[0],num_actions))
def target_q_network_ones(inputs):
return np.float32(np.ones((inputs.shape[0], num_actions)))
def q_network_ones(inputs):
return np.float32(np.ones((inputs.shape[0], num_actions)))
np.random.seed(1)
states = np.float32(np.random.rand(64, 8))
actions = np.float32(np.floor(np.random.uniform(0, 1, (64, )) * 4))
rewards = np.float32(np.random.rand(64, ))
next_states = np.float32(np.random.rand(64, 8))
done_vals = np.float32((np.random.uniform(0, 1, size=(64,)) > 0.96) * 1)
loss = target((states, actions, rewards, next_states, done_vals), 0.995, q_network_random, target_q_network_random)
assert np.isclose(loss, 0.6991737), f"Wrong value. Expected {0.6991737}, got {loss}"
# Test when episode terminates
done_vals = np.float32(np.ones((64,)))
loss = target((states, actions, rewards, next_states, done_vals), 0.995, q_network_ones, target_q_network_ones)
assert np.isclose(loss, 0.343270182), f"Wrong value. Expected {0.343270182}, got {loss}"
# Test MSE with parameters A = B
done_vals = np.float32((np.random.uniform(0, 1, size=(64,)) > 0.96) * 1)
rewards = np.float32(np.ones((64, )))
loss = target((states, actions, rewards, next_states, done_vals), 0, q_network_ones, target_q_network_ones)
assert np.isclose(loss, 0), f"Wrong value. Expected {0}, got {loss}"
# Test MSE with parameters A = 0 and B = 1
done_vals = np.float32((np.random.uniform(0, 1, size=(64,)) > 0.96) * 1)
rewards = np.float32(np.zeros((64, )))
loss = target((states, actions, rewards, next_states, done_vals), 0, q_network_ones, target_q_network_ones)
assert np.isclose(loss, 1), f"Wrong value. Expected {1}, got {loss}"
print("\033[92mAll tests passed!")