# test_model.py
import time
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from constants import *
from model import *
from torch.autograd import Variable

th.manual_seed(RANDOM_SEED)


# Start test functions with test_ so the test runner can discover them.
def test_example():
    pass


# It's often useful to change HIDDEN_DIM to 1 while testing the code.
# This test ensures that you haven't forgotten to undo that (and similar things).
def test_constants():
    assert not DISABLE_CUDA
    assert HIDDEN_DIM == 200
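
# These tests are presumably collected by pytest (hence the test_ prefix); if so,
# a single test can be run on its own, e.g.:
#   pytest test_model.py::test_constants -q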


def test_dcn_model():
    # https://discuss.pytorch.org/t/solved-make-sure-that-pytorch-using-gpu-to-compute/4870/2
    # Is a GPU available?
    print("cuda device count = %d" % th.cuda.device_count())
    print("cuda is available = %d" % th.cuda.is_available())
    device = th.device("cuda:0" if th.cuda.is_available() and (not DISABLE_CUDA) else "cpu")

    doc = th.randn(BATCH_SIZE, 30, EMBEDDING_DIM, device=device)  # Fake word vec dimension set to EMBEDDING_DIM.
    que = th.randn(BATCH_SIZE, 5, EMBEDDING_DIM, device=device)   # Fake word vec dimension set to EMBEDDING_DIM.

    # Fake ground truth data (one batch of start and end indices).
    true_s = th.randint(0, doc.size()[1], (BATCH_SIZE,), device=device)
    true_e = th.randint(0, doc.size()[1], (BATCH_SIZE,), device=device)
    # Swap where necessary so that each start index is <= the corresponding end index.
    for i in range(BATCH_SIZE):
        true_s[i], true_e[i] = min(true_s[i], true_e[i]), max(true_s[i], true_e[i])

    # Run the model and check that gradients flow back to its parameters.
    model = DCNModel(BATCH_SIZE, device).to(device)
    loss, s, e = model.forward(doc, que, true_s, true_e)
    print("Predicted start: %s \nPredicted end: %s \nloss: %s" % (str(s), str(e), str(loss)))
    model.zero_grad()
    loss.backward()
    print("%d/%d parameters have non-None gradients."
          % (len([param for param in model.parameters() if param.grad is not None]),
             len(list(model.parameters()))))


def test_hmn():
    device = th.device("cuda:0" if th.cuda.is_available() and (not DISABLE_CUDA) else "cpu")
    hmn = HighwayMaxoutNetwork(BATCH_SIZE, DROPOUT, HIDDEN_DIM, MAXOUT_POOL_SIZE, device).to(device)

    # Fake inputs of the shapes that HighwayMaxoutNetwork expects.
    u_t = th.ones(BATCH_SIZE, 2 * HIDDEN_DIM, 1, device=device)
    h_i = th.ones(BATCH_SIZE, HIDDEN_DIM, 1, device=device)
    u_si_m_1 = th.ones(BATCH_SIZE, 2 * HIDDEN_DIM, 1, device=device)  # TODO device
    u_ei_m_1 = th.ones(BATCH_SIZE, 2 * HIDDEN_DIM, 1, device=device)

    output = hmn.forward(u_t, h_i, u_si_m_1, u_ei_m_1)

    # "output" has shape BATCH_SIZE x 1; check that, then call backward() on its mean (a scalar).
    assert output.size()[0] == hmn.batch_size
    assert output.size()[1] == 1
    th.mean(output).backward()
    print("%d/%d parameters have non-None gradients."
          % (len([param for param in hmn.parameters() if param.grad is not None]),
             len(list(hmn.parameters()))))


def test_decoder():
    max_iter = 10
    device = th.device("cuda:0" if th.cuda.is_available() and (not DISABLE_CUDA) else "cpu")
    dpd = DynamicPointerDecoder(BATCH_SIZE, max_iter, DROPOUT, DROPOUT, HIDDEN_DIM, MAXOUT_POOL_SIZE, device).to(device)

    U = th.ones(BATCH_SIZE, 2 * HIDDEN_DIM, 50, device=device)
    alphas, betas, s, e = dpd.forward(U)

    loss = th.mean(th.mean(alphas, dim=0)) + th.mean(th.mean(betas, dim=0))
    dpd.zero_grad()
    loss.backward()
    print(loss)
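
# Note: the mean of alphas and betas above is not a meaningful training objective;
# it is only a convenient scalar so that backward() can be exercised through the decoder.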


# Optimiser.
def test_optimiser():
    # Is a GPU available?
    print("cuda device count = %d" % th.cuda.device_count())
    print("cuda is available = %d" % th.cuda.is_available())
    device = th.device("cuda:0" if th.cuda.is_available() and (not DISABLE_CUDA) else "cpu")

    # Fake one batch of data.
    doc = th.randn(BATCH_SIZE, 30, EMBEDDING_DIM, device=device)  # Fake word vec dimension set to EMBEDDING_DIM.
    que = th.randn(BATCH_SIZE, 5, EMBEDDING_DIM, device=device)   # Fake word vec dimension set to EMBEDDING_DIM.

    # Fake one batch of ground truth (start <= end after the swap below).
    true_s = th.randint(0, doc.size()[1], (BATCH_SIZE,), device=device)
    true_e = th.randint(0, doc.size()[1], (BATCH_SIZE,), device=device)
    for i in range(BATCH_SIZE):
        true_s[i], true_e[i] = min(true_s[i], true_e[i]), max(true_s[i], true_e[i])

    model = DCNModel(BATCH_SIZE, device).to(device)
    optimizer = optim.Adam(model.parameters())

    N_STEPS = 3
    loss_values_over_steps = []
    for step_it in range(N_STEPS):
        optimizer.zero_grad()
        loss, _, _ = model(doc, que, true_s, true_e)
        loss.backward()
        optimizer.step()
        # .item() works for both 0-d and 1-element loss tensors; loss[0] fails for
        # 0-d tensors in recent PyTorch versions.
        loss_values_over_steps.append(loss.item())
        print("Loss after %d steps: %f" % (step_it + 1, loss.item()))

    # After a few optimisation steps the loss should not have increased.
    assert loss_values_over_steps[0] >= loss_values_over_steps[-1]
    print("Optimizer finished.")