-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel.py
59 lines (48 loc) · 2.17 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import math
import torch
import torch.nn as nn
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to an input tensor.

    A table ``pe`` of shape ``(max_len, 1, d_model)`` is precomputed and
    registered as a buffer (so it follows the module across devices but is
    not a trainable parameter). Even feature columns hold ``sin`` values and
    odd feature columns hold ``cos`` values of ``position * div_term``.

    Args:
        d_model: Size of the feature (last) dimension of the table.
        max_len: Number of positions precomputed in the table.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so
        # trim the angle table; without this the assignment raises a shape
        # mismatch error.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Reshape (max_len, d_model) -> (max_len, 1, d_model) so the middle
        # axis broadcasts when the table is added in forward().
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(0)`` positions.

        Args:
            x: Tensor whose first axis indexes positions; it must broadcast
                against a ``(x.size(0), 1, d_model)`` slice of the table
                (e.g. a ``(seq_len, batch, d_model)`` tensor).
        """
        return x + self.pe[:x.size(0), :]
class Transformer(nn.Module):
    """Transformer encoder that maps a frame sequence back to its input size.

    The input is linearly embedded to ``feature_size``, position-encoded,
    passed through a stack of ``nn.TransformerEncoderLayer`` layers and
    projected back to ``d_input`` features. Attention is restricted so that
    only frames whose index is a multiple of ``interval`` can be attended to.

    The encoder layers are built with PyTorch's default sequence-first layout
    and the mask is sized by ``len(src)``, so inputs are expected as
    ``(seq_len, batch, d_input)``.

    Args:
        interval: Spacing between frames that may be attended to; frame
            ``i`` is attendable when ``i % interval == 0``.
        d_input: Number of features per frame on input and output.
        feature_size: Model width used inside the encoder.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability of each encoder layer.
        nhead: Number of attention heads per encoder layer.
    """

    def __init__(self, interval, d_input=63, feature_size=64, num_layers=2, dropout=0.1, nhead=4):
        super().__init__()
        self.model_type = 'Transformer'
        self.interval = interval
        # Lazily built attention mask, cached between forward() calls.
        self.src_mask = None
        self.embedding = nn.Linear(d_input, feature_size)
        self.pos_encoder = PositionalEncoding(feature_size)
        # NOTE(review): nn.TransformerEncoder appears to work on its own
        # copies of the layer it is given, so this attribute's parameters
        # look unused by forward(); it is kept so that existing state_dict
        # keys stay unchanged - confirm before removing.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=feature_size, nhead=nhead, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers=num_layers
        )
        self.decoder = nn.Linear(feature_size, d_input)
        self.init_weights()

    def init_weights(self):
        """Zero the decoder bias and draw its weights from U(-0.1, 0.1)."""
        init_range = 0.1
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -init_range, init_range)

    def forward(self, src):
        """Encode ``src`` and project it back to ``d_input`` features.

        Args:
            src: Input tensor, expected as ``(seq_len, batch, d_input)``.

        Returns:
            Tensor with the same leading dimensions as ``src`` and
            ``d_input`` features on the last axis.
        """
        cached = self.src_mask
        # The mask is a plain attribute, not a buffer, so moving the module
        # to another device does not move it. Rebuild it when either the
        # sequence length or the device of the input changes.
        if (
            cached is None
            or cached.size(0) != len(src)
            or cached.device != src.device
        ):
            self.src_mask = self._generate_exist_frames_mask(src).to(src.device)

        src = self.embedding(src)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        return output

    def _generate_exist_frames_mask(self, src):
        """Build the additive attention mask for a sequence of ``len(src)`` frames.

        Returns:
            A ``(len(src), len(src))`` float tensor that is ``-inf`` in every
            column whose index is not a multiple of ``self.interval`` and
            ``0`` elsewhere.
        """
        input_window = len(src)
        # Column selector: True for frames that must not be attended to.
        missing = torch.tensor(
            [frame % self.interval != 0 for frame in range(input_window)],
            dtype=torch.bool,
        )
        # The 1-D selector broadcasts over rows, masking whole columns.
        return torch.zeros(input_window, input_window).masked_fill(
            missing, float('-inf')
        )