-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutilities.py
90 lines (76 loc) · 3.35 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from config import config
import torch
import numpy as np
from torch.nn import functional as f
import os
import subprocess
from config import remote
from torch.autograd import Variable
import copy
def get_prior(shape):
    """Draw a Gaussian noise tensor and place it on the configured device.

    Args:
        shape: iterable of dimension sizes, unpacked into ``torch.randn``.

    Returns:
        The sampled tensor wrapped in ``Variable`` and moved to
        ``config["train"]["device"]``.
    """
    scale = 1.  # single gaussian
    noise = torch.randn(*shape) * scale
    target_device = config["train"]["device"]
    return Variable(noise).to(target_device)
class Batch:
    """Container for one training batch and the masks derived from it."""

    def __init__(self, src, trg=None, pad=0):
        """Store the source batch and, when given, the shifted target batch.

        Args:
            src: source tensor; positions equal to ``pad`` are masked out.
            trg: optional target tensor. When provided, its last axis is
                split into decoder input (all but the last position) and
                expected output (all but the first position).
            pad: value that marks padding positions.
        """
        self.src = src
        # TODO MASK: an outer-product (pairwise) padding mask was also tried
        # here; the current mask only inserts a singleton axis before the
        # last dimension.
        src_is_token = src != pad
        self.src_mask = src_is_token.unsqueeze(-2)

        # Target-related attributes are only set when a target is supplied.
        if trg is None:
            return

        self.trg = trg[..., :-1]
        self.trg_y = trg[..., 1:]
        self.trg_mask = self.make_std_mask(self.trg, pad)
        trg_y_is_token = self.trg_y != pad
        self.ntokens = trg_y_is_token.data.sum()

    @staticmethod
    def make_std_mask(tgt, pad):
        """Build a target mask hiding both padding and future positions.

        Args:
            tgt: target tensor whose last axis is the sequence axis.
            pad: value that marks padding positions.

        Returns:
            The element-wise AND of the padding mask and the mask produced
            by ``subsequent_mask`` for the length of the last axis of ``tgt``.
        """
        padding_mask = (tgt != pad).unsqueeze(-2)
        seq_len = tgt.size(-1)
        future_mask = subsequent_mask(seq_len).type_as(padding_mask.data)
        return padding_mask & Variable(future_mask)
def subsequent_mask(size):
    """Mask out subsequent positions.

    Args:
        size: length of the sequence axis.

    Returns:
        A ``(1, size, size)`` tensor that is true where the column index is
        less than or equal to the row index, and false strictly above the
        diagonal.
    """
    ones = np.ones((1, size, size))
    # Entries strictly above the diagonal become 1; everything else is 0.
    strictly_upper = np.triu(ones, k=1).astype('uint8')
    blocked = torch.from_numpy(strictly_upper)
    return blocked == 0
def min_max_scaling(value, old_interval, new_interval):
    """
    Linearly map a value from the range [mn, mx] onto the range [a, b] and
    round the result with the builtin ``round``.

    Args:
        value: the number to rescale.
        old_interval: pair ``(mn, mx)`` describing the source range.
        new_interval: pair ``(a, b)`` describing the destination range.

    Returns:
        The rescaled value after ``round``.
    """
    old_low, old_high = old_interval
    new_low, new_high = new_interval
    offset = value - old_low
    new_span = new_high - new_low
    old_span = old_high - old_low
    return round(offset * new_span / old_span + new_low)
def midi_to_wav(input_file, output_file):
    """
    Convert a MIDI file to audio by invoking the FluidSynth command-line tool.

    - Manual is available in
    https://github.com/FluidSynth/fluidsynth/wiki/UserManual
    - Sound font can be downloaded from
    http://timtechsoftware.com/ad.html?keyword=sf2%20format?file_name=the%20General%20MIDI%20Soundfont?file_url=uploads/
    GeneralUser_GS_SoftSynth_v144.sf2
    - To install FluidSynth on Windows, download the executable; on Unix use conda

    When ``remote`` is truthy, the executable and the sound font are referred
    to by bare name; otherwise both are looked up under the local ``fl``
    directory. The exit status of the process is not checked.

    Args:
        input_file: path of the MIDI file passed to FluidSynth.
        output_file: path given to FluidSynth's ``-F`` option.
    """
    if remote:
        executable = "fluidsynth"
        sound_font = "sound_font.sf2"
    else:
        executable = os.path.join("fl", "bin", "fluidsynth")
        sound_font = os.path.join("fl", "sound_font.sf2")

    # Options tried earlier and left disabled: "-i", "-n", "-T wav" (seemed
    # to be useless), "-q" (quiet mode) and "-T raw" (audio type).
    command = [
        executable,
        "-F", output_file,
        "-r", "8000",
        sound_font,  # a sound font
        input_file,
    ]
    subprocess.call(command)
# def create_trg_mask(trg):
# trg_mask = np.full(trg.shape + (trg.shape[-1],), True)
# for i in range(trg.shape[0]):
# for b in range(trg.shape[1]):
# line_mask = trg[i][b] != config["tokens"]["pad"]
#
# eos_index = np.argmax(trg[i][b] == config["tokens"]["sos"])
# if eos_index != 0 and eos_index < len(line_mask) - 1:
# line_mask[(eos_index+1):] = config["tokens"]["pad"]
#
# pad_mask = np.matmul(line_mask[:, np.newaxis], line_mask[np.newaxis, :])
# subsequent_mask = np.expand_dims(np.tril(np.ones((trg.shape[-1], trg.shape[-1]))), (0, 1))
# subsequent_mask = subsequent_mask.astype(np.bool)
# trg_mask[i][b] = pad_mask & subsequent_mask
# trg_mask = torch.BoolTensor(trg_mask).to(config["train"]["device"])
# return trg_mask