-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpy12transforms.py
138 lines (96 loc) · 3.42 KB
/
py12transforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import torch
import random
def crop(vid, i, j, h, w):
return vid[..., i:(i + h), j:(j + w)]
def center_crop(vid, output_size):
h, w = vid.shape[-2:]
th, tw = output_size
i = int(round((h - th) / 2.))
j = int(round((w - tw) / 2.))
return crop(vid, i, j, th, tw)
def hflip(vid):
return vid.flip(dims=(-1,))
# NOTE: for those functions, which generally expect mini-batches, we keep them
# as non-minibatch so that they are applied as if they were 4d (thus image).
# this way, we only apply the transformation in the spatial domain
def resize(vid, size, interpolation='bilinear'):
# NOTE: using bilinear interpolation because we don't work on minibatches
# at this level
scale = None
if isinstance(size, int):
scale = float(size) / min(vid.shape[-2:])
size = None
return torch.nn.functional.interpolate(
vid, size=size, scale_factor=scale, mode=interpolation, align_corners=False)
def pad(vid, padding, fill=0, padding_mode="constant"):
# NOTE: don't want to pad on temporal dimension, so let as non-batch
# (4d) before padding. This works as expected
return torch.nn.functional.pad(vid, padding, value=fill, mode=padding_mode)
def to_normalized_float_tensor(vid):
return vid.permute(3, 0, 1, 2).to(torch.float32) / 255
def normalize(vid, mean, std):
shape = (-1,) + (1,) * (vid.dim() - 1)
mean = torch.as_tensor(mean).reshape(shape)
std = torch.as_tensor(std).reshape(shape)
return (vid - mean) / std
def unnormalize(vid, mean, std):
shape = (-1,) + (1,) * (vid.dim() - 1)
mean = torch.as_tensor(mean).reshape(shape)
std = torch.as_tensor(std).reshape(shape)
return (vid * std) + mean
# Class interface
class RandomCrop(object):
def __init__(self, size):
self.size = size
@staticmethod
def get_params(vid, output_size):
"""Get parameters for ``crop`` for a random crop.
"""
h, w = vid.shape[-2:]
th, tw = output_size
if w == tw and h == th:
return 0, 0, h, w
i = random.randint(0, h - th)
j = random.randint(0, w - tw)
return i, j, th, tw
def __call__(self, vid):
i, j, h, w = self.get_params(vid, self.size)
return crop(vid, i, j, h, w)
class CenterCrop(object):
def __init__(self, size):
self.size = size
def __call__(self, vid):
return center_crop(vid, self.size)
class Resize(object):
def __init__(self, size):
self.size = size
def __call__(self, vid):
return resize(vid, self.size)
class ToFloatTensorInZeroOne(object):
def __call__(self, vid):
return to_normalized_float_tensor(vid)
class Normalize(object):
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, vid):
return normalize(vid, self.mean, self.std)
class Unnormalize(object):
def __init__(self, mean, std):
self.mean = mean
self.std = std
def __call__(self, vid):
return unnormalize(vid, self.mean, self.std)
class RandomHorizontalFlip(object):
def __init__(self, p=0.5):
self.p = p
def __call__(self, vid):
if random.random() < self.p:
return hflip(vid)
return vid
class Pad(object):
def __init__(self, padding, fill=0):
self.padding = padding
self.fill = fill
def __call__(self, vid):
return pad(vid, self.padding, self.fill)