Skip to content

Commit f138c78

Browse files
committed
Add L-BFGS optimization option and allow setting layer weights
1 parent ef638ee commit f138c78

File tree

2 files changed

+83
-55
lines changed

2 files changed

+83
-55
lines changed

README.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,13 @@ The optimization of the generated image is performed on GPU. On a 2014 MacBook P
2929

3030
Other options:
3131

32-
- `num_iters`: Number of optimization steps.
32+
- `num_iters`: Number of optimization steps. Default is 500.
3333
- `size`: Long edge dimension of the generated image. Set to 0 to use the size of the content image. Default is 500.
34-
- `nodisplay`: Suppress image display during optimization.
35-
- `smoothness`: Constant that controls smoothness of generated image (total variation norm regularization strength). Default is 7.5E-3.
34+
- `display_interval`: Number of iterations between image displays. Set to 0 to suppress image display. Default is 20.
35+
- `smoothness`: Constant that controls smoothness of generated image (total variation norm regularization strength). Default is 6E-3.
3636
- `init`: {image, random}. Initialization mode for optimized image. `image` initializes with the content image; `random` initializes with random Gaussian noise. Default is `image`.
3737
- `backend`: {cunn, cudnn}. Neural network CUDA backend. `cudnn` requires the [Torch bindings](https://github.com/soumith/cudnn.torch/tree/R3) for CuDNN R3. Default is `cunn`.
38+
- `optimizer`: {sgd, lbfgs}. Optimization algorithm. `lbfgs` is slower per iteration and consumes more memory, but may yield better results. Default is `sgd`.
3839

3940
## Examples
4041

@@ -64,7 +65,7 @@ The outputs of the following layers are used to optimize for style: `conv1/7x7_s
6465

6566
The outputs of the following layers are used to optimize for content: `inception_3a`, `inception_4a`.
6667

67-
Optimization of the generated image is performed using gradient descent with momentum of 0.9. The learning rate is decayed exponentially by 0.75 every 100 iterations.
68+
By default, optimization of the generated image is performed using gradient descent with momentum of 0.9, and the learning rate is decayed exponentially by 0.75 every 100 iterations. L-BFGS can be selected instead with the `optimizer` option; the learning rate decay applies only to gradient descent.
6869

6970
By default, the optimized image is initialized using the content image; the implementation also works with white noise initialization, as described in the paper.
7071

main.lua

+78-51
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@ cmd:text()
2323
cmd:text('A Neural Algorithm of Artistic Style')
2424
cmd:text()
2525
cmd:text('Options:')
26-
cmd:option('--style', 'none', 'Path to style image')
27-
cmd:option('--content', 'none', 'Path to content image')
28-
cmd:option('--style_factor', 5e9, 'Trade-off factor between style and content')
29-
cmd:option('--num_iters', 500, 'Number of iterations')
30-
cmd:option('--size', 500, 'Length of image long edge (0 to use original content size)')
31-
cmd:option('--nodisplay', false, 'Whether to skip image display during optimization')
32-
cmd:option('--smoothness', 7.5e-3, 'Total variation norm regularization strength (higher for smoother output)')
33-
cmd:option('--init', 'image', '{image, random}. Initialization mode for optimized image.')
34-
cmd:option('--backend', 'cunn', '{cunn, cudnn}. Neural network CUDA backend.')
26+
cmd:option('--style', 'none', 'Path to style image')
27+
cmd:option('--content', 'none', 'Path to content image')
28+
cmd:option('--style_factor', 5e9, 'Trade-off factor between style and content')
29+
cmd:option('--num_iters', 500, 'Number of iterations')
30+
cmd:option('--size', 500, 'Length of image long edge (0 to use original content size)')
31+
cmd:option('--display_interval', 20, 'Iterations between image displays (0 to suppress display)')
32+
cmd:option('--smoothness', 6e-3, 'Total variation norm regularization strength (higher for smoother output)')
33+
cmd:option('--init', 'image', '{image, random}. Initialization mode for optimized image.')
34+
cmd:option('--backend', 'cunn', '{cunn, cudnn}. Neural network CUDA backend.')
35+
cmd:option('--optimizer', 'sgd', '{sgd, lbfgs}. Optimization algorithm.')
3536
local opt = cmd:parse(arg)
3637
if opt.size <= 0 then
3738
opt.size = nil
@@ -116,40 +117,50 @@ end
116117
local model = create_model('inception_caffe.th', opt.backend)
117118
collectgarbage()
118119

119-
local style_layers = {
120-
'conv1/7x7_s2',
121-
'conv2/3x3',
122-
'inception_3a',
123-
'inception_3b',
124-
'inception_4a',
125-
'inception_4b',
126-
'inception_4c',
127-
'inception_4d',
128-
'inception_4e',
120+
-- choose style and content layers
121+
local style_weights = {
122+
['conv1/7x7_s2'] = 1,
123+
['conv2/3x3'] = 1,
124+
['inception_3a'] = 1,
125+
['inception_3b'] = 1,
126+
['inception_4a'] = 1,
127+
['inception_4b'] = 1,
128+
['inception_4c'] = 1,
129+
['inception_4d'] = 1,
130+
['inception_4e'] = 1,
129131
}
130132

131-
local content_layers = {
132-
'inception_3a',
133-
'inception_4a',
133+
local content_weights = {
134+
['inception_3a'] = 1,
135+
['inception_4a'] = 1,
134136
}
135137

136-
local style_index, content_index = {}, {}
137-
for i, name in ipairs(style_layers) do style_index[name] = true end
138-
for i, name in ipairs(content_layers) do content_index[name] = true end
138+
-- compute normalization factor
139+
local style_weight_sum = 0
140+
local content_weight_sum = 0
141+
for k, v in pairs(style_weights) do
142+
style_weight_sum = style_weight_sum + v
143+
end
139144

145+
for k, v in pairs(content_weights) do
146+
content_weight_sum = content_weight_sum + v
147+
end
140148

141149
-- load content image
142150
local img = preprocess(image.load(opt.content), opt.size):cuda()
143151
model:forward(img)
144-
local img_activations, _ = collect_activations(model, content_index, {})
152+
local img_activations, _ = collect_activations(model, content_weights, {})
145153

146154
-- load style image
147-
local art = preprocess(image.load(opt.style), math.max(img:size(3), img:size(4))):cuda()
155+
local art = preprocess(
156+
image.load(opt.style), math.max(img:size(3), img:size(4))
157+
):cuda()
148158
model:forward(art)
149-
local _, art_grams = collect_activations(model, {}, style_index)
159+
local _, art_grams = collect_activations(model, {}, style_weights)
160+
art = nil
161+
collectgarbage()
150162

151163
function opfunc(input)
152-
153164
-- forward prop
154165
model:forward(input)
155166

@@ -162,35 +173,31 @@ function opfunc(input)
162173
local name = module._name
163174

164175
-- add content gradient
165-
if name and content_index[name] then
176+
if name and content_weights[name] then
166177
local c_loss, c_grad = content_grad(module.output, img_activations[name])
167-
--printf('[content]\t%s\t%.2e\n', name, c_loss)
168-
loss = loss + c_loss / #content_layers
169-
grad:add(1 / #content_layers, c_grad)
178+
local w = content_weights[name] / content_weight_sum
179+
--printf('[content]\t%s\t%.2e\n', name, w * c_loss)
180+
loss = loss + w * c_loss
181+
grad:add(w, c_grad)
170182
end
171183

172184
-- add style gradient
173-
if name and style_index[name] then
185+
if name and style_weights[name] then
174186
local s_loss, s_grad = style_grad(module.output, art_grams[name])
175-
--printf('[style]\t%s\t%.2e\n', name, s_loss)
176-
loss = loss + opt.style_factor * s_loss / #style_layers
177-
grad:add(opt.style_factor / #style_layers, s_grad)
187+
local w = opt.style_factor * style_weights[name] / style_weight_sum
188+
--printf('[style]\t%s\t%.2e\n', name, w * s_loss)
189+
loss = loss + w * s_loss
190+
grad:add(w, s_grad)
178191
end
179192
grad = module:backward(module_input, grad)
180193
end
181194

182195
-- total variation regularization for denoising
183196
grad:add(total_var_grad(input):mul(opt.smoothness))
184-
return loss, grad
197+
return loss, grad:view(-1)
185198
end
186199

187-
local optim_state = {
188-
learningRate = 0.1,
189-
momentum = 0.9,
190-
dampening = 0.0,
191-
}
192-
193-
-- optimized image
200+
-- image to optimize
194201
local input
195202
if opt.init == 'image' then
196203
input = img
@@ -202,10 +209,9 @@ else
202209
error('unrecognized initialization option: ' .. opt.init)
203210
end
204211

205-
-- optimize
206212
local timer = torch.Timer()
207213
local output = depreprocess(input):double()
208-
if not opt.nodisplay then
214+
if opt.display_interval > 0 then
209215
image.display(output)
210216
end
211217

@@ -215,10 +221,31 @@ if not paths.dirp(frames_dir) then
215221
paths.mkdir(frames_dir)
216222
end
217223
image.save(paths.concat(frames_dir, '0.jpg'), output)
224+
225+
-- set optimizer options
226+
local optim_state
227+
if opt.optimizer == 'sgd' then
228+
optim_state = {
229+
learningRate = 0.1,
230+
momentum = 0.9,
231+
dampening = 0.0,
232+
}
233+
elseif opt.optimizer == 'lbfgs' then
234+
optim_state = {
235+
maxIter = 3,
236+
learningRate = 1,
237+
}
238+
else
239+
error('unknown optimizer: ' .. opt.optimizer)
240+
end
241+
242+
-- optimize
218243
for i = 1, opt.num_iters do
219-
local _, loss = optim.sgd(opfunc, input, optim_state)
244+
local _, loss = optim[opt.optimizer](opfunc, input, optim_state)
220245
loss = loss[1]
221-
if i % 100 == 0 then
246+
247+
-- anneal learning rate
248+
if opt.optimizer == 'sgd' and i % 100 == 0 then
222249
optim_state.learningRate = 0.75 * optim_state.learningRate
223250
end
224251

@@ -229,14 +256,14 @@ for i = 1, opt.num_iters do
229256

230257
if i <= 20 or i % 5 == 0 then
231258
output = depreprocess(input):double()
232-
if not opt.nodisplay and i % 50 == 0 then
259+
if opt.display_interval > 0 and i % opt.display_interval == 0 then
233260
image.display(output)
234261
end
235262
image.save(paths.concat(frames_dir, i .. '.jpg'), output)
236263
end
237264
end
238265

239266
output = depreprocess(input)
240-
if not opt.nodisplay then
267+
if opt.display_interval > 0 then
241268
image.display(output)
242269
end

0 commit comments

Comments
 (0)