Skip to content

Commit b1d6550

Browse files
authored
Merge pull request #1334 from laurilaatu/oneapihgq
Initial HGQ support for oneAPI
1 parent b9ab84c commit b1d6550

File tree

10 files changed

+173
-8
lines changed

10 files changed

+173
-8
lines changed
File renamed without changes.

hls4ml/backends/fpga/fpga_types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ def definition_cpp(self):
7575
self._saturation_mode_cpp(self.saturation_mode),
7676
self.saturation_bits,
7777
]
78+
if args[0] == 1:
79+
# Currently oneAPI ac_fixed requires at least two bits for both signed and unsigned cases
80+
# Should be fixed in the future once oneAPI supports 1-bit unsigned ac_fixed
81+
args[0] = 2
7882
if args[3] == 'AC_TRN' and args[4] == 'AC_WRAP':
7983
# This is the default, so we won't write the full definition for brevity
8084
args[3] = args[4] = None

hls4ml/backends/fpga/passes/hgq_proxy_model.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ def to_apfixed(k, b, i, RND, SAT):
1515

1616
def to_acfixed(k, b, i, RND, SAT):
    """Build the oneAPI/Quartus ``ac_fixed`` C++ type string for a quantizer.

    ``k`` is the keep-sign flag (nonzero means signed), ``b`` the total bit
    width, ``i`` the integer bits, and ``RND``/``SAT`` the rounding and
    saturation mode suffixes (e.g. ``'TRN'``, ``'WRAP'``).
    """
    signed = 'true' if k != 0 else 'false'
    if b == 1:
        # Currently oneAPI ac_fixed requires at least two bits for both signed and unsigned cases
        # Should be fixed in the future once oneAPI supports 1-bit unsigned ac_fixed
        b = 2
    return f'ac_fixed<{b},{i},{signed},AC_{RND},AC_{SAT}>'
1923

2024

@@ -23,23 +27,26 @@ def generate_mask_fn(
2327
) -> str:
2428
"""Generate heterogenous quantization mask function, ONLY works for IOType=io_parallel"""
2529
assert k.shape[0] == b.shape[0] == i.shape[0] == 1
26-
assert backend.lower() in ('quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
30+
assert backend.lower() in ('oneapi', 'quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
2731
Ks, Bs, Is = k[0], b[0], i[0]
2832
Ks, Bs, Is = np.broadcast_to(Ks, shape), np.broadcast_to(Bs, shape), np.broadcast_to(Is, shape)
2933
Ks, Bs, Is = Ks.ravel(), Bs.ravel(), Is.ravel()
3034
masks = []
31-
to_fixed = to_acfixed if backend.lower() == 'quartus' else to_apfixed
35+
to_fixed = to_acfixed if backend.lower() in ['oneapi', 'quartus'] else to_apfixed
3236
for idx, (k, b, i) in enumerate(zip(Ks, Bs, Is)):
3337
if b == 0:
3438
fn = f'out[{idx}] = 0;'
3539
else:
3640
fn = f'out[{idx}] = {to_fixed(k, b, i, RND, SAT)}(inp[{idx}]);'
3741
masks.append(f' {fn}')
3842
body = "\n".join(masks)
43+
arguments = (
44+
'input_t *inp, output_t *out' if backend.lower() not in ['oneapi', 'quartus'] else 'input_t &inp, output_t &out'
45+
)
3946
mask_fn = f'''
4047
template<typename input_t, typename output_t>
41-
void {name}(input_t *inp, output_t *out) {{
42-
#pragma HLS INLINE
48+
void {name}({arguments}) {{
49+
{'#pragma HLS INLINE' if backend.lower() not in ['oneapi', 'quartus'] else ''}
4350
4451
{body}
4552
}}

hls4ml/backends/oneapi/oneapi_backend.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ def _register_flows(self):
6868
'oneapi:quantize_dense_output',
6969
'fuse_consecutive_batch_normalization',
7070
'oneapi:xnor_pooling',
71-
'oneapi:generate_conv_im2col',
7271
]
7372
quantization_flow = register_flow('quantization', quantization_passes, requires=[init_flow], backend=self.name)
7473

@@ -79,6 +78,7 @@ def _register_flows(self):
7978
'oneapi:skip_softmax',
8079
'oneapi:fix_softmax_table_size',
8180
'infer_precision_types',
81+
'oneapi:process_fixed_point_quantizer_layer',
8282
]
8383
optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
8484

@@ -104,7 +104,6 @@ def _register_flows(self):
104104
+ optimization_passes
105105
+ writer_passes
106106
+ ['oneapi:inplace_stream_flatten', 'oneapi:reshape_stream'] # not needed
107-
+ ['oneapi:process_fixed_point_quantizer_layer'] # not yet supported
108107
]
109108

110109
if len(extras) > 0:

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def _register_flows(self):
6161
'quartus:transform_types',
6262
'quartus:register_bram_weights',
6363
'quartus:apply_resource_strategy',
64-
'quartus:generate_conv_im2col',
6564
'quartus:apply_winograd_kernel_transformation',
6665
]
6766
quartus_types_flow = register_flow('specific_types', quartus_types, requires=[init_flow], backend=self.name)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
2+
from hls4ml.model.optimizer import OptimizerPass
3+
from hls4ml.model.types import Source
4+
5+
6+
class GenerateConvIm2col(OptimizerPass):
    '''Generates code for the im2col step of 1D/2D convolution'''

    # Note, DepthwiseConv1D/2D also matches because it inherits from Conv1D/2D
    def match(self, node):
        # Only io_parallel uses the generated line-buffer/im2col code path.
        return (
            isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
            and node.model.config.get_config_value('IOType') == 'io_parallel'
        )

    def transform(self, model, node):
        """Generate line-buffer code for the node and attach it as a codegen attribute.

        Dispatches on the layer's class name: separable convolutions get two
        generated functions (depthwise + pointwise), plain convolutions get one.
        """
        node_class = node.class_name
        if 'Separable' in node_class:
            if '1D' in node_class:
                self._generate_separable_im2col_1d(node)
            elif '2D' in node_class:
                self._generate_separable_im2col_2d(node)
            else:
                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
        else:
            if '1D' in node_class:
                self._generate_im2col_1d(node)
            elif '2D' in node_class:
                self._generate_im2col_2d(node)
            else:
                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')

    def _generate_im2col_1d(self, node):
        # Plain 1D convolution: one line-buffer function over the input.
        code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
            node.get_attr('index'),
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            kernel=node.get_attr('filt_width'),
            stride=node.get_attr('stride_width'),
            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
        )

        node.set_attr('line_buffer_codegen', Source(code_str))

    def _generate_im2col_2d(self, node):
        # Plain 2D convolution: one line-buffer function over the input.
        code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
            node.get_attr('index'),
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            node.get_input_variable().shape[2],
            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
            pad=(
                node.get_attr('pad_top'),
                node.get_attr('pad_bottom'),
                node.get_attr('pad_left'),
                node.get_attr('pad_right'),
            ),
        )

        node.set_attr('line_buffer_codegen', Source(code_str))

    def _generate_separable_im2col_1d(self, node):
        # Separable 1D convolution: depthwise pass uses the real kernel over
        # the input shape; pointwise pass is a 1x1 kernel over the depthwise
        # output width with the (unchanged) input channel count.
        dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
            str(node.get_attr('index')) + '_dw',
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            kernel=node.get_attr('filt_width'),
            stride=node.get_attr('stride_width'),
            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
        )

        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))

        pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
            str(node.get_attr('index')) + '_pw',
            node.get_attr('n_partitions'),
            node.get_output_variable().shape[0],
            node.get_input_variable().shape[1],
            kernel=1,
        )

        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))

    def _generate_separable_im2col_2d(self, node):
        # Separable 2D convolution: depthwise pass uses the real kernel over
        # the input shape; pointwise pass is a 1x1 kernel over the depthwise
        # output spatial dims with the (unchanged) input channel count.
        dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
            str(node.get_attr('index')) + '_dw',
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            node.get_input_variable().shape[2],
            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
            pad=(
                node.get_attr('pad_top'),
                node.get_attr('pad_bottom'),
                node.get_attr('pad_left'),
                node.get_attr('pad_right'),
            ),
        )

        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))

        pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
            str(node.get_attr('index')) + '_pw',
            node.get_attr('n_partitions'),
            node.get_output_variable().shape[0],
            node.get_output_variable().shape[1],
            node.get_input_variable().shape[2],
            kernel=(1, 1),
        )

        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))

hls4ml/model/optimizer/passes/bit_exact.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def to_hls4ml_fixed(k, i, f, name, *args):
5151
if B >= 1:
5252
ptype = FixedPrecisionType(B, I, signed, *args)
5353
else:
54-
ptype = FixedPrecisionType(1, 32, False, 'TRN', 'WRAP')
54+
ptype = FixedPrecisionType(2, 32, False, 'TRN', 'WRAP')
5555
return NamedType(name, ptype)
5656

5757

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef NNET_INSTR_GEN_H_
#define NNET_INSTR_GEN_H_

// Placeholder header for per-layer generated code (oneAPI backend).
// The writer (oneapi_writer.write_generated_code) string-searches this file
// for the marker comment below and splices each layer's generated
// implementations in after it; do not remove or duplicate the marker line.
namespace nnet {

// hls4ml insert code

} // namespace nnet

#endif

hls4ml/templates/oneapi/firmware/parameters.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
#include "defines.h"
55

6+
#include "nnet_utils/nnet_code_gen.h"
67
#include "nnet_utils/nnet_helpers.h"
8+
79
// hls-fpga-machine-learning insert includes
810

911
// hls-fpga-machine-learning insert layer-config

hls4ml/writer/oneapi_writer.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,33 @@ def write_activation_tables(self, model):
927927
self.__write_exp_table_legacy(model, dstpath)
928928
self.__write_invert_table_legacy(model, dstpath)
929929

930+
def write_generated_code(self, model):
    """Write the generated code (nnet_code_gen.h)

    Rewrites the copied nnet_code_gen.h template in place: each layer's
    generated code is appended after the '// hls4ml insert code' marker
    line, and the 'nnet' namespace is renamed when the writer config
    provides a custom 'Namespace'.

    Args:
        model (ModelGraph): the hls4ml model.
    """
    path = f'{model.config.get_output_dir()}/src/firmware/nnet_utils/nnet_code_gen.h'
    # Read the whole template first, then rewrite the same file below.
    # Context managers guarantee the handle is closed even on error
    # (the previous open()/close() pairs leaked on exception).
    with open(path) as f:
        contents = f.readlines()

    namespace = model.config.get_writer_config().get('Namespace', None)

    with open(path, 'w') as f:
        for line in contents:
            newline = line
            if '// hls4ml insert code' in line:
                # Keep the marker line and append every layer's generated code after it.
                for layer in model.get_layers():
                    for generated_code in layer.code.values():
                        newline += str(generated_code)
            if namespace is not None and 'namespace nnet' in newline:
                newline = newline.replace('namespace nnet', f'namespace {namespace}')
            f.write(newline)
956+
930957
def write_yml(self, model):
931958
"""Write the config to the YAML file
932959
@@ -975,5 +1002,6 @@ def write_hls(self, model):
9751002
self.write_build_script(model)
9761003
self.write_nnet_utils(model)
9771004
self.write_activation_tables(model)
1005+
self.write_generated_code(model)
9781006
self.write_yml(model)
9791007
self.write_tar(model)

0 commit comments

Comments
 (0)