Skip to content

Commit b1d6550

Browse files
authored
Merge pull request #1334 from laurilaatu/oneapihgq
Initial HGQ support for oneAPI
1 parent b9ab84c commit b1d6550

File tree

10 files changed

+173
-8
lines changed

10 files changed

+173
-8
lines changed
File renamed without changes.

hls4ml/backends/fpga/fpga_types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ def definition_cpp(self):
7575
self._saturation_mode_cpp(self.saturation_mode),
7676
self.saturation_bits,
7777
]
78+
if args[0] == 1:
79+
# Currently oneAPI ac_fixed requires at least two bits for both signed and unsigned cases
80+
# Should be fixed in the future once oneAPI supports 1-bit unsigned ac_fixed
81+
args[0] = 2
7882
if args[3] == 'AC_TRN' and args[4] == 'AC_WRAP':
7983
# This is the default, so we won't write the full definition for brevity
8084
args[3] = args[4] = None

hls4ml/backends/fpga/passes/hgq_proxy_model.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ def to_apfixed(k, b, i, RND, SAT):
1515

1616
def to_acfixed(k, b, i, RND, SAT):
    """Build the oneAPI/Quartus ``ac_fixed`` C++ type string for a quantizer.

    ``k`` is the keep-sign flag (nonzero means signed), ``b`` the total bit
    width, ``i`` the integer bits, and ``RND``/``SAT`` the rounding and
    saturation mode suffixes (e.g. ``'TRN'``, ``'WRAP'``).
    """
    signed = 'true' if k != 0 else 'false'
    if b == 1:
        # Currently oneAPI ac_fixed requires at least two bits for both signed and unsigned cases
        # Should be fixed in the future once oneAPI supports 1-bit unsigned ac_fixed
        b = 2
    return f'ac_fixed<{b},{i},{signed},AC_{RND},AC_{SAT}>'
1923

2024

@@ -23,23 +27,26 @@ def generate_mask_fn(
2327
) -> str:
2428
"""Generate heterogenous quantization mask function, ONLY works for IOType=io_parallel"""
2529
assert k.shape[0] == b.shape[0] == i.shape[0] == 1
26-
assert backend.lower() in ('quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
30+
assert backend.lower() in ('oneapi', 'quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
2731
Ks, Bs, Is = k[0], b[0], i[0]
2832
Ks, Bs, Is = np.broadcast_to(Ks, shape), np.broadcast_to(Bs, shape), np.broadcast_to(Is, shape)
2933
Ks, Bs, Is = Ks.ravel(), Bs.ravel(), Is.ravel()
3034
masks = []
31-
to_fixed = to_acfixed if backend.lower() == 'quartus' else to_apfixed
35+
to_fixed = to_acfixed if backend.lower() in ['oneapi', 'quartus'] else to_apfixed
3236
for idx, (k, b, i) in enumerate(zip(Ks, Bs, Is)):
3337
if b == 0:
3438
fn = f'out[{idx}] = 0;'
3539
else:
3640
fn = f'out[{idx}] = {to_fixed(k, b, i, RND, SAT)}(inp[{idx}]);'
3741
masks.append(f' {fn}')
3842
body = "\n".join(masks)
43+
arguments = (
44+
'input_t *inp, output_t *out' if backend.lower() not in ['oneapi', 'quartus'] else 'input_t &inp, output_t &out'
45+
)
3946
mask_fn = f'''
4047
template<typename input_t, typename output_t>
41-
void {name}(input_t *inp, output_t *out) {{
42-
#pragma HLS INLINE
48+
void {name}({arguments}) {{
49+
{'#pragma HLS INLINE' if backend.lower() not in ['oneapi', 'quartus'] else ''}
4350
4451
{body}
4552
}}

hls4ml/backends/oneapi/oneapi_backend.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ def _register_flows(self):
6868
'oneapi:quantize_dense_output',
6969
'fuse_consecutive_batch_normalization',
7070
'oneapi:xnor_pooling',
71-
'oneapi:generate_conv_im2col',
7271
]
7372
quantization_flow = register_flow('quantization', quantization_passes, requires=[init_flow], backend=self.name)
7473

@@ -79,6 +78,7 @@ def _register_flows(self):
7978
'oneapi:skip_softmax',
8079
'oneapi:fix_softmax_table_size',
8180
'infer_precision_types',
81+
'oneapi:process_fixed_point_quantizer_layer',
8282
]
8383
optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
8484

@@ -104,7 +104,6 @@ def _register_flows(self):
104104
+ optimization_passes
105105
+ writer_passes
106106
+ ['oneapi:inplace_stream_flatten', 'oneapi:reshape_stream'] # not needed
107-
+ ['oneapi:process_fixed_point_quantizer_layer'] # not yet supported
108107
]
109108

110109
if len(extras) > 0:

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def _register_flows(self):
6161
'quartus:transform_types',
6262
'quartus:register_bram_weights',
6363
'quartus:apply_resource_strategy',
64-
'quartus:generate_conv_im2col',
6564
'quartus:apply_winograd_kernel_transformation',
6665
]
6766
quartus_types_flow = register_flow('specific_types', quartus_types, requires=[init_flow], backend=self.name)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
2+
from hls4ml.model.optimizer import OptimizerPass
3+
from hls4ml.model.types import Source
4+
5+
6+
class GenerateConvIm2col(OptimizerPass):
    '''Generates code for the im2col step of 1D/2D convolution'''

    # Note, DepthwiseConv1D/2D also matches because it inherits from Conv1D/2D
    def match(self, node):
        # Only io_parallel uses the generated line-buffer/im2col code path.
        return (
            isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
            and node.model.config.get_config_value('IOType') == 'io_parallel'
        )

    def transform(self, model, node):
        """Generate line-buffer code for the node and attach it as a codegen attribute.

        Dispatches on the layer's class name: separable convolutions get two
        generated functions (depthwise + pointwise), plain convolutions get one.
        """
        node_class = node.class_name
        if 'Separable' in node_class:
            if '1D' in node_class:
                self._generate_separable_im2col_1d(node)
            elif '2D' in node_class:
                self._generate_separable_im2col_2d(node)
            else:
                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
        else:
            if '1D' in node_class:
                self._generate_im2col_1d(node)
            elif '2D' in node_class:
                self._generate_im2col_2d(node)
            else:
                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')

    def _generate_im2col_1d(self, node):
        # Plain 1D convolution: one line-buffer function over the input.
        code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
            node.get_attr('index'),
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            kernel=node.get_attr('filt_width'),
            stride=node.get_attr('stride_width'),
            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
        )

        node.set_attr('line_buffer_codegen', Source(code_str))

    def _generate_im2col_2d(self, node):
        # Plain 2D convolution: one line-buffer function over the input.
        code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
            node.get_attr('index'),
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            node.get_input_variable().shape[2],
            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
            pad=(
                node.get_attr('pad_top'),
                node.get_attr('pad_bottom'),
                node.get_attr('pad_left'),
                node.get_attr('pad_right'),
            ),
        )

        node.set_attr('line_buffer_codegen', Source(code_str))

    def _generate_separable_im2col_1d(self, node):
        # Separable 1D convolution: depthwise pass uses the real kernel over
        # the input shape; pointwise pass is a 1x1 kernel over the depthwise
        # output width with the (unchanged) input channel count.
        dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
            str(node.get_attr('index')) + '_dw',
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            kernel=node.get_attr('filt_width'),
            stride=node.get_attr('stride_width'),
            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
        )

        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))

        pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
            str(node.get_attr('index')) + '_pw',
            node.get_attr('n_partitions'),
            node.get_output_variable().shape[0],
            node.get_input_variable().shape[1],
            kernel=1,
        )

        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))

    def _generate_separable_im2col_2d(self, node):
        # Separable 2D convolution: depthwise pass uses the real kernel over
        # the input shape; pointwise pass is a 1x1 kernel over the depthwise
        # output spatial dims with the (unchanged) input channel count.
        dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
            str(node.get_attr('index')) + '_dw',
            node.get_attr('n_partitions'),
            node.get_input_variable().shape[0],
            node.get_input_variable().shape[1],
            node.get_input_variable().shape[2],
            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
            pad=(
                node.get_attr('pad_top'),
                node.get_attr('pad_bottom'),
                node.get_attr('pad_left'),
                node.get_attr('pad_right'),
            ),
        )

        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))

        pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
            str(node.get_attr('index')) + '_pw',
            node.get_attr('n_partitions'),
            node.get_output_variable().shape[0],
            node.get_output_variable().shape[1],
            node.get_input_variable().shape[2],
            kernel=(1, 1),
        )

        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))

hls4ml/model/optimizer/passes/bit_exact.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def to_hls4ml_fixed(k, i, f, name, *args):
5151
if B >= 1:
5252
ptype = FixedPrecisionType(B, I, signed, *args)
5353
else:
54-
ptype = FixedPrecisionType(1, 32, False, 'TRN', 'WRAP')
54+
ptype = FixedPrecisionType(2, 32, False, 'TRN', 'WRAP')
5555
return NamedType(name, ptype)
5656

5757

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#ifndef NNET_INSTR_GEN_H_
#define NNET_INSTR_GEN_H_

// Placeholder header for per-layer generated code (oneAPI backend).
// The writer (oneapi_writer.write_generated_code) string-searches this file
// for the marker comment below and splices each layer's generated
// implementations in after it; do not remove or duplicate the marker line.
namespace nnet {

// hls4ml insert code

} // namespace nnet

#endif

hls4ml/templates/oneapi/firmware/parameters.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
#include "defines.h"
55

6+
#include "nnet_utils/nnet_code_gen.h"
67
#include "nnet_utils/nnet_helpers.h"
8+
79
// hls-fpga-machine-learning insert includes
810

911
// hls-fpga-machine-learning insert layer-config

hls4ml/writer/oneapi_writer.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -927,6 +927,33 @@ def write_activation_tables(self, model):
927927
self.__write_exp_table_legacy(model, dstpath)
928928
self.__write_invert_table_legacy(model, dstpath)
929929

930+
def write_generated_code(self, model):
    """Write the generated code (nnet_code_gen.h)

    Rewrites the copied nnet_code_gen.h template in place: each layer's
    generated code is appended after the '// hls4ml insert code' marker
    line, and the 'nnet' namespace is renamed when the writer config
    provides a custom 'Namespace'.

    Args:
        model (ModelGraph): the hls4ml model.
    """
    path = f'{model.config.get_output_dir()}/src/firmware/nnet_utils/nnet_code_gen.h'
    # Read the whole template first, then rewrite the same file below.
    # Context managers guarantee the handle is closed even on error
    # (the previous open()/close() pairs leaked on exception).
    with open(path) as f:
        contents = f.readlines()

    namespace = model.config.get_writer_config().get('Namespace', None)

    with open(path, 'w') as f:
        for line in contents:
            newline = line
            if '// hls4ml insert code' in line:
                # Keep the marker line and append every layer's generated code after it.
                for layer in model.get_layers():
                    for generated_code in layer.code.values():
                        newline += str(generated_code)
            if namespace is not None and 'namespace nnet' in newline:
                newline = newline.replace('namespace nnet', f'namespace {namespace}')
            f.write(newline)
956+
930957
def write_yml(self, model):
931958
"""Write the config to the YAML file
932959
@@ -975,5 +1002,6 @@ def write_hls(self, model):
9751002
self.write_build_script(model)
9761003
self.write_nnet_utils(model)
9771004
self.write_activation_tables(model)
1005+
self.write_generated_code(model)
9781006
self.write_yml(model)
9791007
self.write_tar(model)

0 commit comments

Comments
 (0)