From 0765ec44135debde756548b0932f5ccce12da8b5 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 11 Jul 2023 11:00:56 -0500 Subject: [PATCH 01/59] Add needed layer types for QONNX --- hls4ml/model/layers.py | 136 +++++++++++++++++++++++- hls4ml/model/optimizer/passes/qkeras.py | 30 +----- 2 files changed, 132 insertions(+), 34 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index d9da2cc741..6a23a9b934 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -333,7 +333,7 @@ class Input(Layer): def initialize(self): shape = self.attributes['input_shape'] if shape[0] is None: - shape = shape[1:] + raise RuntimeError(f"Unexpectedly have a None in {shape=} of Input layer") dims = [f'N_INPUT_{i}_{self.index}' for i in range(1, len(shape) + 1)] if self.index == 1: default_type_name = 'input_t' @@ -344,6 +344,41 @@ def initialize(self): self.add_output_variable(shape, dims, var_name=self.name, type_name=type_name, precision=precision) +class Constant(Layer): + _expected_attributes = [ + Attribute('value', value_type=np.ndarray), + ] + + def initialize(self): + value = self.attributes['value'] + self.value = value # note, this is unquantized; Only here for easier access + shape = value.shape + if not shape: + shape = (1,) + self.value = np.array([self.value]) + dims = [f'{self.name}_{i}' for i in range(len(shape))] + self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) + + +class Quant(Layer): # The QONNX quantization layer + """ + This is a QONNX quantization layer. Optimizations should convert it + before HLS is produced. + """ + + _expected_attributes = [ + Attribute('narrow', value_type=bool), + Attribute('rounding_mode', value_type=str), + Attribute('signed', value_type=bool), + ] + + def initialize(self): + inp = self.get_input_variable(self.inputs[0]) + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + class Reshape(Layer): _expected_attributes = [ Attribute('target_shape', value_type=typing.Sequence), @@ -351,19 +386,20 @@ class Reshape(Layer): def initialize(self): input_shape = self.get_input_variable(self.inputs[0]).shape - target_shape = self.get_attr('target_shape') + target_shape = self.get_attr('target_shape') # this should not have a batch dimension if target_shape is None: # need to get it from the input shape_node = self.get_input_node(self.inputs[1]) # for QONNX, remove batch dimension + # (onnx cleaning should have removed reshape dimension) if shape_node: target_shape = shape_node.value[1:] else: raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") - # remove Nones -- is this ever triggered? + # nones should not exist here if target_shape[0] is None: - target_shape = target_shape[1:] + raise RuntimeError(f"Unexpectedly have a None in {target_shape=}") # take care of -1 shapes shape = self._infer_output_shape(input_shape, target_shape) @@ -395,7 +431,7 @@ class Dense(Layer): ] def initialize(self): - shape = self.get_input_variable().shape[:] + shape = list(self.get_input_variable().shape) shape[-1] = self.attributes['n_out'] if len(shape) > 1: dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] @@ -406,6 +442,27 @@ def initialize(self): self.add_bias(quantizer=self.get_attr('bias_quantizer')) +class Conv(Layer): + """ + This is for the ONNX Conv node. Currently, it is only supported as an intermediate + form that gets converted to an explicit ConvXD. + + Note: these are always channels-last. 
+ """ + + def initialize(self): + # use negative indexing because it is not clear if batch dimension is always stripped + if self.attributes['n_dim'] == 1: + # this is 1D convolution + shape = [self.attributes['out_width'], self.attributes['n_filt']] + dims = [f'N_OUTPUTS_{self.index}', f'N_FILT_{self.index}'] + else: + shape = [self.attributes['out_height'], self.attributes['out_width'], self.attributes['n_filt']] + dims = [f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}', f'N_FILT_{self.index}'] + + self.add_output_variable(shape, dims) + + class Conv1D(Layer): _expected_attributes = [ Attribute('in_width'), @@ -811,6 +868,19 @@ def initialize(self): super().initialize() +class BatchNormOnnx(Layer): + ''' + A transient layer formed from ONNX BatchNormalization that gets converted to + BatchNormalization after the scale and bias are determined + ''' + + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), @@ -841,6 +911,31 @@ def initialize(self): self.add_weights_variable(name='bias', var_name='b{index}', data=bias) +class ApplyAlpha(BatchNormalization): + '''A custom layer to scale the output of a QDense layer which used 'alpha != 1' + Inference computation uses BatchNormalization methods''' + + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + scale = self.get_attr('scale_data') + scale_quantizer = self.get_attr('scale_quantizer') + bias = self.get_attr('bias_data') + bias_quantizer = self.get_attr('bias_quantizer') + + self.add_weights(scale, quantizer=scale_quantizer) + self.add_bias(bias, quantizer=bias_quantizer) + + def add_weights(self, scale, quantizer=None): + self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) + + def add_bias(self, bias, quantizer=None): + self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) + + class Merge(Layer): def initialize(self): assert len(self.inputs) == 2 @@ -855,6 +950,31 @@ def initialize(self): self.add_output_variable(shape, dims) +class MatMul(Layer): + """ + This is a matrix multiply. Currently, it is only supported as an intermediate + form that gets converted to a Dense layer. 
+ """ + + def initialize(self): + assert len(self.inputs) == 2 + inp1 = self.get_input_variable(self.inputs[0]) + inp2 = self.get_input_variable(self.inputs[1]) + if len(inp2.shape) == 1: + # mat vec multiply + assert inp1.shape[-1] == inp2.shape[0] + shape = tuple(inp1.shape[:-1]) + (inp2.shape[0],) + else: + assert inp1.shape[-1] == inp2.shape[-2] + shape = tuple(inp1.shape[:-1]) + (inp2.shape[-1],) + if len(shape) > 1: + dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] + else: + dims = [f'N_LAYER_{self.index}'] + + self.add_output_variable(shape, dims) + + class Dot(Merge): def initialize(self): assert len(self.inputs) == 2 @@ -1293,6 +1413,7 @@ def initialize(self): layer_map = { 'Input': Input, 'InputLayer': Input, + 'Constant': Constant, 'Activation': Activation, 'QActivation': Activation, 'LeakyReLU': ParametrizedActivation, @@ -1307,6 +1428,7 @@ def initialize(self): 'BinaryDense': Dense, 'TernaryDense': Dense, 'QDense': Dense, + 'Conv': Conv, 'Conv1D': Conv1D, 'QConv1D': Conv1D, 'Conv2D': Conv2D, @@ -1329,6 +1451,7 @@ def initialize(self): 'ZeroPadding1D': ZeroPadding1D, 'ZeroPadding2D': ZeroPadding2D, 'Merge': Merge, + 'MatMul': MatMul, 'Dot': Dot, 'Concatenate': Concatenate, 'Resize': Resize, @@ -1341,6 +1464,9 @@ def initialize(self): 'GRU': GRU, 'GarNet': GarNet, 'GarNetStack': GarNetStack, + 'Quant': Quant, + 'ApplyAlpha': ApplyAlpha, + 'BatchNormOnnx': BatchNormOnnx, 'LayerGroup': LayerGroup, # TensorFlow-specific layers: 'BiasAdd': BiasAdd, diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index cdbb56ec46..2d2b6b0f77 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -1,7 +1,7 @@ import numpy as np import tensorflow as tf -from hls4ml.model.layers import BatchNormalization, register_layer +from hls4ml.model.layers import ApplyAlpha, BatchNormalization from hls4ml.model.optimizer import ConfigurableOptimizerPass, OptimizerPass, register_pass from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, QKerasPO2Quantizer @@ -76,35 +76,7 @@ def precision_string_modify(self, pstr): return pstr -class ApplyAlpha(BatchNormalization): - '''A custom layer to scale the output of a QDense layer which used 'alpha != 1' - Inference computation uses BatchNormalization methods''' - - def initialize(self): - inp = self.get_input_variable() - shape = inp.shape - dims = inp.dim_names - self.add_output_variable(shape, dims) - - scale = self.get_attr('scale_data') - scale_quantizer = self.get_attr('scale_quantizer') - bias = self.get_attr('bias_data') - bias_quantizer = self.get_attr('bias_quantizer') - - self.add_weights(scale, quantizer=scale_quantizer) - self.add_bias(bias, quantizer=bias_quantizer) - - def add_weights(self, scale, quantizer=None): - self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) - - def add_bias(self, bias, quantizer=None): - self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) - - def register_qkeras(): - # Register the layer types to the layer map - register_layer('ApplyAlpha', ApplyAlpha) - # Register the optimization passes register_pass('output_rounding_saturation_mode', OutputRoundingSaturationMode) register_pass('qkeras_factorize_alpha', QKerasFactorizeAlpha) From ff788eae9a541e88c74e0876d405a487537632cc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 11 Jul 2023 19:32:13 -0500 Subject: [PATCH 02/59] add qonnx pytest --- 
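Note: the added tests can be run on their own with pytest, for example

    pytest test/pytest/test_qonnx.py -k tfc_2w2a

(assuming pytest and the qonnx package are installed). The tests download their models
from the QONNX model zoo at runtime and write the generated HLS projects under test/pytest/.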
test/pytest/test_qonnx.py | 189 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100755 test/pytest/test_qonnx.py diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py new file mode 100755 index 0000000000..be567d81f9 --- /dev/null +++ b/test/pytest/test_qonnx.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +import os +import urllib +from pathlib import Path + +import numpy as np +import pytest +import qonnx.core.onnx_exec as oxe +import qonnx.util.cleanup +import qonnx.util.to_channels_last + +# To conveniently run QONNX inference +from qonnx.core.modelwrapper import ModelWrapper + +import hls4ml + +test_root_path = Path(__file__).parent + + +def test_tfc_2w2a(): + # download test model + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") + tfc_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + ) + urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = str(dl_dir / "qonnx-tfc-2w2a-clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 1, 28, 28) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + config['LayerName'] = {} + config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a'), part='xcu250-figd2104-2L-e', hls_config=config + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +def test_tfc_2w2a_quartus(): + # download test model + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") + tfc_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + ) + urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = str(dl_dir / "qonnx-tfc-2w2a-clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 1, 28, 28) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + config['LayerName'] = {} + config['LayerName']['global_in'] = {'Precision': 'ac_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a-quartus'), + part='Arria10', + backend='Quartus', + 
hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +def test_cnv_2w2a(): + # download test model + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-cnv-2w2a.onnx") + cnv_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" + ) + urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_clean = str(dl_dir / "qonnx-cnv-2w2a-clean.onnx") + out_chanlast = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last.onnx") + out_file = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last-clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) + qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) + qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 32, 32, 3) + np.random.seed(1) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + config['Model']['Precision'] = 'ap_fixed<32,16>' + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / 'hls4mlprj_qonnx_cnv-2w2a'), + part='xcu250-figd2104-2L-e', + io_type='io_stream', + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +def test_jet_tagging(backend): + # download test model + dl_dir = test_root_path + dl_file = dl_dir / "qkeras_jettagging.onnx" + jet_tagging_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" + ) + urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = dl_dir / "qkeras_jettagging-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 16) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +if __name__ == '__main__': + test_tfc_2w2a() From cda7208675c85ffadbcde4ce873521bf9187d7c1 Mon Sep 17 
00:00:00 2001 From: Jovan Mitrevski Date: Tue, 11 Jul 2023 19:41:59 -0500 Subject: [PATCH 03/59] first migration of onnx parsing --- hls4ml/converters/onnx/convolution.py | 127 +++++++------- hls4ml/converters/onnx/core.py | 103 ++++++------ hls4ml/converters/onnx/merge.py | 24 +-- hls4ml/converters/onnx/pooling.py | 84 ++++------ hls4ml/converters/onnx/reshape.py | 37 ++--- hls4ml/converters/onnx_to_hls.py | 227 ++++++++++---------------- 6 files changed, 258 insertions(+), 344 deletions(-) diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index 39b2232169..85dc0ca804 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -1,85 +1,72 @@ -from hls4ml.converters.onnx_to_hls import ( - compute_pads_1d, - compute_pads_2d, - get_onnx_attribute, - get_onnx_input_name, - onnx_handler, -) -from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d +import numpy as np + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler @onnx_handler('Conv') -def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_conv_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name - layer['data_format'] = 'channels_first' # ONNX's default is channel first - layer['inputs'] = get_onnx_input_name(node, graph) - reader.add_input(layer['name'], node.input) + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['inputs'] = input_names + layer['outputs'] = node.output strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') - - if len(input_shapes[0]) == 3: # Conv1D - layer['class_name'] = 'Conv1D' - - layer['in_width'] = input_shapes[0][2] - layer['n_chan'] = input_shapes[0][1] - layer['filt_width'] = kernel_shape[0] - layer['n_filt'] = reader.get_weights_data(layer['name'], 'kernel').shape[2] - layer['stride_width'] = strides[0] - pads = compute_pads_1d(node, layer) - + # Note: currently don't have support for auto_pad. 
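+    # (Supporting auto_pad would mean deriving the pads from the SAME_UPPER / SAME_LOWER /
+    # VALID mode; an explicit 'pads' attribute is assumed below.)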
+ pads = get_onnx_attribute(node, 'pads') + dilations = get_onnx_attribute(node, 'dilations') + if dilations is None: + dilations = [1] * len(layer['kernel_shape']) + + if get_onnx_attribute(node, 'group') != 1: + raise ValueError("Only 1 group supported corrently") + + layer['in_width'] = input_shapes[0][-2] + layer['n_chan'] = input_shapes[0][-1] + layer['n_filt'] = input_shapes[1][0] + + layer['n_dim'] = len(input_shapes[0]) - 2 # 2 comes from channels and batch dimentions + if layer['n_dim'] not in (1, 2): + raise ValueError("Only 1D and 2D convolutions are supported") + layer['class_name'] = 'Conv' + + # set some values needed later + if layer['n_dim'] == 1: + # this is 1D convolution + full_width = layer['in_width'] + pads[0] + pads[1] + eff_kernel_width = kernel_shape[0] * dilations[0] + layer['out_width'] = int(np.ceil((full_width - eff_kernel_width + 1) / strides[0])) + # for compatibility interpret some variables layer['pad_left'] = pads[0] layer['pad_right'] = pads[1] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding - layer['padding'] = 'valid' - else: - layer['padding'] = 'same' - - (layer['out_width'], _, _) = compute_padding_1d( - layer['padding'], layer['in_width'], layer['stride_width'], layer['filt_width'] - ) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_width']] - - elif len(input_shapes[0]) == 4: # Conv2D - layer['class_name'] = 'Conv2D' - - layer['in_height'] = input_shapes[0][2] - layer['in_width'] = input_shapes[0][3] - layer['n_chan'] = input_shapes[0][1] - + layer['filt_width'] = kernel_shape[0] + layer['stride_width'] = strides[0] + layer['dilation_width'] = dilations[0] + else: + # 2d + layer['in_height'] = input_shapes[0][-3] + full_height = layer['in_height'] + pads[0] + pads[2] + eff_kernel_height = kernel_shape[0] * dilations[0] + out_height = int(np.ceil((full_height - eff_kernel_height + 1) / strides[0])) + layer['out_height'] = out_height + + full_width = input_shapes[0][-2] + pads[1] + pads[3] + eff_kernel_width = kernel_shape[1] * dilations[1] + out_width = int(np.ceil((full_width - eff_kernel_width + 1) / strides[1])) + layer['out_width'] = out_width + # for compatibility interpret some variables + layer['pad_top'] = pads[0] + layer['pad_left'] = pads[1] + layer['pad_bottom'] = pads[2] + layer['pad_right'] = pads[3] layer['filt_height'] = kernel_shape[0] layer['filt_width'] = kernel_shape[1] - - layer['n_filt'] = next( - (x.type.tensor_type.shape.dim[1].dim_value for x in graph.value_info if x.name == node.output[0]), None - ) layer['stride_height'] = strides[0] layer['stride_width'] = strides[1] - pads = compute_pads_2d(node, layer) - - layer['pad_top'] = pads[0] - layer['pad_bottom'] = pads[2] - layer['pad_left'] = pads[1] - layer['pad_right'] = pads[3] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding in Keras/Tensorflow - layer['padding'] = 'valid' - else: # Only 'valid' and 'same' padding are available in Keras - layer['padding'] = 'same' - - (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d( - layer['padding'], - layer['in_height'], - layer['in_width'], - layer['stride_height'], - layer['stride_width'], - layer['filt_height'], - layer['filt_width'], - ) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] + layer['dilation_height'] = dilations[0] + layer['dilation_width'] = dilations[1] - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index 
940b860870..c6aaa6009c 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -1,28 +1,20 @@ -from hls4ml.converters.onnx_to_hls import get_onnx_attribute, get_onnx_input_name, onnx_handler +import numpy as np +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler -@onnx_handler(*['Gemm', 'MatMul']) -def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): + +@onnx_handler('MatMul') +def parse_matmul_layer(node, input_names, input_shapes, graph): layer = {} - layer['class_name'] = 'Dense' + layer['class_name'] = 'MatMul' layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) - - tran_weight = get_onnx_attribute(node, 'transB', 0) - reader.add_input(layer['name'], node.input, tran_weight) - - weights_shape = reader.get_weights_data(layer['name'], 'kernel').shape - layer['n_in'] = weights_shape[0] - layer['n_out'] = weights_shape[1] - - output_shape = input_shapes[0][:] - output_shape[-1] = layer['n_out'] + layer['inputs'] = input_names + layer['outputs'] = list(node.output) - return layer, output_shape + return layer -# ------------------Global paras for activations # TODO: repair HardSigmoid support # https://github.com/fastmachinelearning/hls4ml/issues/409 activation_layers = [ @@ -37,7 +29,7 @@ def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): 'Softmax', 'Softsign', 'Softplus', - 'Clip', + # 'Clip', ] activation_map = { @@ -53,70 +45,89 @@ def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): 'Softmax': 'Softmax', 'Softsign': 'Activation', 'Softplus': 'Activation', - 'Clip': 'Clip', + # 'Clip': 'Clip', } # --------- @onnx_handler(*activation_layers) -def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_activation_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name layer['class_name'] = activation_map[node.op_type] layer['activation'] = node.op_type.lower() - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) if layer['class_name'] != 'Activation': if layer['class_name'] == 'Softmax': layer['activation'] = 'softmax' + layer['axis'] = get_onnx_attribute(node, 'axis', -1) elif layer['class_name'] in ['ELU', 'LeakyReLU', 'ThresholdedReLU']: layer['activation'] = layer['class_name'] layer['activ_param'] = get_onnx_attribute(node, 'alpha', 0.01) - elif layer['class_name'] == 'Clip': - clip_min_node = [x for x in graph.initializer if x.name in node.input] - clip_min = clip_min_node[0].float_data[0] + # # Don't yet support Clip + # elif layer['class_name'] == 'Clip': + # clip_min_node = [x for x in graph.initializer if x.name in input_names] + # clip_min = clip_min_node[0].float_data[0] - # Check if it's relu or not - if clip_min == 0.0: - layer['class_name'] = 'Activation' - layer['activation'] = 'ReLU' - else: - raise Exception('Clip with min != 0 is not supported yet!') + # # Check if it's relu or not + # if clip_min == 0.0: + # layer['class_name'] = 'Activation' + # layer['activation'] = 'ReLU' + # else: + # raise Exception('Clip with min != 0 is not supported yet!') else: layer['activation'] = layer['class_name'] layer['class_name'] = 'Activation' - return layer, [shape for shape in input_shapes[0]] + return layer @onnx_handler('BatchNormalization') -def parse_batchnorm_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_batchnorm_layer(node, input_names, input_shapes, graph): layer = {} 
- layer['class_name'] = 'BatchNormalization' - layer['data_format'] = 'channels_first' + layer['class_name'] = 'BatchNormOnnx' layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) # Other attributes - layer['epsilon'] = get_onnx_attribute(node, 'epsilon') - layer['momentum'] = get_onnx_attribute(node, 'momentum') + layer['epsilon'] = get_onnx_attribute(node, 'epsilon', 1e-05) + # layer['momentum'] = get_onnx_attribute(node, 'momentum', 0.9) # not used - reader.add_input(layer['name'], node.input) - - in_size = 1 - for dim in input_shapes[0][1:]: - in_size *= dim - - layer['n_in'] = layer['n_out'] = in_size + layer['n_in'] = layer['n_out'] = np.prod(input_shapes[0][1:]) if len(input_shapes[0]) == 2: layer['n_filt'] = -1 elif len(input_shapes[0]) > 2: - layer['n_filt'] = input_shapes[0][1] # Always channel first for onnx + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['n_filt'] = input_shapes[0][-1] + else: + raise RuntimeError(f"Unexpected input shape: {input_shapes[0]}") + + return layer + + +@onnx_handler('Quant') +def parse_quant_layer(node, input_names, input_shapes, graph): + layer = {} + + layer['class_name'] = 'Quant' + layer['name'] = node.name + layer['inputs'] = input_names + layer['outputs'] = list(node.output) + + # Other attributes + layer['narrow'] = bool(get_onnx_attribute(node, 'narrow')) + layer['rounding_mode'] = get_onnx_attribute(node, 'rounding_mode') + layer['signed'] = bool(get_onnx_attribute(node, 'signed')) - return layer, [shape for shape in input_shapes[0]] + return layer diff --git a/hls4ml/converters/onnx/merge.py b/hls4ml/converters/onnx/merge.py index 9ccd432d18..2309cc213f 100644 --- a/hls4ml/converters/onnx/merge.py +++ b/hls4ml/converters/onnx/merge.py @@ -1,16 +1,16 @@ -from hls4ml.converters.onnx_to_hls import get_onnx_attribute, get_onnx_input_name, onnx_handler +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler -merge_layers = ['Add', 'Sub', 'Mul', 'Average', 'Max', 'Min', 'Concat', 'Sum'] +merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] @onnx_handler(*merge_layers) -def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_merge_layer(node, input_names, input_shapes, graph): layer = {} layer['class_name'] = node.op_type layer['name'] = node.name layer['op'] = layer['class_name'].lower() - layer['inputs'] = get_onnx_input_name(node, graph) - output_shape = input_shapes[0] + layer['inputs'] = input_names + layer['outputs'] = list(node.output) if layer['class_name'] == 'Concat': rank = len(input_shapes[0][1:]) @@ -21,22 +21,10 @@ def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): layer['op'] = layer['class_name'].lower() + f'{rank}d' layer['axis'] = get_onnx_attribute(node, 'axis') - # Calculate output shape - new_dim = sum( - [x.type.tensor_type.shape.dim[layer['axis']].dim_value for x in graph.value_info if x.name in node.input] - ) - output_shape[layer['axis']] = new_dim - - elif layer['class_name'] == 'Add': - # Check if the layer is an AddBias - for input in node.input: - if "bias" in input: - layer['class_name'] = 'BiasAdd' - reader.add_input(layer['name'], node.input) else: layer['class_name'] = 'Merge' if len(layer['inputs']) > 2: raise 
Exception('ERROR: Merging more than two tensors is not yet supported.') - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx/pooling.py b/hls4ml/converters/onnx/pooling.py index 67fa76c7c7..1f5c431004 100644 --- a/hls4ml/converters/onnx/pooling.py +++ b/hls4ml/converters/onnx/pooling.py @@ -1,26 +1,30 @@ -from hls4ml.converters.onnx_to_hls import ( - compute_pads_1d, - compute_pads_2d, - get_onnx_attribute, - get_onnx_input_name, - onnx_handler, -) -from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d +import numpy as np + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler pool_operations = ['AveragePool', 'MaxPool'] @onnx_handler(*pool_operations) -def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_pool_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") layer['class_name'] = node.op_type - layer['data_format'] = 'channels_first' # Default ONNX + layer['data_format'] = 'channels_last' # Default QONNX info = layer['class_name'].replace('Pool', '') strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') + pads = get_onnx_attribute(node, 'pads') + layer['pads'] = pads + dilations = get_onnx_attribute(node, 'dilations') + if dilations is None: + dilations = [1] * len(kernel_shape) + layer['dilations'] = dilations if len(input_shapes[0]) == 3: # 1D layer['class_name'] = info + 'Pooling1D' @@ -31,70 +35,50 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): layer['pool_width'] = kernel_shape[0] layer['stride_width'] = strides[0] - # Padding - pads = compute_pads_1d(node, layer) - layer['pad_left'] = pads[0] - layer['pad_right'] = pads[1] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding - layer['padding'] = 'valid' - else: - layer['padding'] = 'same' - - (layer['n_out'], _, _) = compute_padding_1d( - layer['padding'], layer['n_in'], layer['stride_width'], layer['pool_width'] + # formula from ONNX Operators.md documentation + layer['n_out'] = int( + np.floor((layer['n_in'] + np.sum(pads) - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1) ) - output_shape = [input_shapes[0][0], layer['n_filt'], layer['n_out']] - elif len(input_shapes[0]) == 4: # 2D layer['class_name'] = info + 'Pooling2D' - layer['n_filt'] = input_shapes[0][1] - layer['in_height'] = input_shapes[0][2] - layer['in_width'] = input_shapes[0][3] + layer['n_filt'] = input_shapes[0][3] + layer['in_height'] = input_shapes[0][1] + layer['in_width'] = input_shapes[0][2] layer['stride_height'] = strides[0] layer['stride_width'] = strides[1] layer['pool_height'] = layer['filt_height'] = kernel_shape[0] layer['pool_width'] = layer['filt_width'] = kernel_shape[1] - pads = compute_pads_2d(node, layer) layer['pad_top'] = pads[0] layer['pad_bottom'] = pads[2] layer['pad_left'] = pads[1] layer['pad_right'] = pads[3] - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding in Keras/Tensorflow - layer['padding'] = 'valid' - else: # Only 'valid' and 'same' padding are available in Keras - layer['padding'] = 'same' - - (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d( - layer['padding'], - 
layer['in_height'], - layer['in_width'], - layer['stride_height'], - layer['stride_width'], - layer['filt_height'], - layer['filt_width'], + # formula from ONNX Operators.md documentation + layer['out_height'] = int( + np.floor((layer['in_height'] + pads[0] + pads[2] - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1) + ) + layer['out_width'] = int( + np.floor((layer['in_width'] + pads[1] + pads[3] - ((kernel_shape[1] - 1) * dilations[1] + 1)) / strides[1] + 1) ) - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] - - return layer, output_shape + return layer global_pooling_layers = ['GlobalMaxPool', 'GlobalAveragePool'] @onnx_handler(*global_pooling_layers) -def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_global_pooling_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) layer['class_name'] = node.op_type - layer['data_format'] = 'channels_first' + layer['data_format'] = 'channels_last' # default QONNX # Sonme default parameters for global pooling layer['n_out'] = 1 @@ -116,6 +100,4 @@ def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, co layer['in_height'] = input_shapes[0][2] layer['in_width'] = input_shapes[0][3] - output_shape = [input_shapes[0][0], layer['n_filt']] + [1] * (len(input_shapes[0]) - 2) - - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx/reshape.py b/hls4ml/converters/onnx/reshape.py index 5bbf58b079..9ef20f03d7 100644 --- a/hls4ml/converters/onnx/reshape.py +++ b/hls4ml/converters/onnx/reshape.py @@ -1,39 +1,38 @@ -import numpy as np - -from hls4ml.converters.onnx_to_hls import get_onnx_input_name, onnx_handler +from hls4ml.converters.onnx_to_hls import onnx_handler @onnx_handler('Transpose') -def parse_transpose_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_transpose_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name layer['class_name'] = 'Transpose' - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) perm = [list(i.ints) for i in node.attribute][0] # This will get something like [[a,b,c]][0] = [a,b,c] layer['perm'] = [x - 1 for x in perm[1:]] # Ignore the batch dimension in ONNX, and adjust the perm indexing - output_shape = [input_shapes[0][i] for i in perm] - - return layer, output_shape + return layer @onnx_handler('Reshape') -def parse_reshape_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_reshape_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name layer['class_name'] = 'Reshape' - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) - target_shape = list([x for x in graph.initializer if x.name == node.input[1]][0].int64_data)[1:] + return layer - if -1 in target_shape: # Need to infer shape for -1 - print("WARNING: Inferring -1 shape ... 
") - dummy_x = np.ones(input_shapes[0][1:]) - dummy_y = np.reshape(dummy_x, target_shape) - target_shape = list(dummy_y.shape) - layer['target_shape'] = target_shape - output_shape = input_shapes[0][:1] + layer['target_shape'] +@onnx_handler('Flatten') +def parse_flatten_layer(node, input_names, input_shapes, graph): + layer = {} + layer['name'] = node.name + layer['class_name'] = 'Reshape' + layer['inputs'] = input_names + layer['outputs'] = list(node.output) + layer['target_shape'] = [-1] # does not contain batch dimension - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 106daf62da..8f6c7461fb 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -1,78 +1,10 @@ -import numpy as np import onnx -from onnx import helper, numpy_helper, shape_inference +from onnx import helper, numpy_helper from hls4ml.model import ModelGraph -MAXMULT = 4096 - -class ONNXDataReader: - """ - ONNX data reader to be used for extracting relevant information during conversion. - """ - - def __init__(self, model): - self.model = model - self.input_map = {} - self.index_map = { - # Dense - 'kernel': 1, - 'bias': 2, - # BatchNormalization - 'gamma': 1, - 'beta': 2, - 'moving_mean': 3, - 'moving_variance': 4, - } - - def get_weights_data(self, layer_name, var_name): - """Extract weights data from ONNX model. - - Args: - layer_name (str): Layer's name in the ONNX model. - var_name (str): Variable to be extracted. - - Returns: - ndarray: Extracted weights data. - """ - # Get the node associated with the layer name - node = next(node for node in self.model.graph.node if node.name == layer_name) - - inputs = self.input_map[layer_name] - inp_idx = self.index_map[var_name] - - if inp_idx >= len(inputs['inputs']): - # Check if the layer is an AddBias layer - if (node.op_type == 'Add') and (var_name == 'bias'): - inp_idx = 1 - else: - # Input not found, likely a bias tensor is not available - return None - - tensor = next((x for x in self.model.graph.initializer if x.name == inputs['inputs'][inp_idx]), None) - - if tensor is not None: - data = numpy_helper.to_array(tensor) - - if inputs['transpose']: - if inputs['perm'] is not None and len(data.shape) == len(inputs['perm']): - data = data.transpose(inputs['perm']) - else: - data = data.transpose() - - # Check for transB in Gemm - if node.op_type == 'Gemm': - if not get_onnx_attribute(node, 'transB'): - data = data.transpose() - - return data - - def add_input(self, layer_name, inputs, transpose=True, perm=None): - self.input_map[layer_name] = {'inputs': inputs, 'transpose': transpose, 'perm': perm} - - -# ----------------------Helpers--------------------- # +# ----------------------Helpers--------------------- def sanitize_layer_name(layer): new_name = layer['name'] if new_name[0].isdigit(): @@ -99,9 +31,52 @@ def get_onnx_attribute(operation, name, default=None): return value -def get_input_shape(model, operation, input_idx=0): - value_info_idx = next((i for i, x in enumerate(model.graph.value_info) if x.name == operation.input[input_idx]), 0) - return [d.dim_value for d in model.graph.value_info[value_info_idx].type.tensor_type.shape.dim] +def get_global_input_shape(graph, inp): + """Return the global input shape of the graph with name inp + + Arguments: + graph: the onnx graph + inp (str): the global input name + + Returns: + list: The shape + + Raises: + StopIteration: If the global input name is not found + """ + inp_shape = 
next(x.type.tensor_type.shape.dim for x in graph.input if x.name == inp) + return list(x.dim_value for x in inp_shape) + + +def get_input_shape(graph, node): + """Return the input shapes of the node in the model + + Arguments: + graph: the onnx graph + node: the onnx node for which the input is desired + + Returns: + list of lists: The shapes of all the inputs + + Raises: + StopIteration: If the an input name is not found in the graph + """ + rv = [] + for inp in node.input: + try: + value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == inp)) + dim = list(d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim) + except StopIteration: + # The input is not in the graph, likely it's the input + dim = get_global_input_shape(graph, inp) + if dim: + rv.append(dim) + return rv + + +def get_constant_value(graph, constant_name): + tensor = next((x for x in graph.initializer if x.name == constant_name), None) + return numpy_helper.to_array(tensor) def compute_pads_1d(operation, layer): @@ -155,7 +130,7 @@ def compute_pads_2d(operation, layer): return pads -# ----------------------Layer handling--------------------- # +# ----------------------Layer handling--------------------- layer_handlers = {} @@ -178,27 +153,6 @@ def decorator(function): return decorator -# --->> A set of functions to address the naming convetion in ONNx's graph -def get_onnx_input_name(node, graph): - """ - In ONNX, when calling node.input, it returns the node input's index in the graph instead of the input's name. - However, the input's name is used for indexing in ModelGraph's graph. This function return the input node's name instead. - """ - - in_node = [in_node for in_node in graph.node if (in_node.output[0] in node.input)] - - if in_node: - if in_node[0].op_type != 'Flatten': - input_node_name = [x.name for x in in_node] - else: # IF it's a flatten - input_node_name = [x.name for x in graph.node if (x.output[0] in in_node[0].input)] - - return input_node_name - - else: # If there is no input name it's actually the first layer - return [replace_char_inconsitency(node.input[0])] - - def get_out_layer_name(graph): """ Get the output layer's name for the model. @@ -226,18 +180,16 @@ def onnx_to_hls(config): # Extract model architecture print('Interpreting Model ...') - model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] + onnx_model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] - model = shape_inference.infer_shapes(model) - graph = model.graph - - reader = ONNXDataReader(model) + # We don't infer the shapes because the qonnx package preprocessing does it. 
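+    # (i.e. the model is expected to have been run through qonnx.util.cleanup, and through
+    # qonnx.util.to_channels_last for convolutional models, before being passed to this converter)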
# Obtain list of input/ouput layers - all_inputs = [x.name for x in model.graph.input] - all_initializers = [x.name for x in model.graph.initializer] + all_inputs = [x.name for x in onnx_model.graph.input] + all_initializers = [x.name for x in onnx_model.graph.initializer] input_layers = [x for x in all_inputs if x not in all_initializers] - output_layers = get_out_layer_name(graph) + constant_layers = all_initializers # no need to copy it even though we change it + output_layers = get_out_layer_name(onnx_model.graph) print("Output layers: ", output_layers) @@ -245,69 +197,64 @@ def onnx_to_hls(config): input_layer = {} input_layer['name'] = replace_char_inconsitency(inp) input_layer['class_name'] = 'InputLayer' - inp_shape = next((x.type.tensor_type.shape.dim for x in model.graph.input if x.name == inp), None) - input_layer['input_shape'] = [x.dim_value for x in inp_shape] - - if len(input_layer['input_shape']) > 1: - input_layer['input_shape'][0] = None # Firt dim is batch + inp_shape = get_global_input_shape(onnx_model.graph, inp) + # We only support ONNX where the first dimension is the batch dimension. + # Remove the batch dimension in all subsequnt use + input_layer['input_shape'] = inp_shape[1:] + print('Input shape:', input_layer['input_shape']) # Clean the layer name for specific models sanitize_layer_name(input_layer) input_layers[i] = input_layer['name'] layer_list.append(input_layer) + for i, constant in enumerate(constant_layers): + constant_layer = {} + constant_layer['name'] = replace_char_inconsitency(constant) + constant_layer['class_name'] = 'Constant' + constant_layer['value'] = get_constant_value(onnx_model.graph, constant) + + # Clean the layer name for specific models + sanitize_layer_name(constant_layer) + constant_layers[i] = constant_layer['name'] + + layer_list.append(constant_layer) + # Defined supported layers and check for unsupported layer type - skip_layers = ['Dropout', 'Identity', 'Flatten'] + skip_layers = ['Dropout', 'Identity'] # Map inputs of skipped layers inputs_map = {} supported_layers = get_supported_onnx_layers() + skip_layers - # Get input shape - current_shape = [input_layer['input_shape']] - print('Input shape:', current_shape[0]) - - # Loop through layers - layer_counter = 0 - - # Output shape tracking - output_shape = None - print('Topology:') - for node in graph.node: + for node in onnx_model.graph.node: if node.op_type not in supported_layers: raise Exception(f'ERROR: Unsupported operation type: {node.op_type}') - # If not the first layer then input shape is taken from last layer's output - if layer_counter != 0: - current_shape = [output_shape] + # Note that at this point, input shape still contains batch dimension + # in cases where it appears. That is not filtered out till later. 
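+        # (for this reason some handlers, e.g. the Conv parser, index shapes from the end,
+        # such as input_shapes[0][-1] for the channel count)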
+ input_shapes = get_input_shape(onnx_model.graph, node) if node.op_type in skip_layers: - if node.op_type == 'Flatten': - output_shape = [current_shape[0][0], np.prod(current_shape[0][1:])] - - else: - # Currently supported skipped layers have only one input and output - # Skipped layers can follow each other (e.g., Dropout -> Flatten) - - # Mapping inputs - input_name = inputs_map.get(node.input[0], node.input[0]) - output_name = node.output[0] - inputs_map[output_name] = input_name + # Currently supported skipped layers have only one input and output + # Skipped layers can follow each other - output_shape = current_shape[0] + # Mapping inputs + input_name = inputs_map.get(node.input[0], node.input[0]) + output_name = node.output[0] + inputs_map[output_name] = input_name continue - if node.op_type in supported_layers: - layer_counter = layer_counter + 1 + input_names = [inputs_map.get(x, x) for x in node.input] # Process the layer - layer, output_shape = layer_handlers[node.op_type](reader, node, inputs_map, current_shape, graph, config) + layer = layer_handlers[node.op_type](node, input_names, input_shapes, onnx_model.graph) sanitize_layer_name(layer) - print('Layer name: {}, layer type: {}, current shape: {}'.format(layer['name'], layer['class_name'], current_shape)) + print(f"Layer name: {layer['name']}, layer type: {layer['class_name']}, current shape: {input_shapes}") layer_list.append(layer) ################# @@ -315,5 +262,5 @@ def onnx_to_hls(config): ################# print('Creating HLS model') - hls_model = ModelGraph(config, reader, layer_list, input_layers, output_layers) + hls_model = ModelGraph(config, layer_list, input_layers, output_layers) return hls_model From af47a0d4563d986db0b7412536983d77ed9cedca Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 12 Jul 2023 13:50:51 -0500 Subject: [PATCH 04/59] change tuples to lists --- hls4ml/model/layers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 6a23a9b934..320a1fde57 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -352,7 +352,7 @@ class Constant(Layer): def initialize(self): value = self.attributes['value'] self.value = value # note, this is unquantized; Only here for easier access - shape = value.shape + shape = list(value.shape) if not shape: shape = (1,) self.value = np.array([self.value]) @@ -963,10 +963,10 @@ def initialize(self): if len(inp2.shape) == 1: # mat vec multiply assert inp1.shape[-1] == inp2.shape[0] - shape = tuple(inp1.shape[:-1]) + (inp2.shape[0],) + shape = list(inp1.shape[:-1]) + [inp2.shape[0]] else: assert inp1.shape[-1] == inp2.shape[-2] - shape = tuple(inp1.shape[:-1]) + (inp2.shape[-1],) + shape = list(inp1.shape[:-1]) + [inp2.shape[-1]] if len(shape) > 1: dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] else: From 8f8cc0b21e23f52c5d750cbbc2ea56104008c6d7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 12 Jul 2023 18:26:28 -0500 Subject: [PATCH 05/59] snapshot of adding qonnx optimizers --- hls4ml/backends/fpga/fpga_backend.py | 8 +- hls4ml/model/layers.py | 4 +- hls4ml/model/optimizer/__init__.py | 8 + .../model/optimizer/passes/batchnorm_opt.py | 169 ++++++++ .../model/optimizer/passes/conv_to_convxd.py | 90 ++++ .../optimizer/passes/matmul_const_to_dense.py | 58 +++ hls4ml/model/optimizer/passes/merge_const.py | 192 +++++++++ hls4ml/model/optimizer/passes/move_scales.py | 301 ++++++++++++++ .../passes/propagate_conv_precision.py | 77 ++++ 
.../passes/propagate_dense_precision.py | 70 ++++ hls4ml/model/optimizer/passes/qkeras.py | 35 +- hls4ml/model/optimizer/passes/quant_opt.py | 387 ++++++++++++++++++ .../model/optimizer/passes/reshape_const.py | 27 ++ 13 files changed, 1389 insertions(+), 37 deletions(-) create mode 100644 hls4ml/model/optimizer/passes/batchnorm_opt.py create mode 100644 hls4ml/model/optimizer/passes/conv_to_convxd.py create mode 100644 hls4ml/model/optimizer/passes/matmul_const_to_dense.py create mode 100644 hls4ml/model/optimizer/passes/merge_const.py create mode 100644 hls4ml/model/optimizer/passes/move_scales.py create mode 100644 hls4ml/model/optimizer/passes/propagate_conv_precision.py create mode 100644 hls4ml/model/optimizer/passes/propagate_dense_precision.py create mode 100644 hls4ml/model/optimizer/passes/quant_opt.py create mode 100644 hls4ml/model/optimizer/passes/reshape_const.py diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 8cfaec8b3f..97e458f7fd 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -13,6 +13,8 @@ LSTM, Activation, BatchNormalization, + BatchNormOnnx, + Conv, Conv1D, Conv2D, Dense, @@ -22,8 +24,10 @@ GarNetStack, GlobalPooling1D, GlobalPooling2D, + MatMul, Pooling1D, Pooling2D, + Quant, SeparableConv1D, SeparableConv2D, SimpleRNN, @@ -63,6 +67,8 @@ def __init__(self, name): LSTM, GRU, Dot, + Conv, + MatMul, ] for layer in accum_layers: @@ -70,7 +76,7 @@ def __init__(self, name): attrs.append(TypeAttribute('accum')) self.attribute_map[layer] = attrs - rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack] + rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack, Quant, BatchNormOnnx] for layer in rf_layers: attrs = self.attribute_map.get(layer, []) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 320a1fde57..bd465ff7b9 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -391,8 +391,8 @@ def initialize(self): # need to get it from the input shape_node = self.get_input_node(self.inputs[1]) # for QONNX, remove batch dimension - # (onnx cleaning should have removed reshape dimension) - if shape_node: + # (onnx cleaning should have removed reshapes not on data path) + if isinstance(shape_node, Constant): target_shape = shape_node.value[1:] else: raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 2e9b197475..db65370e40 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -37,6 +37,14 @@ 'fuse_bias_add', 'remove_useless_transpose', 'expand_layer_group', + 'reshape_constant', + 'quant_constant_parameters', + 'quant_to_activation', + 'fuse_quant_with_constant', + 'quant_to_alpha_activation_alpha', + 'const_quant_to_const_alpha', + 'matmul_const_to_dense', + 'conv_to_conv_x_d', 'output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py new file mode 100644 index 0000000000..a7b0c27209 --- /dev/null +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -0,0 +1,169 @@ +import numpy as np + +from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant +from hls4ml.model.optimizer import OptimizerPass + +_base_attributes = ('Trace', 'reuse_factor', 'n_in', 'n_filt') + + +class 
BatchNormOnnxConstantParameters(OptimizerPass): + """Remove Constant from the BatchNormalization node parameters (but not input[0])""" + + def match(self, node): + is_match = isinstance(node, BatchNormOnnx) and any(node.inputs[1:]) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from the BatchNormalization node parameters (but not input[0]) + """ + + if not (len(node.inputs) == 5 and all(node.inputs)): + raise ValueError(f"All {len.node.inputs} BatchNormOnnnx inputs need to be defined") + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + + gamma_node = node.get_input_node(node.inputs[1]) + if not isinstance(gamma_node, Constant): + raise TypeError("Only consant gammas supported") + gamma = gamma_node.value + attributes['gamma_data'] = gamma + node.inputs[1] = '' + model.remove_node(gamma_node, rewire=False) + + beta_node = node.get_input_node(node.inputs[2]) + if not isinstance(beta_node, Constant): + raise TypeError("Only consant betas supported") + beta = beta_node.value + attributes['beta_data'] = beta + node.inputs[2] = '' + model.remove_node(beta_node, rewire=False) + + moving_mean_node = node.get_input_node(node.inputs[3]) + if not isinstance(moving_mean_node, Constant): + raise TypeError("Only consant moving_means supported") + moving_mean = moving_mean_node.value + attributes['mean_data'] = moving_mean + node.inputs[3] = '' + model.remove_node(moving_mean_node, rewire=False) + + moving_variance_node = node.get_input_node(node.inputs[4]) + if not isinstance(moving_variance_node, Constant): + raise TypeError("Only consant moving_variances supported") + moving_variance = moving_variance_node.value + attributes['variance_data'] = moving_variance + node.inputs[4] = '' + model.remove_node(moving_variance_node, rewire=False) + + # scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) + # bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) + # attributes["scale_data"] = scale + # attributes["bias_data"] = bias + + new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) + + model.replace_node(node, new_node) + + return True + + +class ConstantBatchNormFusion(OptimizerPass): + """ + Merge BatchNorm into Const (after parameters have already been merged in BatchNormalization) + """ + + def match(self, node): + is_match = ( + isinstance(node, BatchNormalization) + and not any(node.inputs[1:]) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision") + ) + return is_match + + def transform(self, model, node): + """ + Remove the batch norm + """ + const_node = node.get_input_node(node.inputs[0]) + + new_val = const_node.value * node.weights["scale"].data_unquantized + node.weights["bias"].data_unquantized + const_node.set_attr("value", new_val) + const_node.set_attr("quantizer", node.get_attr("quantizer")) # None if not defined + const_node.set_attr("quant_precision", node.get_attr("quant_precision")) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + # remove the batch norm node + model.remove_node(node, rewire=True) + + return True + + +class FuseConsecutiveBatchNormalization(OptimizerPass): + ''' + OptimizerPass to merge consecutive BatchNormalization layers, + only if the earlier one does not have quantization specified + ''' + + def match(self, node): + prev_node = 
node.get_input_node(node.inputs[0]) + basic_match = ( + isinstance(node, BatchNormalization) + and isinstance(prev_node, BatchNormalization) + and not prev_node.get_attr("quant_precision") + ) + + # check for compatibility to merge + if basic_match: + s0 = prev_node.weights['scale'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + scale_compatible = ( + (prev_node.get_attr("scale_quantizer") is None and node.get_attr("scale_quantizer") is None) + or (s0 == np.ones_like(s0)).all() + or (s1 == np.ones_like(s1)).all() + ) + bias_compatible = ( + (prev_node.get_attr("bias_quantizer") is None and node.get_attr("bias_quantizer") is None) + or (b0 == np.zeros_like(b0)).all() + or (b1 == np.zeros_like(b1)).all() + ) + return scale_compatible and bias_compatible + else: + return False + + def transform(self, model, node): + prev_node = node.get_input_node(node.inputs[0]) + + s0 = prev_node.weights['scale'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + + s_quantizer = ( + node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() else prev_node.get_attr("scale_quantizer") + ) + b_quantizer = ( + node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr("bias_quantizer") + ) + + node.set_attr("scale_quantizer", s_quantizer) + node.set_attr("bias_quantizer", b_quantizer) + if s_quantizer: + node.set_attr("scale_precision", s_quantizer.hls_type) + if b_quantizer: + node.set_attr("bias_precision", b_quantizer.hls_type) + + scale_new = s0 * s1 + bias_new = s1 * b0 + b1 + + # call function so that quantizer would be called if needed + node.add_weights(scale_new, quantizer=s_quantizer) + node.add_bias(bias_new, quantizer=b_quantizer) + + model.remove_node(prev_node, rewire=True) + return True diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py new file mode 100644 index 0000000000..28f4d4c0bd --- /dev/null +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -0,0 +1,90 @@ +import numpy as np + +from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import IntegerPrecisionType + +# these are attributes to copy +_base_attributes = ( + 'Trace', + 'reuse_factor', + 'in_width', + 'out_width', + 'n_chan', + 'n_filt', + 'pad_left', + 'pad_right', + 'filt_width', + 'stride_width', + 'dilation_width', + 'in_height', + 'out_height', + 'pad_top', + 'pad_bottom', + 'filt_height', + 'stride_height', + 'dilation_height', + 'strategy', + 'data_format', +) + + +class ConvToConvXD(OptimizerPass): + """Convert Conv with constant to a Conv1D or Conv2D layer""" + + def match(self, node): + is_match = isinstance(node, Conv) and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) + ) + + return is_match + + def transform(self, model, node): + """Convert Conv with constant to a Conv1D or Conv2D layer""" + + weight_node = node.get_input_node(node.inputs[1]) + weight_precision = weight_node.get_attr("quant_precision") + bias_node = None + bias_precision = None + if len(node.inputs) == 3: + bias_node = 
node.get_input_node(node.inputs[2]) + bias_precision = bias_node.get_attr("quant_precision") + + # creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + + # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) + if node.attributes['n_dim'] == 1: + newtype = Conv1D + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 0)) + else: + newtype = Conv2D + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) + attributes["weight_precision"] = weight_precision + attributes["weight_quantizer"] = weight_node.get_attr("quantizer") + + if bias_node: + attributes["bias_data"] = bias_node.value + attributes["bias_precision"] = bias_precision + attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + else: + attributes["bias_data"] = np.zeros(attributes['n_filt']) + attributes["bias_precision"] = IntegerPrecisionType(1, False) + + # making new node + new_node = model.make_node( + newtype, f"{newtype.__name__}_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + ) + + # removing and replacing old nodes + model.remove_node(weight_node, rewire=False) + if bias_node: + model.remove_node(bias_node, rewire=False) + model.replace_node(node, new_node) + + return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py new file mode 100644 index 0000000000..82c7b56313 --- /dev/null +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -0,0 +1,58 @@ +import numpy as np + +from hls4ml.model.layers import Constant, Dense, MatMul +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import IntegerPrecisionType + +_base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') + + +class MatmulConstToDense(OptimizerPass): + """ + Convert MatMul with constant to a dense layer. Note, this only supports the second input + being the constant. If needed, one could add transposes to make that be the case in + other yet to be written optimizers. 
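+
+    For example (illustrative description of the transform below): a MatMul of an input ``x`` with a
+    constant ``W`` is rewritten as a Dense node that reuses ``W`` as its weight_data, takes an
+    all-zero bias, and sets n_in/n_out from the products of the input and output shapes.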
+ """ + + def match(self, node): + is_match = ( + isinstance(node, MatMul) and len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) + return is_match + + def transform(self, model, node): + """Substitute Matmul + Constant for a single dense""" + # determining Constant layer input + const_node = node.get_input_node(node.inputs[1]) + other_var = node.get_input_variable(node.inputs[0]) + + weight_precision = const_node.get_attr("quant_precision") + weight_quantizer = const_node.get_attr("quantizer") + + in_shape = other_var.shape + n_in = np.prod(in_shape) + out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] + n_out = np.prod(out_shape) + + # creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update( + { + "weight_data": const_node.value, + "weight_precision": weight_precision, + "weight_quantizer": weight_quantizer, + "bias_data": np.zeros(out_shape), + "bias_precision": IntegerPrecisionType(1, False), + "n_in": n_in, + "n_out": n_out, + } + ) + + # making new node + new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + + # removing and replacing old nodes + model.remove_node(const_node, rewire=False) + model.replace_node(node, new_dense) + + return True diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py new file mode 100644 index 0000000000..4e339ccc3f --- /dev/null +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -0,0 +1,192 @@ +import numpy as np + +from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer +from hls4ml.model.layers import BatchNormalization, Constant, Merge +from hls4ml.model.optimizer import OptimizerPass + +_base_attributes = ('Trace', 'reuse_factor', 'n_in') + +# TODO This doesn't yet support quantization in the constants + + +class MergeTwoConstants(OptimizerPass): + """Merge of two constants makes another constant""" + + def match(self, node): + is_match = ( + isinstance(node, Merge) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) + + return is_match + + def transform(self, model, node): + """ + Merge of two constants makes another constant + """ + const_node0 = node.get_input_node(node.inputs[0]) + const_node1 = node.get_input_node(node.inputs[1]) + + val0 = const_node0.value + val1 = const_node1.value + + op = node.attributes["op"] + if op in ('add', 'sum'): + new_val = val0 + val1 + elif op == 'sub': + new_val = val0 - val1 + elif op == 'mul': + new_val = val0 * val1 + elif op == 'div': + new_val = val0 / val1 + elif op == 'average': + new_val = np.mean(np.array([val0, val1]), axis=0) + elif op == 'max': + new_val = np.maximum(val0, val1) + elif op == 'min': + new_val = np.minimum(val0, val1) + else: + raise RuntimeError(f"Unexpected op_type: {op}") + + quantizer = node.get_attr("quantizer") # None if not defined + if quantizer: + const_node0.set_attr("quantizer", quantizer) + const_node0.set_attr("value", new_val) + + quant_precision = node.get_attr("quant_precision") + if quant_precision: + const_node0.set_attr("quant_precision", quant_precision) + + # reinitialize (which also runs quantization if quantizer exists) + const_node0.initialize() + + model.remove_node(const_node1, rewire=False) + + # remove the batch norm node + model.remove_node(node, rewire=True) + + return True + + +class MergeToBatchNormalization(OptimizerPass): + """Convert Add, 
Sub, Mul, or Div Merges with consant to BatchNormalization""" + + def match(self, node): + is_match = ( + isinstance(node, Merge) + and node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate + and ( + isinstance(node.get_input_node(node.inputs[0]), Constant) + != isinstance(node.get_input_node(node.inputs[1]), Constant) + ) + ) + # note: != for booleans is xor. + return is_match + + def transform(self, model, node): + node1 = node.get_input_node(node.inputs[1]) + + node1const = isinstance(node1, Constant) + if node1const: + const_node = node1 + input_node_idx = 0 + else: + const_node = node.get_input_node(node.inputs[0]) + input_node_idx = 1 + + input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape + n_in = np.prod(input_shape) + + scale_precision = None + scale_quantizer = None + bias_precision = None + bias_quantizer = None + + op = node.attributes["op"] + if op in ('add', 'sum'): + scale = np.array(1) + bias = const_node.value + bias_precision = const_node.get_attr("quant_precision") + bias_quantizer = const_node.get_attr("quantizer") + elif op == 'sub': + if node1const: + scale = np.array(1) + bias = -const_node.value + else: + scale = np.array(-1) + bias = const_node.value + bias_precision = const_node.get_attr("quant_precision") + bias_quantizer = const_node.get_attr("quantizer") + if bias_precision and not bias_precision.signed: + # need to add a bit + bias_precision.signed = 1 + bias_precision.width += 1 + bias_precision.integer += 1 + bias_quantizer = QuantNodeQuantizer(bias_precision) + + elif op == 'mul': + scale = const_node.value + bias = np.array(0) + scale_precision = const_node.get_attr("quant_precision") + scale_quantizer = const_node.get_attr("quantizer") + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update( + { + "scale_data": scale, + "bias_data": bias, + "n_in": n_in, + "n_out": n_in, + "n_filt": -1, + "scale_precision": scale_precision, + "scale_quantizer": scale_quantizer, + "bias_precision": bias_precision, + "bias_quantizer": bias_quantizer, + } + ) + + bn_layer = model.make_node( + BatchNormalization, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ) + + model.remove_node(const_node, rewire=False) + model.replace_node(node, bn_layer) + + return True + + +class MergeToBatchNormalizationDiv(OptimizerPass): + """ + Convert Div Merges with consant to BatchNormalization + + TODO: propagate precision + """ + + def match(self, node): + is_match = ( + isinstance(node, Merge) + and node.attributes["op"] == 'div' + and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) # only second can be const + + return is_match + + def transform(self, model, node): + input_shape = node.get_input_variable().shape + n_in = np.prod(input_shape) + const_node = node.get_input_node(node.inputs[1]) + scale = 1 / const_node.value + bias = np.array(0) + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) + + bn_layer = model.make_node( + "BatchNormalization", f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + ) + + model.remove_node(const_node, rewire=False) + model.replace_node(node, bn_layer) + + return True diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py new file mode 100644 index 0000000000..e97fd89947 --- /dev/null +++ 
b/hls4ml/model/optimizer/passes/move_scales.py @@ -0,0 +1,301 @@ +''' +This file includes optimizations related to moving the ApplyAphas across MatMul and Conv nodes. + +TODO: Check that biases are properly handled. (Attempt to do it via Merge) + +''' +import numpy as np + +from hls4ml.model.layers import ApplyAlpha, Constant, Conv, MatMul, Merge +from hls4ml.model.optimizer import OptimizerPass + + +class ScaleDownMatMul(OptimizerPass): + '''Shift an ApplyAlpha below a MatMul''' + + def match(self, node): + ''' + Check to see if we have a MatMul with at least one input ApplyAlpha. + Note, if both are this optimition runs twice. + ''' + is_match = ( + isinstance(node, MatMul) + and len(node.inputs) == 2 + and ( + isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + ) + ) + return is_match + + def transform(self, model, node): + # determine input with ApplyAlpha. If both, first propagate apply alpha associated with a constant + is_aa = [False, False] + from_const = [False, False] + inp = [node.get_input_node(node.inputs[0]), node.get_input_node(node.inputs[1])] + for i in range(2): + if isinstance(inp[i], ApplyAlpha): + is_aa[i] = True + from_const[i] = isinstance(inp[i].get_input_node(inp[i].inputs[0]), Constant) + + # prefer alpha from constant + if from_const[0]: + alpha_idx = 0 + elif from_const[1]: + alpha_idx = 1 + elif is_aa[0]: + alpha_idx = 0 + else: + alpha_idx = 1 # is_aa[1] must be true + + apply_alpha = inp[alpha_idx] + other_idx = 0 if alpha_idx else 1 + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # check size compatibility + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + # if did not succeed in propagating, try again + if not can_propagate and isinstance(inp[other_idx], Constant): + # can handle nonzero bias in some cases if other value is a Constant + try: + np.broadcast_to(scale, output.shape) # check size compatibility + newscale = scale + newbias = inp[other_idx].value * bias + np.broadcast_to(newbias, output.shape) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + + +class ScaleDownAdd(OptimizerPass): + '''Shift an identical ApplyAlpha below a Merge (Add)''' + + def match(self, node): + '''Check to see if we have an add with two ApplyAlphas with identical scale''' + is_match = isinstance(node, Merge) and len(node.inputs) == 2 and node.attributes["op"] == "add" + if is_match: + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + is_match = ( + isinstance(in0, ApplyAlpha) + and isinstance(in1, ApplyAlpha) + and (in0.weights['scale'].data_unquantized == 
in1.weights['scale'].data_unquantized).all() + ) + return is_match + + def transform(self, model, node): + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + + # Check if we can move + scale = in0.weights['scale'].data_unquantized + bias0 = in0.weights['bias'].data_unquantized + bias1 = in1.weights['bias'].data_unquantized + try: + bias = bias0 + bias1 + except ValueError: + return False + + model.remove_node(in0) + model.remove_node(in1) + + new_node = model.make_node('ApplyAlpha', in0.name, in0.attributes, [x for x in node.outputs]) + new_node.add_weights(scale) + new_node.add_bias(bias) + model.insert_node(new_node) + return True + + +class ScaleDownConv(OptimizerPass): + '''Shift an ApplyAlpha on input below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + + return is_match + + def transform(self, model, node): + apply_alpha = node.get_input_node(node.inputs[0]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # check broadcastable + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + + +class ScaleDownWeightConv(OptimizerPass): + '''Shift an ApplyAlpha weight (from conv side) below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = ( + isinstance(node, Conv) and len(node.inputs) > 1 and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + ) + + return is_match + + def transform(self, model, node): + apply_alpha = node.get_input_node(node.inputs[1]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # make sure broadcastable + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + + +class 
ScaleDownBiasConv(OptimizerPass): + '''Shift an ApplyAlpha bias (from conv side) below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = ( + isinstance(node, Conv) and len(node.inputs) > 2 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha) + ) + + return is_match + + def transform(self, model, node): + apply_alpha = node.get_input_node(node.inputs[2]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not scale.shape and scale == 1: + # No scale, just additional bias + try: + np.broadcast_to(bias, output.shape) + newscale = np.array(1) + newbias = bias + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True diff --git a/hls4ml/model/optimizer/passes/propagate_conv_precision.py b/hls4ml/model/optimizer/passes/propagate_conv_precision.py new file mode 100644 index 0000000000..17e357df88 --- /dev/null +++ b/hls4ml/model/optimizer/passes/propagate_conv_precision.py @@ -0,0 +1,77 @@ +import math # prefer to use math.ceil for scalar values (returns int) + +import numpy as np + +from hls4ml.model.layers import Conv1D, Conv2D +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType, NamedType + + +class PropagateConvPrecision(OptimizerPass): + """Propagate precision for conv nodes. Restrict it to only cases where + the precision is set by a quant node, since otherwise the values get huge. + """ + + def match(self, node): + is_match = isinstance(node, (Conv1D, Conv2D)) + return is_match + + def transform(self, model, node): + input_precision = node.get_input_node().get_attr("quant_precision") + weight_precision = node.get_attr("weight_precision") + if not input_precision or not weight_precision: + return False + + bias_precision = node.get_attr("bias_precision") + num_feature_maps = node.weights['weight'].data_unquantized.shape[-1] + filt_width = node.get_attr('filt_width') + filt_height = node.get_attr('filt_height', 1) + + accum_precision = _propagate_type_conv( + input_precision, + weight_precision, + bias_precision, + num_feature_maps=num_feature_maps, + filt_width=filt_width, + filt_height=filt_height, + ) + + accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) + node.set_attr('accum_t', accum_t) + + if not node.get_attr("quant_precision"): + # output precision not explicitly set by quant node + node.update_output_precision(accum_precision) + + return False + + +def _propagate_type_conv(input_precision, weight_precision, bias_precision, num_feature_maps, filt_width, filt_height): + ''' + Propagate the precion type across a multiply. 
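+    As an illustration (example numbers, not from a specific model): with ap_fixed<8,3> inputs,
+    ap_fixed<6,1> weights, a 3x3 filter and 4 input feature maps, Nacc = 36, so the accumulator
+    grows by ceil(log2(36)) = 6 bits to ap_fixed<20,10> before any bias correction.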
Rounding modes are propagated from input_precision + ''' + + Nacc = filt_width * filt_height * num_feature_maps + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) + integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(Nacc)) + signed = weight_precision.signed or input_precision.signed + + # Because calculating precision, no need to round or sautration + rounding_mode = None + saturation_mode = None + + frac = bitwidth - integer + + # correct for bias + if bias_precision: + integer = ( + max( + integer + (bias_precision.signed and not signed), + bias_precision.integer + (signed and not bias_precision.signed), + ) + + 1 + ) + bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) + signed = signed or bias_precision.signed + + return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) diff --git a/hls4ml/model/optimizer/passes/propagate_dense_precision.py b/hls4ml/model/optimizer/passes/propagate_dense_precision.py new file mode 100644 index 0000000000..cc50bb7553 --- /dev/null +++ b/hls4ml/model/optimizer/passes/propagate_dense_precision.py @@ -0,0 +1,70 @@ +import math # prefer to use math.ceil for scalar values (returns int) + +import numpy as np + +from hls4ml.model.layers import Dense +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import FixedPrecisionType, NamedType + + +class PropagateDensePrecision(OptimizerPass): + """ + Propagate precision for Dense nodes. Restrict it to only cases where + the precision is set by a quant node, since otherwise the values get huge. + """ + + def match(self, node): + is_match = isinstance(node, Dense) + return is_match + + def transform(self, model, node): + input_precision = node.get_input_node().get_attr("quant_precision") + weight_precision = node.get_attr("weight_precision") + if not input_precision or not weight_precision: + return False + + bias_precision = node.get_attr("bias_precision") + input_variable = node.get_input_variable() + num_acc = input_variable.shape[-1] + + accum_precision = _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc) + + accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) + node.set_attr('accum_t', accum_t) + + if not node.get_attr("quant_precision"): + # output precision not set by quant node + node.update_output_precision(accum_precision) + + return False + + +def _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc): + ''' + Propagate the precion type across a multiply. 
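+    When a bias precision is given, the integer part is widened further below (at least one extra
+    bit, plus sign adjustments) so the accumulator can also absorb the bias without overflow.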
Rounding modes are propagated from input_precision + ''' + + # check to make sure none are None + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) + integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) + signed = weight_precision.signed or input_precision.signed + + # Because calculating precision, no need to round or sautration + rounding_mode = None + saturation_mode = None + + frac = bitwidth - integer + + # correct for bias + if bias_precision: + integer = ( + max( + integer + (bias_precision.signed and not signed), + bias_precision.integer + (signed and not bias_precision.signed), + ) + + 1 + ) + bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) + signed = signed or bias_precision.signed + + return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index 2d2b6b0f77..7bed6cb1e7 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -1,7 +1,7 @@ import numpy as np import tensorflow as tf -from hls4ml.model.layers import ApplyAlpha, BatchNormalization +from hls4ml.model.layers import ApplyAlpha from hls4ml.model.optimizer import ConfigurableOptimizerPass, OptimizerPass, register_pass from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, QKerasPO2Quantizer @@ -81,7 +81,6 @@ def register_qkeras(): register_pass('output_rounding_saturation_mode', OutputRoundingSaturationMode) register_pass('qkeras_factorize_alpha', QKerasFactorizeAlpha) register_pass('extract_ternary_threshold', ExtractTernaryThreshold) - register_pass('fuse_consecutive_batch_normalization', FuseConsecutiveBatchNormalization) class QKerasFactorizeAlpha(OptimizerPass): @@ -181,38 +180,6 @@ def transform(self, model, node): return True -class FuseConsecutiveBatchNormalization(OptimizerPass): - '''OptimizerPass to merge consecutive BatchNormalization layers. - These may exist in a model after QKerasFactorizeAlpha layer. - Scale and Bias of each layer are combined into scale and bias of a single layer. - ''' - - def match(self, node): - return isinstance(node, BatchNormalization) and isinstance(node.get_input_node(), BatchNormalization) - - def transform(self, model, node): - bn0 = node.get_input_node() - bn1 = node - bn0_map = bn0.get_output_use_map() - bn1_map = bn1.get_output_use_map() - if len(bn0_map[bn0.name]) > 1 or len(bn1_map[bn1.name]) > 1: - return False - - s0 = bn0.weights['scale'].data - b0 = bn0.weights['bias'].data - s1 = bn1.weights['scale'].data - b1 = bn1.weights['bias'].data - - s2 = s0 * s1 - b2 = s1 * b0 + b1 - - bn0.weights['scale'].data = s2 - bn0.weights['bias'].data = b2 - - model.remove_node(node, rewire=True) - return True - - class ExtractTernaryThreshold(OptimizerPass): '''The input value (threshold) at which the output of a a ternary activation changes is configurable. This pass extracts that threshold point, inserting diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py new file mode 100644 index 0000000000..f0a5129d52 --- /dev/null +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -0,0 +1,387 @@ +''' +This file includes optimizations related to quant nodes. + +As a first step, QuantConstantParameters converts the extra inputs to attributes. 
It is always the first step.
+
+The next step differs between the case of (1) unit scale and zero offset and (2) non-unit scale and/or
+nonzero offset. In the first case no scaling is required, so a Quant node effectively becomes a linear activation.
+For the common case when this is applied on a constant weight, the activation is immediately merged with the weight,
+quantizing the weights. In case 2, we need to explicitly scale and unscale, so the Quant node becomes 3 nodes, an
+ApplyAlpha node to apply a scale/shift, a Linear node to apply the quantization, and another ApplyAlpha to unscale/shift.
+We depend on optimization steps to move the unscaling ApplyAlpha down as needed. Again, when the Quant is applied to a
+Constant, the scaling and Linear nodes are immediately merged into the Constant. This is done because it simplifies some
+of the other optimizations.
+
+UPDATE: Case 1 is loosened to also include power of 2 scalar scales, not just unitary scale, if
+    _ALSO_MATCH_PO2 is set to true (the default)
+
+'''
+import math  # prefer to use math.ceil for scalar values
+
+import numpy as np
+
+from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer
+from hls4ml.model.layers import Activation, ApplyAlpha, Constant, Quant
+from hls4ml.model.optimizer import OptimizerPass
+from hls4ml.model.types import FixedPrecisionType
+
+_ALSO_MATCH_PO2 = True
+
+_base_attributes = ('Trace', 'reuse_factor')
+
+
+class QuantConstantParameters(OptimizerPass):
+    """Remove Constant from the Quant node parameters (but not input[0])"""
+
+    def match(self, node):
+        is_match = isinstance(node, Quant) and (
+            (node.get_input_node(node.inputs[1]) and isinstance(node.get_input_node(node.inputs[1]), Constant))
+            or (node.get_input_node(node.inputs[2]) and isinstance(node.get_input_node(node.inputs[2]), Constant))
+            or (node.get_input_node(node.inputs[3]) and isinstance(node.get_input_node(node.inputs[3]), Constant))
+        )
+
+        return is_match
+
+    def transform(self, model, node):
+        """
+        Remove Constant from the Quant node parameters (but not input[0])
+        """
+        if node.get_input_node(node.inputs[1]):
+            scale_node = node.get_input_node(node.inputs[1])
+            if isinstance(scale_node, Constant):
+                node.set_attr('scale', scale_node.value)
+                node.inputs[1] = ''
+                model.remove_node(scale_node, rewire=False)
+
+        if node.get_input_node(node.inputs[2]):
+            zeropt_node = node.get_input_node(node.inputs[2])
+            if isinstance(zeropt_node, Constant):
+                node.set_attr('zeropt', zeropt_node.value)
+                node.inputs[2] = ''
+                model.remove_node(zeropt_node, rewire=False)
+
+        if node.get_input_node(node.inputs[3]):
+            bitwidth_node = node.get_input_node(node.inputs[3])
+            if isinstance(bitwidth_node, Constant):
+                if np.squeeze(bitwidth_node.value).shape:
+                    raise RuntimeError("Only scalar bitwidth values are supported by the Quant node")
+                node.set_attr('bitwidth', bitwidth_node.value)
+                node.inputs[3] = ''
+                model.remove_node(bitwidth_node, rewire=False)
+
+        return True
+
+
+class QuantToActivation(OptimizerPass):
+    '''
+    This is for the case when scale is 1 and zeropt is 0. It is a 1:1 transformation of
+    a Quant to an Activation.
+
+    As an optimization, this is not called when the input is constant.
+
+    UPDATE: this is also called when scale is scalar and power of 2, not just 1.
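+
+    As an illustration (example values): a Quant node with scale=0.25, zeropt=0, bitwidth=8,
+    signed=True, narrow=False and rounding_mode='ROUND' becomes a linear Activation whose output
+    type is FixedPrecisionType(width=8, integer=6, signed=True) with AP_RND_CONV rounding and
+    AP_SAT saturation, since np.frexp(0.25) gives an exponent of -1 and integer = bitwidth + exp - 1.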
+ ''' + + def match(self, node): + # only matches after the other inputs are already folded + + is_match = ( + isinstance(node, Quant) + and not isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + # Only match if the scale is 1s and the zero-point is 0s + if is_match: # to make sure this is a quant node with inputs + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and (bias == np.zeros_like(bias)).all() + + # check if scale is ones-like or a power of two + scale_unit_or_po2 = (scale == np.ones_like(scale)).all() + if not scale_unit_or_po2 and _ALSO_MATCH_PO2: + sqscale = np.squeeze(scale) + if not sqscale.shape: + # not an array + mantissa, _ = np.frexp(sqscale) + scale_unit_or_po2 = mantissa == 0.5 + + is_match = is_match and scale_unit_or_po2 + + return is_match + + def transform(self, model, node): + ''' + Change quant node to Activation + ''' + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + integer = bitwidth + scale = node.get_attr("scale") + if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): + _, exp = np.frexp(np.squeeze(scale)) + integer = bitwidth + exp - 1 + + precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + + new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) + new_node.get_output_variable().type.precision = precision + model.replace_node(node, new_node) + + return True + + +class FuseQuantWithConstant(OptimizerPass): + ''' + This is for the case when scale is 1 and zeropt is 0. It directly applies the quantization to a constant. + UPDATE: this is also called when scale is scalar and power of 2, not just 1. + ''' + + def match(self, node): + # only matches after the other inputs are already folded + is_match = ( + isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + # Only match if the scale is 1s and the zero-point is 0s + if is_match: # to make sure this is a quant node with inputs + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and (bias == np.zeros_like(bias)).all() + + # check if scale is ones-like or a power of two + scale_unit_or_po2 = (scale == np.ones_like(scale)).all() + if not scale_unit_or_po2 and _ALSO_MATCH_PO2: + sqscale = np.squeeze(scale) + if not sqscale.shape: + # not an array + mantissa, _ = np.frexp(sqscale) + scale_unit_or_po2 = mantissa == 0.5 + + is_match = is_match and scale_unit_or_po2 + + return is_match + + def transform(self, model, node): + ''' + Fuse Quant with Constant. 
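+        The quantization itself is applied when the constant is reinitialized below, via the
+        quantizer attached here.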
+        '''
+
+        rounding_mode = node.get_attr("rounding_mode")
+        narrow = node.get_attr("narrow")
+        signed = node.get_attr("signed")
+        bitwidth = node.get_attr("bitwidth")
+        integer = bitwidth
+        scale = node.get_attr("scale")
+        if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all():
+            _, exp = np.frexp(np.squeeze(scale))
+            integer = bitwidth + exp - 1
+
+        precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode)
+
+        const_node = node.get_input_node(node.inputs[0])
+        const_node.set_attr("quant_precision", precision)
+        const_node.set_attr("quantizer", quantizer)
+
+        # reinitialize (which also runs quantization if quantizer exists)
+        const_node.initialize()
+
+        # remove the Quant node
+        model.remove_node(node, rewire=True)
+
+        return True
+
+
+class QuantToAlphaActivationAlpha(OptimizerPass):
+    '''
+    This is for the case when scale is not 1 or zeropt is not 0. It is a 1:3 transformation of
+    a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpha (to rescale).
+
+    As an optimization, this is not called when the input is constant.
+    '''
+
+    def match(self, node):
+        # only matches after the other inputs are already folded
+        is_match = (
+            isinstance(node, Quant)
+            and not isinstance(node.get_input_node(node.inputs[0]), Constant)
+            and not node.get_input_node(node.inputs[1])
+            and not node.get_input_node(node.inputs[2])
+            and not node.get_input_node(node.inputs[3])
+        )
+
+        if is_match:  # to make sure this is a quant node with inputs
+            scale = node.get_attr("scale")
+            bias = node.get_attr("zeropt")
+            is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any())
+        return is_match
+
+    def transform(self, model, node):
+        '''
+        Change the Quant node to ApplyAlpha, Activation, ApplyAlpha
+        '''
+
+        # Do the Activation as in the simple case
+
+        input_shape = node.get_input_variable().shape
+
+        n_in = np.prod(input_shape)
+
+        rounding_mode = node.get_attr("rounding_mode")
+        narrow = node.get_attr("narrow")
+        signed = node.get_attr("signed")
+        bitwidth = node.get_attr("bitwidth")
+
+        precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode)
+
+        attributes = {k: node.attributes.get(k, None) for k in _base_attributes}
+        attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in})
+
+        new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs])
+        new_node.get_output_variable().type.precision = precision
+        model.replace_node(node, new_node)
+
+        # but now add the ApplyAlphas before and after
+
+        scale = node.get_attr("scale")
+        bias = node.get_attr("zeropt")
+
+        attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes}
+        attributes_scale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1})
+
+        attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes}
+        attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1})
+
+        firstscale = 1 / scale
+        firstbias = bias
+        attributes_scale["scale_data"] = firstscale
+        attributes_scale["bias_data"] = firstbias
+
+        scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]])
+        model.insert_node(scale_node)
+
+        rescale = scale
+        rebias = -bias * scale
+        attributes_rescale["scale_data"] = rescale
+        attributes_rescale["bias_data"] = rebias
+
+        rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) 
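+        # The rescale ApplyAlpha undoes the scale/shift applied before the Activation; later passes
+        # (e.g. the move_scales optimizers) are expected to push it further down the graph.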
+ model.insert_node(rescale_node) + + return True + + +class ConstQuantToConstAlpha(OptimizerPass): + ''' + This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input + consts allows for optimization, so the ApplyAlpha (to scale), Activation are + optimized away right away. + ''' + + def match(self, node): + # only matches after the other inputs are already folded + is_match = ( + isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + if is_match: # to make sure this is a quant node with inputs + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) + return is_match + + def transform(self, model, node): + ''' + Change Constant + Quant node to Constant, ApplyAlpha + ''' + + # Do the Activation as in the simple case + + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) + + const_node = node.get_input_node(node.inputs[0]) + + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + + # caclucate the new value + new_val = const_node.value / scale + bias + const_node.set_attr('value', new_val) + const_node.set_attr("quant_precision", precision) + const_node.set_attr("quantizer", quantizer) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) + + rescale = scale + rebias = -bias * scale + attributes_rescale["scale_data"] = rescale + attributes_rescale["bias_data"] = rebias + + rescale_node = model.make_node( + ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] + ) + model.replace_node(node, rescale_node) + + return True + + +def _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode): + ''' + A function to determine the precision and quantizer + ''' + if rounding_mode == "ROUND": + bn_round = "AP_RND_CONV" + elif rounding_mode == "FLOOR": + bn_round = "AP_TRN" + else: + raise NotImplementedError( + f"Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported." 
+ ) + + if narrow and not signed: + raise NotImplementedError("Narrow mode is only supported for singed numbers.") + + if narrow: + bn_sat = "AP_SAT_SYM" + else: + bn_sat = "AP_SAT" + + bitwidth = math.ceil(bitwidth) + integer = math.ceil(integer) + + precision = FixedPrecisionType(bitwidth, integer, signed, bn_round, bn_sat) + quantizer = QuantNodeQuantizer(precision) + return (precision, quantizer) diff --git a/hls4ml/model/optimizer/passes/reshape_const.py b/hls4ml/model/optimizer/passes/reshape_const.py new file mode 100644 index 0000000000..0012b2761e --- /dev/null +++ b/hls4ml/model/optimizer/passes/reshape_const.py @@ -0,0 +1,27 @@ +from hls4ml.model.layers import Constant, Reshape +from hls4ml.model.optimizer import OptimizerPass + + +class ReshapeConstant(OptimizerPass): + """ + ONNX has the target shape come as an input, not a parameter. This removes + the Constant input from new shape input. (Non-constant inputs are not supported.) + The constant value was already used; this is just a cleanup uptimization. + """ + + def match(self, node): + is_match = isinstance(node, Reshape) and len(node.inputs) > 1 and node.get_input_node(node.inputs[1]) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from new shape input. Note, input shape node is already used on initialize + """ + shape_node = node.get_input_node(node.inputs[1]) + node.inputs[1] = '' + if not isinstance(shape_node, Constant): + raise RuntimeError("Nonconstant shape inputs are not currently supported") + model.remove_node(shape_node, rewire=False) + + return True From 5cea82d1bf0b6b82c3302bda6c7f482d603d8937 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 11:49:57 -0500 Subject: [PATCH 06/59] snapshot that runs qonnx test, but gets incorrect results --- hls4ml/backends/fpga/fpga_backend.py | 12 +++- hls4ml/model/layers.py | 5 ++ hls4ml/model/optimizer/__init__.py | 10 +++ .../model/optimizer/passes/batchnorm_opt.py | 2 +- hls4ml/model/optimizer/passes/merge_const.py | 16 ++--- ...recision.py => propagate_acc_precision.py} | 53 ++++++++++---- .../passes/propagate_dense_precision.py | 70 ------------------- 7 files changed, 74 insertions(+), 94 deletions(-) rename hls4ml/model/optimizer/passes/{propagate_conv_precision.py => propagate_acc_precision.py} (59%) delete mode 100644 hls4ml/model/optimizer/passes/propagate_dense_precision.py diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 97e458f7fd..27620b1949 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -25,6 +25,7 @@ GlobalPooling1D, GlobalPooling2D, MatMul, + Merge, Pooling1D, Pooling2D, Quant, @@ -76,7 +77,16 @@ def __init__(self, name): attrs.append(TypeAttribute('accum')) self.attribute_map[layer] = attrs - rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack, Quant, BatchNormOnnx] + rf_layers = accum_layers + [ + BatchNormalization, + Activation, + Embedding, + GarNet, + GarNetStack, + Quant, + BatchNormOnnx, + Merge, + ] for layer in rf_layers: attrs = self.attribute_map.get(layer, []) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index bd465ff7b9..1922dcec8c 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -249,6 +249,11 @@ def add_output_variable( self.set_attr(out_name, out) + def update_output_precision(self, precision, output_name=None): + if output_name is None: + output_name = self.outputs[0] + self.variables[output_name].type.precision = 
precision + def add_weights(self, quantizer=None, compression=False): self.add_weights_variable( name='weight', var_name='w{index}', data='weight', quantizer=quantizer, compression=compression diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index db65370e40..38844992db 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -43,6 +43,16 @@ 'fuse_quant_with_constant', 'quant_to_alpha_activation_alpha', 'const_quant_to_const_alpha', + 'batch_norm_onnx_constant_parameters', + 'constant_batch_norm_fusion', + 'merge_two_constants', + 'scale_down_add', + 'scale_down_mat_mul', + 'scale_down_weight_conv', + 'scale_down_bias_conv', + 'scale_down_conv', + 'merge_to_apply_alpha', + 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', 'output_rounding_saturation_mode', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index a7b0c27209..b9c651fd8f 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -3,7 +3,7 @@ from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant from hls4ml.model.optimizer import OptimizerPass -_base_attributes = ('Trace', 'reuse_factor', 'n_in', 'n_filt') +_base_attributes = ('Trace', 'reuse_factor', 'epsilon', 'n_in', 'n_filt') class BatchNormOnnxConstantParameters(OptimizerPass): diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 4e339ccc3f..da70eb55f3 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,7 +1,7 @@ import numpy as np from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer -from hls4ml.model.layers import BatchNormalization, Constant, Merge +from hls4ml.model.layers import ApplyAlpha, Constant, Merge from hls4ml.model.optimizer import OptimizerPass _base_attributes = ('Trace', 'reuse_factor', 'n_in') @@ -69,8 +69,8 @@ def transform(self, model, node): return True -class MergeToBatchNormalization(OptimizerPass): - """Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization""" +class MergeToApplyAlpha(OptimizerPass): + """Convert Add, Sub, Mul, or Div Merges with consant to ApplyAlpha""" def match(self, node): is_match = ( @@ -147,7 +147,7 @@ def transform(self, model, node): ) bn_layer = model.make_node( - BatchNormalization, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) @@ -156,9 +156,9 @@ def transform(self, model, node): return True -class MergeToBatchNormalizationDiv(OptimizerPass): +class MergeToApplyAlphaDiv(OptimizerPass): """ - Convert Div Merges with consant to BatchNormalization + Convert Div Merges with consant to ApplyAlpha TODO: propagate precision """ @@ -182,9 +182,7 @@ def transform(self, model, node): attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) - bn_layer = model.make_node( - "BatchNormalization", f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] - ) + bn_layer = model.make_node(ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) model.replace_node(node, 
bn_layer) diff --git a/hls4ml/model/optimizer/passes/propagate_conv_precision.py b/hls4ml/model/optimizer/passes/propagate_acc_precision.py similarity index 59% rename from hls4ml/model/optimizer/passes/propagate_conv_precision.py rename to hls4ml/model/optimizer/passes/propagate_acc_precision.py index 17e357df88..6c1facc23b 100644 --- a/hls4ml/model/optimizer/passes/propagate_conv_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_acc_precision.py @@ -2,11 +2,43 @@ import numpy as np -from hls4ml.model.layers import Conv1D, Conv2D +from hls4ml.model.layers import Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import FixedPrecisionType, NamedType +class PropagateDensePrecision(OptimizerPass): + """ + Propagate precision for Dense nodes. Restrict it to only cases where + the precision is set by a quant node, since otherwise the values get huge. + """ + + def match(self, node): + is_match = isinstance(node, Dense) + return is_match + + def transform(self, model, node): + input_precision = node.get_input_node().get_attr("quant_precision") + weight_precision = node.get_attr("weight_precision") + if not input_precision or not weight_precision: + return False + + bias_precision = node.get_attr("bias_precision") + input_variable = node.get_input_variable() + num_acc = input_variable.shape[-1] + + accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) + + accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) + node.set_attr('accum_t', accum_t) + + if not node.get_attr("quant_precision"): + # output precision not set by quant node + node.update_output_precision(accum_precision) + + return False + + class PropagateConvPrecision(OptimizerPass): """Propagate precision for conv nodes. Restrict it to only cases where the precision is set by a quant node, since otherwise the values get huge. @@ -27,14 +59,9 @@ def transform(self, model, node): filt_width = node.get_attr('filt_width') filt_height = node.get_attr('filt_height', 1) - accum_precision = _propagate_type_conv( - input_precision, - weight_precision, - bias_precision, - num_feature_maps=num_feature_maps, - filt_width=filt_width, - filt_height=filt_height, - ) + num_acc = filt_width * filt_height * num_feature_maps + + accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) node.set_attr('accum_t', accum_t) @@ -46,14 +73,14 @@ def transform(self, model, node): return False -def _propagate_type_conv(input_precision, weight_precision, bias_precision, num_feature_maps, filt_width, filt_height): +def _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc): ''' Propagate the precion type across a multiply. 
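+    This helper is shared by the Dense and Conv precision-propagation passes above; num_acc is the
+    number of accumulated terms (the last input dimension for Dense, filt_width * filt_height *
+    num_feature_maps for Conv).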
Rounding modes are propagated from input_precision ''' - Nacc = filt_width * filt_height * num_feature_maps - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) - integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(Nacc)) + # check to make sure none are None + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) + integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) signed = weight_precision.signed or input_precision.signed # Because calculating precision, no need to round or sautration diff --git a/hls4ml/model/optimizer/passes/propagate_dense_precision.py b/hls4ml/model/optimizer/passes/propagate_dense_precision.py deleted file mode 100644 index cc50bb7553..0000000000 --- a/hls4ml/model/optimizer/passes/propagate_dense_precision.py +++ /dev/null @@ -1,70 +0,0 @@ -import math # prefer to use math.ceil for scalar values (returns int) - -import numpy as np - -from hls4ml.model.layers import Dense -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import FixedPrecisionType, NamedType - - -class PropagateDensePrecision(OptimizerPass): - """ - Propagate precision for Dense nodes. Restrict it to only cases where - the precision is set by a quant node, since otherwise the values get huge. - """ - - def match(self, node): - is_match = isinstance(node, Dense) - return is_match - - def transform(self, model, node): - input_precision = node.get_input_node().get_attr("quant_precision") - weight_precision = node.get_attr("weight_precision") - if not input_precision or not weight_precision: - return False - - bias_precision = node.get_attr("bias_precision") - input_variable = node.get_input_variable() - num_acc = input_variable.shape[-1] - - accum_precision = _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc) - - accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) - node.set_attr('accum_t', accum_t) - - if not node.get_attr("quant_precision"): - # output precision not set by quant node - node.update_output_precision(accum_precision) - - return False - - -def _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc): - ''' - Propagate the precion type across a multiply. 
Rounding modes are propagated from input_precision - ''' - - # check to make sure none are None - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) - integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) - signed = weight_precision.signed or input_precision.signed - - # Because calculating precision, no need to round or sautration - rounding_mode = None - saturation_mode = None - - frac = bitwidth - integer - - # correct for bias - if bias_precision: - integer = ( - max( - integer + (bias_precision.signed and not signed), - bias_precision.integer + (signed and not bias_precision.signed), - ) - + 1 - ) - bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) - signed = signed or bias_precision.signed - - return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) From d5394d4e59046daa6069ca22c8e3aec9ad934db7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 11:55:33 -0500 Subject: [PATCH 07/59] add quant node quantizer --- hls4ml/converters/onnx/quantizer.py | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 hls4ml/converters/onnx/quantizer.py diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py new file mode 100644 index 0000000000..7f69652c04 --- /dev/null +++ b/hls4ml/converters/onnx/quantizer.py @@ -0,0 +1,97 @@ +""" +Quantizer for the Quant node, after scale and zeropoint hafe been extracted +(unless scale is a power of 2, if doing special case po2) + +This is based on the sample implementation in finn-base +""" + +import numpy as np + +from hls4ml.model.types import Quantizer, RoundingMode, SaturationMode + + +class QuantNodeQuantizer(Quantizer): + """This implements a quantizer for a FixedPrecisionType with width==integer""" + + def __init__(self, precision): + super().__init__(precision.width, precision) + + def __call__(self, data): + """Apply the quantization on the data""" + + scale = 2 ** (self.hls_type.width - self.hls_type.integer) + + data = data * scale # (not using *= to avoid modifying data) + # Clamping + min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) + max_int_val = self._max_int(self.hls_type.signed, self.bits) + data = np.where(data > max_int_val, max_int_val, data) + data = np.where(data < min_int_val, min_int_val, data) + # Rounding + rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) + return rounding_fx(data) / scale + + @staticmethod + def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: + """Compute the minimum integer representable by a given number of bits. + Args: + signed (bool): Indicates whether the represented integer is signed or not. + saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) + bit_width (int): Number of bits available for the representation. + Returns: + int: Maximum unsigned integer that can be represented according to + the input arguments. + Examples: + >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) + int(-127) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) + int(-128) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + """ + if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): + raise ValueError(f"Saturation mode {saturation_mode} not supported. 
Only AP_SAT_SYM, AP_SAT supported") + if signed and saturation_mode == SaturationMode.SAT_SYM: + value = -(2 ** (bit_width - 1)) + 1 + elif signed: + value = -(2 ** (bit_width - 1)) + else: + value = 0 + return value + + @staticmethod + def _max_int(signed: bool, bit_width: int) -> int: + """Compute the maximum integer representable by a given number of bits. + (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) + Args: + signed (bool): Indicates whether the represented integer is signed or not. + bit_width (int): Number of bits available for the representation. + Returns: + Tensor: Maximum integer that can be represented according to + the input arguments. + Examples: + >>> max_int(signed=True, bit_width=8) + int(127) + >>> max_int(signed=False, bit_width=8) + int(255) + """ + if not signed: + value = (2**bit_width) - 1 + else: + value = (2 ** (bit_width - 1)) - 1 + return value + + @staticmethod + def _resolve_rounding_mode(mode): + """Resolve the rounding mode of Quant and Trunc ops + to the corresponding numpy functions.""" + if mode == RoundingMode.RND_CONV: + return np.round + # elif mode_string == "CEIL": # not supported + # return np.ceil + elif mode == RoundingMode.TRN: + return np.floor + else: + raise ValueError(f"Rounding mode {mode} not supported.") From 9817ed36f034f73030a6a5820a451f4199812641 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 15:14:53 -0500 Subject: [PATCH 08/59] fix broadcasting when going from Merge to ApplyAlpha --- hls4ml/model/optimizer/passes/merge_const.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index da70eb55f3..f38bfd841d 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -131,6 +131,12 @@ def transform(self, model, node): scale_precision = const_node.get_attr("quant_precision") scale_quantizer = const_node.get_attr("quantizer") + # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias + if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): + scale = np.broadcast_to(scale, input_shape) + if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): + bias = np.broadcast_to(bias, input_shape) + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update( { @@ -179,6 +185,12 @@ def transform(self, model, node): scale = 1 / const_node.value bias = np.array(0) + # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias + if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): + scale = np.broadcast_to(scale, input_shape) + if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): + bias = np.broadcast_to(bias, input_shape) + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) From e494f435b55f396e2bf8d3c8c1350f5fa753fbb3 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 15:49:56 -0500 Subject: [PATCH 09/59] update linear merging --- hls4ml/model/optimizer/__init__.py | 1 + hls4ml/model/optimizer/passes/linear.py | 42 +++++++++++++++++++++++++ hls4ml/model/optimizer/passes/nop.py | 14 --------- 3 files changed, 43 insertions(+), 14 deletions(-) create mode 
100644 hls4ml/model/optimizer/passes/linear.py delete mode 100644 hls4ml/model/optimizer/passes/nop.py diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 38844992db..e41973b4e2 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -70,6 +70,7 @@ 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', 'set_precision_concat', + 'merge_linear_activation', ], requires=['convert'], ) diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py new file mode 100644 index 0000000000..72d6dade9f --- /dev/null +++ b/hls4ml/model/optimizer/passes/linear.py @@ -0,0 +1,42 @@ +from hls4ml.model.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense +from hls4ml.model.optimizer import OptimizerPass + + +class EliminateLinearActivation(OptimizerPass): + def match(self, node): + cast = False + if isinstance(node, Activation): + cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision + return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast + + def transform(self, model, node): + model.remove_node(node) + return True + + +# TODO: Move migrate this to auto precisoin check from quant precision check +class MergeLinearActivation(OptimizerPass): + ''' + For many objects it's safe to change the output precision independently of the calculation. + ''' + + def match(self, node): + ''' + Only match if the parent is safe and the precision is not explicitly set. + ''' + if isinstance(node, Activation) and node.get_attr('activation') == 'linear': + parent = node.get_input_node(node.inputs[0]) + safe_parent = isinstance(parent, (Dense, Conv1D, Conv2D, BatchNormalization)) + parent_type_fixed = parent.get_attr("quant_precision") + return safe_parent and not parent_type_fixed + else: + return False + + def transform(self, model, node): + prev_node = node.get_input_node(node.inputs[0]) + quant_precision = node.get_attr("quant_precision") + prev_node.set_attr("quant_precision", quant_precision) + prev_node.set_attr("quantizer", node.get_attr("quantizer")) + prev_node.update_output_precision(quant_precision) + model.remove_node(node) + return True diff --git a/hls4ml/model/optimizer/passes/nop.py b/hls4ml/model/optimizer/passes/nop.py deleted file mode 100644 index 55fcf16e93..0000000000 --- a/hls4ml/model/optimizer/passes/nop.py +++ /dev/null @@ -1,14 +0,0 @@ -from hls4ml.model.layers import Activation -from hls4ml.model.optimizer import OptimizerPass - - -class EliminateLinearActivation(OptimizerPass): - def match(self, node): - cast = False - if isinstance(node, Activation): - cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision - return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast - - def transform(self, model, node): - model.remove_node(node) - return True From ffddb5e898a7689cf73cdaf50ca118c4104f3c35 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 16:25:03 -0500 Subject: [PATCH 10/59] update automatic setting of accumulators (QONNX-only for now) --- hls4ml/model/optimizer/__init__.py | 2 ++ hls4ml/model/optimizer/passes/propagate_acc_precision.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index e41973b4e2..6af9698a51 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -69,6 +69,8 @@ 
'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', + 'propagate_dense_precision', + 'propagate_conv_precision', 'set_precision_concat', 'merge_linear_activation', ], diff --git a/hls4ml/model/optimizer/passes/propagate_acc_precision.py b/hls4ml/model/optimizer/passes/propagate_acc_precision.py index 6c1facc23b..375979de4e 100644 --- a/hls4ml/model/optimizer/passes/propagate_acc_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_acc_precision.py @@ -6,6 +6,8 @@ from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import FixedPrecisionType, NamedType +# TODO: Update these to use the new auto precision, not depdening only on QONNX values + class PropagateDensePrecision(OptimizerPass): """ From 57c89fb7da6cebdd8d8fe4e72ea6a31ea0c1a16a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 17:39:47 -0500 Subject: [PATCH 11/59] update qonnx tests --- test/pytest/test_qonnx.py | 144 ++++++++++++++------------------------ 1 file changed, 54 insertions(+), 90 deletions(-) mode change 100755 => 100644 test/pytest/test_qonnx.py diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py old mode 100755 new mode 100644 index be567d81f9..535bffb0da --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import os import urllib from pathlib import Path @@ -17,8 +16,11 @@ test_root_path = Path(__file__).parent -def test_tfc_2w2a(): - # download test model +@pytest.fixture(scope='module') +def tfc_2w2a_model(): + ''' + Load the tiny fully-connected model + ''' dl_dir = test_root_path dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") tfc_w2a2_qonnx_url = ( @@ -32,50 +34,60 @@ def test_tfc_2w2a(): # cleanup qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) model = ModelWrapper(out_file) + return model - # Execute QONNX model inference - # TODO make the test bigger - ishape = (1, 1, 28, 28) - np.random.seed(0) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) - idict = {model.graph.input[0].name: X} - y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] - # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} - hls_model = hls4ml.converters.convert_from_onnx_model( - model, output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a'), part='xcu250-figd2104-2L-e', hls_config=config +@pytest.fixture(scope='module') +def cnv_2w2a_model(): + ''' + Load the small convolution model + ''' + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-cnv-2w2a.onnx") + cnv_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" ) - hls_model.compile() - y_hls4ml = hls_model.predict(X) + urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_clean = str(dl_dir / "qonnx-cnv-2w2a-clean.onnx") + out_chanlast = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last.onnx") + out_file = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last-clean.onnx") - np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) + 
qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) + qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) + model = ModelWrapper(out_file) + return model -def test_tfc_2w2a_quartus(): - # download test model +@pytest.fixture(scope='module') +def jettagging_model(): + ''' + Load the 3 hidden layer QKeras example model trained on the jet tagging dataset + ''' dl_dir = test_root_path - dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") - tfc_w2a2_qonnx_url = ( + dl_file = str(dl_dir / "qkeras_jettagging.onnx") + jet_tagging_qonnx_url = ( "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" ) - urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) assert os.path.isfile(dl_file) - out_file = str(dl_dir / "qonnx-tfc-2w2a-clean.onnx") + out_file = str(dl_dir / "qkeras_jettagging-clean.onnx") # cleanup qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) model = ModelWrapper(out_file) + return model + + +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_tfc_2w2a(tfc_2w2a_model, backend): + model = tfc_2w2a_model - # Execute QONNX model inference - # TODO make the test bigger ishape = (1, 1, 28, 28) - np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -83,15 +95,10 @@ def test_tfc_2w2a_quartus(): # Convert QONNX model, compile, and run inference config = hls4ml.utils.config_from_onnx_model(model) # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision': 'ac_fixed<16,2>'} + config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} hls_model = hls4ml.converters.convert_from_onnx_model( - model, - output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a-quartus'), - part='Arria10', - backend='Quartus', - hls_config=config, + model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_tfc-2w2a_{backend}'), backend=backend, hls_config=config ) hls_model.compile() y_hls4ml = hls_model.predict(X) @@ -99,45 +106,22 @@ def test_tfc_2w2a_quartus(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -def test_cnv_2w2a(): - # download test model - dl_dir = test_root_path - dl_file = str(dl_dir / "qonnx-cnv-2w2a.onnx") - cnv_w2a2_qonnx_url = ( - "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" - ) - urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) - assert os.path.isfile(dl_file) - out_clean = str(dl_dir / "qonnx-cnv-2w2a-clean.onnx") - out_chanlast = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last.onnx") - out_file = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last-clean.onnx") - - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) - qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) - qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) - model = ModelWrapper(out_file) +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_cnv_2w2a(cnv_2w2a_model, backend): + model = 
cnv_2w2a_model - # Execute QONNX model inference - # TODO make the test bigger ishape = (1, 32, 32, 3) - np.random.seed(1) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) - config['Model']['Precision'] = 'ap_fixed<32,16>' - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - + config = hls4ml.utils.config_from_onnx_model(model, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, - output_dir=str(test_root_path / 'hls4mlprj_qonnx_cnv-2w2a'), - part='xcu250-figd2104-2L-e', + output_dir=str(test_root_path / f'hls4mlprj_qonnx_cnv-2w2a_{backend}'), io_type='io_stream', + backend=backend, hls_config=config, ) hls_model.compile() @@ -146,35 +130,19 @@ def test_cnv_2w2a(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) -def test_jet_tagging(backend): - # download test model - dl_dir = test_root_path - dl_file = dl_dir / "qkeras_jettagging.onnx" - jet_tagging_qonnx_url = ( - "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" - ) - urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) - assert os.path.isfile(dl_file) - out_file = dl_dir / "qkeras_jettagging-clean.onnx" - - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) - model = ModelWrapper(out_file) +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_jet_tagging(jettagging_model, backend): + model = jettagging_model # Execute QONNX model inference # TODO make the test bigger ishape = (1, 16) - np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference config = hls4ml.utils.config_from_onnx_model(model) - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config @@ -183,7 +151,3 @@ def test_jet_tagging(backend): y_hls4ml = hls_model.predict(X) np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) - - -if __name__ == '__main__': - test_tfc_2w2a() From 233905a0dac338e720a114ec2671aca1a2cd64f4 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 18 Jul 2023 11:32:37 -0500 Subject: [PATCH 12/59] remove batch dimension from flatten in Keras --- hls4ml/converters/keras/reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/converters/keras/reshape.py b/hls4ml/converters/keras/reshape.py index bd9d519a2a..1f6dc2a759 100644 --- a/hls4ml/converters/keras/reshape.py +++ b/hls4ml/converters/keras/reshape.py @@ -11,8 +11,8 @@ def parse_flatten_layer(keras_layer, input_names, input_shapes, data_reader): layer = parse_default_keras_layer(keras_layer, input_names) layer['class_name'] = 'Reshape' - layer['target_shape'] = [input_shapes[0][0], np.prod(input_shapes[0][1:])] - 
output_shape = layer['target_shape'] + layer['target_shape'] = [np.prod(input_shapes[0][1:])] # target shape has no batch dimension + output_shape = input_shapes[0][:1] + layer['target_shape'] return layer, output_shape From 6f119551c9586ada7cdb6e9c64c5956b1198023c Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 3 Aug 2023 17:15:26 -0500 Subject: [PATCH 13/59] fix optimizer that fuses consecutive batch norms --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index b9c651fd8f..a74047676d 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -162,8 +162,8 @@ def transform(self, model, node): bias_new = s1 * b0 + b1 # call function so that quantizer would be called if needed - node.add_weights(scale_new, quantizer=s_quantizer) - node.add_bias(bias_new, quantizer=b_quantizer) + node.add_weights_variable(name='scale', var_name='s{index}', data=scale_new) + node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new) model.remove_node(prev_node, rewire=True) return True From 76be67b5779b38486a094b465898e087fa9e3339 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 2 Feb 2024 18:52:57 -0600 Subject: [PATCH 14/59] snapshot of work --- hls4ml/converters/keras/core.py | 3 +- hls4ml/converters/keras/graph.py | 2 +- hls4ml/converters/keras/qkeras.py | 3 +- hls4ml/converters/onnx/quantizer.py | 97 ------- hls4ml/model/layers.py | 7 +- hls4ml/model/optimizer/passes/merge_const.py | 2 +- .../passes/propagate_acc_precision.py | 106 ------- hls4ml/model/optimizer/passes/qkeras.py | 3 +- hls4ml/model/optimizer/passes/quant_opt.py | 229 +++++++-------- hls4ml/model/quantizers.py | 261 ++++++++++++++++++ hls4ml/model/types.py | 156 ----------- test/pytest/test_qonnx.py | 6 +- 12 files changed, 378 insertions(+), 497 deletions(-) delete mode 100644 hls4ml/converters/onnx/quantizer.py delete mode 100644 hls4ml/model/optimizer/passes/propagate_acc_precision.py create mode 100644 hls4ml/model/quantizers.py diff --git a/hls4ml/converters/keras/core.py b/hls4ml/converters/keras/core.py index f6119c016d..ca7d0b3541 100644 --- a/hls4ml/converters/keras/core.py +++ b/hls4ml/converters/keras/core.py @@ -1,5 +1,6 @@ from hls4ml.converters.keras_to_hls import get_weights_data, keras_handler, parse_default_keras_layer -from hls4ml.model.types import BinaryQuantizer, IntegerPrecisionType, TernaryQuantizer +from hls4ml.model.quantizers import BinaryQuantizer, TernaryQuantizer +from hls4ml.model.types import IntegerPrecisionType @keras_handler('InputLayer') diff --git a/hls4ml/converters/keras/graph.py b/hls4ml/converters/keras/graph.py index 5c5c2247c0..954bf20b8f 100644 --- a/hls4ml/converters/keras/graph.py +++ b/hls4ml/converters/keras/graph.py @@ -1,5 +1,5 @@ -from hls4ml.converters.keras.core import TernaryQuantizer from hls4ml.converters.keras_to_hls import get_weights_data, keras_handler, parse_default_keras_layer +from hls4ml.model.quantizers import TernaryQuantizer @keras_handler('GarNet', 'GarNetStack') diff --git a/hls4ml/converters/keras/qkeras.py b/hls4ml/converters/keras/qkeras.py index ba1401cce0..055ed3a8f4 100644 --- a/hls4ml/converters/keras/qkeras.py +++ b/hls4ml/converters/keras/qkeras.py @@ -3,7 +3,8 @@ from hls4ml.converters.keras.convolution import parse_conv1d_layer, parse_conv2d_layer from hls4ml.converters.keras.core import 
parse_batchnorm_layer, parse_dense_layer from hls4ml.converters.keras_to_hls import keras_handler, parse_default_keras_layer -from hls4ml.model.types import FixedPrecisionType, QKerasBinaryQuantizer, QKerasPO2Quantizer, QKerasQuantizer +from hls4ml.model.quantizers import QKerasBinaryQuantizer, QKerasPO2Quantizer, QKerasQuantizer +from hls4ml.model.types import FixedPrecisionType def get_quantizer_from_config(keras_layer, quantizer_var): diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py deleted file mode 100644 index 7f69652c04..0000000000 --- a/hls4ml/converters/onnx/quantizer.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Quantizer for the Quant node, after scale and zeropoint hafe been extracted -(unless scale is a power of 2, if doing special case po2) - -This is based on the sample implementation in finn-base -""" - -import numpy as np - -from hls4ml.model.types import Quantizer, RoundingMode, SaturationMode - - -class QuantNodeQuantizer(Quantizer): - """This implements a quantizer for a FixedPrecisionType with width==integer""" - - def __init__(self, precision): - super().__init__(precision.width, precision) - - def __call__(self, data): - """Apply the quantization on the data""" - - scale = 2 ** (self.hls_type.width - self.hls_type.integer) - - data = data * scale # (not using *= to avoid modifying data) - # Clamping - min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) - max_int_val = self._max_int(self.hls_type.signed, self.bits) - data = np.where(data > max_int_val, max_int_val, data) - data = np.where(data < min_int_val, min_int_val, data) - # Rounding - rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) - return rounding_fx(data) / scale - - @staticmethod - def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: - """Compute the minimum integer representable by a given number of bits. - Args: - signed (bool): Indicates whether the represented integer is signed or not. - saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) - bit_width (int): Number of bits available for the representation. - Returns: - int: Maximum unsigned integer that can be represented according to - the input arguments. - Examples: - >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) - int(-127) - >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) - int(0) - >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) - int(-128) - >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) - int(0) - """ - if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): - raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") - if signed and saturation_mode == SaturationMode.SAT_SYM: - value = -(2 ** (bit_width - 1)) + 1 - elif signed: - value = -(2 ** (bit_width - 1)) - else: - value = 0 - return value - - @staticmethod - def _max_int(signed: bool, bit_width: int) -> int: - """Compute the maximum integer representable by a given number of bits. - (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) - Args: - signed (bool): Indicates whether the represented integer is signed or not. - bit_width (int): Number of bits available for the representation. - Returns: - Tensor: Maximum integer that can be represented according to - the input arguments. 
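(Illustrative aside, not part of the patch: the clamping bounds computed by the _min_int / _max_int helpers above — and by the identical copies re-added in hls4ml/model/quantizers.py later in this series — can be sanity-checked with a small standalone sketch. The helper names below are local to this sketch, and the boolean `narrow` stands in for the AP_SAT_SYM saturation mode.)

def min_int(signed: bool, narrow: bool, bit_width: int) -> int:
    # narrow (AP_SAT_SYM) keeps the range symmetric: -(2**(n-1)) + 1
    if signed and narrow:
        return -(2 ** (bit_width - 1)) + 1
    if signed:
        return -(2 ** (bit_width - 1))
    return 0


def max_int(signed: bool, bit_width: int) -> int:
    # narrow/unsigned is not supported upstream, so no saturation-mode argument here
    return 2 ** (bit_width - 1) - 1 if signed else 2 ** bit_width - 1


assert (min_int(True, True, 8), min_int(True, False, 8), min_int(False, False, 8)) == (-127, -128, 0)
assert (max_int(True, 8), max_int(False, 8)) == (127, 255)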
- Examples: - >>> max_int(signed=True, bit_width=8) - int(127) - >>> max_int(signed=False, bit_width=8) - int(255) - """ - if not signed: - value = (2**bit_width) - 1 - else: - value = (2 ** (bit_width - 1)) - 1 - return value - - @staticmethod - def _resolve_rounding_mode(mode): - """Resolve the rounding mode of Quant and Trunc ops - to the corresponding numpy functions.""" - if mode == RoundingMode.RND_CONV: - return np.round - # elif mode_string == "CEIL": # not supported - # return np.ceil - elif mode == RoundingMode.TRN: - return np.floor - else: - raise ValueError(f"Rounding mode {mode} not supported.") diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 0df69b753e..7da730b60a 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -349,17 +349,17 @@ def initialize(self): class Constant(Layer): + # one could consider making this a weight attribute, but given it's transient nature, I am not sure it helps _expected_attributes = [ Attribute('value', value_type=np.ndarray), ] def initialize(self): value = self.attributes['value'] - self.value = value # note, this is unquantized; Only here for easier access shape = list(value.shape) if not shape: shape = (1,) - self.value = np.array([self.value]) + self.set_attr('value', np.array([value])) dims = [f'{self.name}_{i}' for i in range(len(shape))] self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) @@ -455,7 +455,6 @@ class Conv(Layer): """ def initialize(self): - # use negative indexing because it is not clear if batch dimension is always stripped if self.attributes['n_dim'] == 1: # this is 1D convolution shape = [self.attributes['out_width'], self.attributes['n_filt']] @@ -932,6 +931,7 @@ def initialize(self): self.add_weights_variable(name='bias', var_name='b{index}', data=bias) +# TODO: discuss whether this should be renamed to soemthing more descriptive, and whether the class hierarchy makes sense class ApplyAlpha(BatchNormalization): '''A custom layer to scale the output of a QDense layer which used 'alpha != 1' Inference computation uses BatchNormalization methods''' @@ -941,6 +941,7 @@ def initialize(self): shape = inp.shape dims = inp.dim_names self.add_output_variable(shape, dims) + self.set_attr('n_in', inp.size()) scale = self.get_attr('scale_data') scale_quantizer = self.get_attr('scale_quantizer') diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index f38bfd841d..adc7dff093 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,8 +1,8 @@ import numpy as np -from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.layers import ApplyAlpha, Constant, Merge from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer _base_attributes = ('Trace', 'reuse_factor', 'n_in') diff --git a/hls4ml/model/optimizer/passes/propagate_acc_precision.py b/hls4ml/model/optimizer/passes/propagate_acc_precision.py deleted file mode 100644 index 375979de4e..0000000000 --- a/hls4ml/model/optimizer/passes/propagate_acc_precision.py +++ /dev/null @@ -1,106 +0,0 @@ -import math # prefer to use math.ceil for scalar values (returns int) - -import numpy as np - -from hls4ml.model.layers import Conv1D, Conv2D, Dense -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import FixedPrecisionType, NamedType - -# TODO: Update these to use the new auto precision, not depdening only on QONNX 
values - - -class PropagateDensePrecision(OptimizerPass): - """ - Propagate precision for Dense nodes. Restrict it to only cases where - the precision is set by a quant node, since otherwise the values get huge. - """ - - def match(self, node): - is_match = isinstance(node, Dense) - return is_match - - def transform(self, model, node): - input_precision = node.get_input_node().get_attr("quant_precision") - weight_precision = node.get_attr("weight_precision") - if not input_precision or not weight_precision: - return False - - bias_precision = node.get_attr("bias_precision") - input_variable = node.get_input_variable() - num_acc = input_variable.shape[-1] - - accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) - - accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) - node.set_attr('accum_t', accum_t) - - if not node.get_attr("quant_precision"): - # output precision not set by quant node - node.update_output_precision(accum_precision) - - return False - - -class PropagateConvPrecision(OptimizerPass): - """Propagate precision for conv nodes. Restrict it to only cases where - the precision is set by a quant node, since otherwise the values get huge. - """ - - def match(self, node): - is_match = isinstance(node, (Conv1D, Conv2D)) - return is_match - - def transform(self, model, node): - input_precision = node.get_input_node().get_attr("quant_precision") - weight_precision = node.get_attr("weight_precision") - if not input_precision or not weight_precision: - return False - - bias_precision = node.get_attr("bias_precision") - num_feature_maps = node.weights['weight'].data_unquantized.shape[-1] - filt_width = node.get_attr('filt_width') - filt_height = node.get_attr('filt_height', 1) - - num_acc = filt_width * filt_height * num_feature_maps - - accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) - - accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) - node.set_attr('accum_t', accum_t) - - if not node.get_attr("quant_precision"): - # output precision not explicitly set by quant node - node.update_output_precision(accum_precision) - - return False - - -def _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc): - ''' - Propagate the precion type across a multiply. 
Rounding modes are propagated from input_precision - ''' - - # check to make sure none are None - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) - integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) - signed = weight_precision.signed or input_precision.signed - - # Because calculating precision, no need to round or sautration - rounding_mode = None - saturation_mode = None - - frac = bitwidth - integer - - # correct for bias - if bias_precision: - integer = ( - max( - integer + (bias_precision.signed and not signed), - bias_precision.integer + (signed and not bias_precision.signed), - ) - + 1 - ) - bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) - signed = signed or bias_precision.signed - - return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index 7bed6cb1e7..a97438832d 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -3,7 +3,8 @@ from hls4ml.model.layers import ApplyAlpha from hls4ml.model.optimizer import ConfigurableOptimizerPass, OptimizerPass, register_pass -from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, QKerasPO2Quantizer +from hls4ml.model.quantizers import QKerasPO2Quantizer +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType class OutputRoundingSaturationMode(ConfigurableOptimizerPass): diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index f0a5129d52..dc6deab14b 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -1,28 +1,25 @@ -''' +""" This file includes optimizations related to quant nodes. -As a first step, QuantConstantParameters converts the extra inputs to attributes. It is always the first step +As a first step, QuantConstantParameters converts the extra inputs to attributes. -The next step differs between the case of (1) unitary scale and zero offset, or (2) nonunitary scale and/or -nonzero offset. In the first case no scaling is required, so a Quant node effectively becomes a linear activation. -For the common case when this is applied on a constant weight, the activation is immediately merged with the weight, -qantizing the weights. In case 2, we need to explictly scale and unscale, so the Quant node becomes 3 nodes, an -ApplyAlpha node to apply a scale/shift, a Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. -We depend on optimization steps to move the unscaling ApplyAlpha down as needed. Again, when the Quant is a applied to a -Constant, the scaling and Linear nodes are immediately merged into the Constant. This is done because it simplifies some -of the other optimizations. +The next step differs between the case of (1) (positive) power-of-2 scale and zero offset, or (2) other cases. In the first +case no explicit scaling is required, so a Quant node logically becomes a linear activation. (Cases when the scale is a +power of 2 not equal to one are implicitly scaled with fixed precision types.) When the activation is applied to a constant +weight, the activation is immediately merged with the weight, quantizing the weights. 
In case (2), we need to explicitly +scale and unscale, so the Quant node becomes 3 nodes, an ApplyAlpha node to apply a scale/shift, a Linear node to apply the +quantization, and another ApplyAlpha to unscale/shift. We depend on optimization steps to move the unscaling ApplyAlpha +down as needed so that we can do integer or fixed-point calculations. When the Quant is a applied to a weight, the scaling +and Linear nodes are immediately merged into the Constant. -UPDATE: Case 1 is loosened to also include power of 2 scalar scales, not just unitary scale, if - _ALSO_INCLUDE_PO2 is set to true (the default) - -''' +""" import math # prefer to use math.ceil for scalar values import numpy as np -from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.layers import Activation, ApplyAlpha, Constant, Quant from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType _ALSO_MATCH_PO2 = True @@ -44,28 +41,29 @@ def match(self, node): def transform(self, model, node): """ - Remove Constant from the Qaunt node parameters (but not input[0]) + Remove Constant from the Quant node parameters (but not input[0]) """ if node.get_input_node(node.inputs[1]): scale_node = node.get_input_node(node.inputs[1]) if isinstance(scale_node, Constant): - node.set_attr('scale', scale_node.value) + node.set_attr('scale', scale_node.get_attr('value')) node.inputs[1] = '' model.remove_node(scale_node, rewire=False) if node.get_input_node(node.inputs[2]): zeropt_node = node.get_input_node(node.inputs[2]) if isinstance(zeropt_node, Constant): - node.set_attr('zeropt', zeropt_node.value) + node.set_attr('zeropt', zeropt_node.get_attr('value')) node.inputs[2] = '' model.remove_node(zeropt_node, rewire=False) if node.get_input_node(node.inputs[3]): bitwidth_node = node.get_input_node(node.inputs[3]) if isinstance(bitwidth_node, Constant): - if np.squeeze(bitwidth_node.value).shape: - raise RuntimeError("Only scalar bitwidth values are supporeted by the Quant node") - node.set_attr('bitwidth', bitwidth_node.value) + bitwidth = bitwidth_node.get_attr('value') + if bitwidth.size != 1: + raise RuntimeError('Only scalar bitwidth values are supporeted by the Quant node') + node.set_attr('bitwidth', bitwidth) node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) @@ -73,14 +71,12 @@ def transform(self, model, node): class QuantToActivation(OptimizerPass): - ''' - This is for the case when scale is 1 and zeropt is 0. It is a a 1:1 transformation of + """ + This is for the case when scale is a (positive) power of 2 and zeropt is 0. It is a a 1:1 transformation of a Quant to an Activation. As an optimization, this is not called when the input is constant. - - UPDATE: this is also called when scale is scalar and power of 2, not just 1. 
- ''' + """ def match(self, node): # only matches after the other inputs are already folded @@ -93,47 +89,43 @@ def match(self, node): and not node.get_input_node(node.inputs[3]) ) - # Only match if the scale is 1s and the zero-point is 0s + # Only match if the scale is power of 2 and the zero-point is 0s if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') is_match = is_match and (bias == np.zeros_like(bias)).all() # check if scale is ones-like or a power of two scale_unit_or_po2 = (scale == np.ones_like(scale)).all() if not scale_unit_or_po2 and _ALSO_MATCH_PO2: - sqscale = np.squeeze(scale) - if not sqscale.shape: - # not an array - mantissa, _ = np.frexp(sqscale) + # This optimization only works if all scales are the same + if np.all(scale[0] == scale): + mantissa, _ = np.frexp(scale[0]) scale_unit_or_po2 = mantissa == 0.5 - is_match = is_match and scale_unit_or_po2 + is_match = scale_unit_or_po2 return is_match def transform(self, model, node): - ''' + """ Change quant node to Activation - ''' - input_shape = node.get_input_variable().shape - - n_in = np.prod(input_shape) + """ - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') integer = bitwidth - scale = node.get_attr("scale") + scale = node.get_attr('scale') if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): - _, exp = np.frexp(np.squeeze(scale)) + _, exp = np.frexp(scale[0]) integer = bitwidth + exp - 1 precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -143,10 +135,9 @@ def transform(self, model, node): class FuseQuantWithConstant(OptimizerPass): - ''' - This is for the case when scale is 1 and zeropt is 0. It directly applies the quantization to a constant. - UPDATE: this is also called when scale is scalar and power of 2, not just 1. - ''' + """ + This is for the case when scale is a positive power of 2 and zeropt is 0. 
+ """ def match(self, node): # only matches after the other inputs are already folded @@ -158,36 +149,35 @@ def match(self, node): and not node.get_input_node(node.inputs[3]) ) - # Only match if the scale is 1s and the zero-point is 0s + # Only match if the scale is power of 2 and the zero-point is 0s if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') is_match = is_match and (bias == np.zeros_like(bias)).all() # check if scale is ones-like or a power of two scale_unit_or_po2 = (scale == np.ones_like(scale)).all() if not scale_unit_or_po2 and _ALSO_MATCH_PO2: - sqscale = np.squeeze(scale) - if not sqscale.shape: - # not an array - mantissa, _ = np.frexp(sqscale) + # This optimization only works if all scales are the same + if np.all(scale[0] == scale): + mantissa, _ = np.frexp(scale[0]) scale_unit_or_po2 = mantissa == 0.5 - is_match = is_match and scale_unit_or_po2 + is_match = scale_unit_or_po2 return is_match def transform(self, model, node): - ''' + """ Fuse Quant with Constant. - ''' + """ - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') integer = bitwidth - scale = node.get_attr("scale") + scale = node.get_attr('scale') if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): _, exp = np.frexp(np.squeeze(scale)) integer = bitwidth + exp - 1 @@ -195,11 +185,9 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) - const_node.set_attr("quant_precision", precision) - const_node.set_attr("quantizer", quantizer) - - # reinitialize (which also runs quantization if quantizer exists) - const_node.initialize() + const_node.set_attr('quant_precision', precision) + const_node.set_attr('quantizer', quantizer) + const_node.get_output_variable().type.precision = precision # remove the Quant node model.remove_node(node, rewire=True) @@ -208,12 +196,12 @@ def transform(self, model, node): class QuantToAlphaActivationAlpha(OptimizerPass): - ''' + """ This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activatio, ApplyAlpho (to rescale). - As an optimization, this is not called when the input is constant. 
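(Illustrative aside, not part of the patch: the scale/unscale pair built in the transform below is the identity once the quantization between the two nodes is ignored. Assuming ApplyAlpha, as a BatchNormalization-style node, computes y = scale * x + bias, then firstscale = 1/s, firstbias = z followed by rescale = s, rebias = -z*s returns the original input; a quick numpy check with arbitrary example values:)

import numpy as np

x = np.array([0.7, -1.3, 2.0])
s, z = np.array([0.5]), np.array([3.0])   # quant scale and zero point

y1 = (1 / s) * x + z                      # first ApplyAlpha: into the integer domain
y2 = s * y1 + (-z * s)                    # second ApplyAlpha: back out
np.testing.assert_allclose(y2, x)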
- ''' + NOTE: It needs to be scheduled after QuantToActivation (or we need to make the match criteria stricter) + """ def match(self, node): # only matches after the other inputs are already folded @@ -224,33 +212,24 @@ def match(self, node): and not node.get_input_node(node.inputs[2]) and not node.get_input_node(node.inputs[3]) ) - - if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") - is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) return is_match def transform(self, model, node): - ''' + """ Change quant node to ApplyAlhpa, Activation, ApplyAlpha - ''' + """ # Do the Activation as in the simple case - input_shape = node.get_input_variable().shape - - n_in = np.prod(input_shape) - - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -258,27 +237,25 @@ def transform(self, model, node): # but now add the ApplyAlhpas before and after - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_scale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) firstscale = 1 / scale firstbias = bias - attributes_scale["scale_data"] = firstscale - attributes_scale["bias_data"] = firstbias + attributes_scale['scale_data'] = firstscale + attributes_scale['bias_data'] = firstbias scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) rescale = scale rebias = -bias * scale - attributes_rescale["scale_data"] = rescale - attributes_rescale["bias_data"] = rebias + attributes_rescale['scale_data'] = rescale + attributes_rescale['bias_data'] = rebias rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) @@ -287,12 +264,12 @@ def transform(self, model, node): class ConstQuantToConstAlpha(OptimizerPass): - ''' + """ This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input consts allows for optimization, so the ApplyAlpha (to scale), Activation are optimized away right away. 
- ''' + """ def match(self, node): # only matches after the other inputs are already folded @@ -305,39 +282,37 @@ def match(self, node): ) if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) return is_match def transform(self, model, node): - ''' + """ Change Constant + Quant node to Constant, ApplyAlpha - ''' + """ # Do the Activation as in the simple case - input_shape = node.get_input_variable().shape - - n_in = np.prod(input_shape) + n_in = node.get_input_variable().size() - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') # caclucate the new value - new_val = const_node.value / scale + bias + new_val = const_node.get_attr('value') / scale + bias const_node.set_attr('value', new_val) - const_node.set_attr("quant_precision", precision) - const_node.set_attr("quantizer", quantizer) + const_node.set_attr('quant_precision', precision) + const_node.set_attr('quantizer', quantizer) # reinitialize (which also runs quantization if quantizer exists) const_node.initialize() @@ -347,8 +322,8 @@ def transform(self, model, node): rescale = scale rebias = -bias * scale - attributes_rescale["scale_data"] = rescale - attributes_rescale["bias_data"] = rebias + attributes_rescale['scale_data'] = rescale + attributes_rescale['bias_data'] = rebias rescale_node = model.make_node( ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] @@ -359,25 +334,25 @@ def transform(self, model, node): def _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode): - ''' + """ A function to determine the precision and quantizer - ''' - if rounding_mode == "ROUND": - bn_round = "AP_RND_CONV" - elif rounding_mode == "FLOOR": - bn_round = "AP_TRN" + """ + if rounding_mode == 'ROUND': + bn_round = 'AP_RND_CONV' + elif rounding_mode == 'FLOOR': + bn_round = 'AP_TRN' else: raise NotImplementedError( - f"Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported." + f'Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported.' 
) if narrow and not signed: - raise NotImplementedError("Narrow mode is only supported for singed numbers.") + raise NotImplementedError('Narrow mode is only supported for singed numbers.') if narrow: - bn_sat = "AP_SAT_SYM" + bn_sat = 'AP_SAT_SYM' else: - bn_sat = "AP_SAT" + bn_sat = 'AP_SAT' bitwidth = math.ceil(bitwidth) integer = math.ceil(integer) diff --git a/hls4ml/model/quantizers.py b/hls4ml/model/quantizers.py new file mode 100644 index 0000000000..c0a5869d5b --- /dev/null +++ b/hls4ml/model/quantizers.py @@ -0,0 +1,261 @@ +""" +Quantizer for the Quant node, after scale and zeropoint hafe been extracted +(unless scale is a power of 2, if doing special case po2) + +This is based on the sample implementation in finn-base +""" + +import numpy as np +import tensorflow as tf +from qkeras.quantizers import get_quantizer + +from hls4ml.model.types import ( + ExponentPrecisionType, + FixedPrecisionType, + IntegerPrecisionType, + RoundingMode, + SaturationMode, + XnorPrecisionType, +) + + +class Quantizer: + """ + Base class for representing quantizers in hls4ml. + + Subclasses of ``Quantizer`` are expected to wrap the quantizers of upstream tools (e.g., QKeras). + + Args: + bits (int): Total number of bits used by the quantizer. + hls_type (NamedType): The hls4ml type used by the quantizer. + """ + + def __init__(self, bits, hls_type): + self.bits = bits + self.hls_type = hls_type + + def __call__(self, data): + raise NotImplementedError + + +class BinaryQuantizer(Quantizer): + """Quantizer that quantizes to 0 and 1 (``bits=1``) or -1 and 1 (``bits==2``). + + Args: + bits (int, optional): Number of bits used by the quantizer. Defaults to 2. + + Raises: + Exception: Raised if ``bits>2`` + """ + + def __init__(self, bits=2): + if bits == 1: + hls_type = XnorPrecisionType() + elif bits == 2: + hls_type = IntegerPrecisionType(width=2) + else: + raise Exception(f'BinaryQuantizer suppots 1 or 2 bits, but called with bits={bits}') + super().__init__(bits, hls_type) + + def __call__(self, data): + zeros = np.zeros_like(data) + ones = np.ones_like(data) + quant_data = data + if self.bits == 1: + quant_data = np.where(data > 0, ones, zeros).astype('int') + if self.bits == 2: + quant_data = np.where(data > 0, ones, -ones) + return quant_data + + +class TernaryQuantizer(Quantizer): + """Quantizer that quantizes to -1, 0 and 1.""" + + def __init__(self): + super().__init__(2, IntegerPrecisionType(width=2)) + + def __call__(self, data): + zeros = np.zeros_like(data) + ones = np.ones_like(data) + return np.where(data > 0.5, ones, np.where(data <= -0.5, -ones, zeros)) + + +class QKerasQuantizer(Quantizer): + """Wrapper around QKeras quantizers. + + Args: + config (dict): Config of the QKeras quantizer to wrap. + """ + + def __init__(self, config): + self.quantizer_fn = get_quantizer(config) + self.alpha = config['config'].get('alpha', None) + if config['class_name'] == 'quantized_bits': + self.bits = config['config']['bits'] + self.hls_type = self._get_type(config) + # ! includes stochastic_ternary + elif 'ternary' in config['class_name']: + self.bits = 2 + self.hls_type = IntegerPrecisionType(width=2, signed=True) + # ! 
includes stochastic_binary + elif 'binary' in config['class_name']: + self.bits = 1 + self.hls_type = XnorPrecisionType() + else: + print("Unsupported quantizer: " + config['class_name']) + self.bits = 16 + self.hls_type = FixedPrecisionType(width=16, integer=6, signed=True) + + def __call__(self, data): + tf_data = tf.convert_to_tensor(data) + return self.quantizer_fn(tf_data).numpy() + # return self.quantizer_fn(data) + + def _get_type(self, quantizer_config): + width = quantizer_config['config']['bits'] + integer = quantizer_config['config'].get('integer', 0) + if quantizer_config['class_name'] == 'quantized_po2': + return ExponentPrecisionType(width=width, signed=True) + if width == integer: + if width == 1: + return XnorPrecisionType() + else: + return IntegerPrecisionType(width=width, signed=True) + else: + return FixedPrecisionType(width=width, integer=integer + 1, signed=True) + + +class QKerasBinaryQuantizer(Quantizer): + """Wrapper around QKeras binary quantizer. + + Args: + config (dict): Config of the QKeras quantizer to wrap. + """ + + def __init__(self, config, xnor=False): + self.bits = 1 if xnor else 2 + self.hls_type = XnorPrecisionType() if xnor else IntegerPrecisionType(width=2, signed=True) + self.alpha = config['config']['alpha'] + # Use the QKeras quantizer to handle any stochastic / alpha stuff + self.quantizer_fn = get_quantizer(config) + # Then we use our BinaryQuantizer to convert to '0,1' format + self.binary_quantizer = BinaryQuantizer(1) if xnor else BinaryQuantizer(2) + + def __call__(self, data): + x = tf.convert_to_tensor(data) + y = self.quantizer_fn(x).numpy() + return self.binary_quantizer(y) + + +class QKerasPO2Quantizer(Quantizer): + """Wrapper around QKeras power-of-2 quantizers. + + Args: + config (dict): Config of the QKeras quantizer to wrap. + """ + + def __init__(self, config): + self.bits = config['config']['bits'] + self.quantizer_fn = get_quantizer(config) + self.hls_type = ExponentPrecisionType(width=self.bits, signed=True) + + def __call__(self, data): + # Weights are quantized to nearest power of two + x = tf.convert_to_tensor(data) + y = self.quantizer_fn(x) + if hasattr(y, 'numpy'): + y = y.numpy() + return y + + +class QuantNodeQuantizer(Quantizer): + """ + This implements a quantizer for a FixedPrecisionType with width==integer + + This is based on the sample implementation in finn-base + """ + + def __init__(self, precision): + super().__init__(precision.width, precision) + if not isinstance(precision, FixedPrecisionType): + raise TypeError("QuantNodeQuantizer is only defined for FixedPrecisionType") + + def __call__(self, data): + """Apply the quantization on the data""" + + scale = 2 ** (self.hls_type.width - self.hls_type.integer) + + data = data * scale # (not using *= to avoid modifying data) + # Clamping + min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) + max_int_val = self._max_int(self.hls_type.signed, self.bits) + data = np.where(data > max_int_val, max_int_val, data) + data = np.where(data < min_int_val, min_int_val, data) + # Rounding + rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) + return rounding_fx(data) / scale + + @staticmethod + def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: + """Compute the minimum integer representable by a given number of bits. + Args: + signed (bool): Indicates whether the represented integer is signed or not. 
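(Illustrative aside, not part of the patch: a standalone numpy sketch of what __call__ above does for a signed 4-bit type with 2 integer bits under AP_RND_CONV / AP_SAT — scale up, clamp, round, scale back down. The input values are arbitrary examples.)

import numpy as np

width, integer = 4, 2
scale = 2 ** (width - integer)                        # 4
lo, hi = -(2 ** (width - 1)), 2 ** (width - 1) - 1    # -8, 7 for signed, non-narrow

data = np.array([0.3, 5.0, -5.0])
quantized = np.round(np.clip(data * scale, lo, hi)) / scale
np.testing.assert_allclose(quantized, [0.25, 1.75, -2.0])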
+ saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) + bit_width (int): Number of bits available for the representation. + Returns: + int: Maximum unsigned integer that can be represented according to + the input arguments. + Examples: + >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) + int(-127) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) + int(-128) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + """ + if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): + raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") + if signed and saturation_mode == SaturationMode.SAT_SYM: + value = -(2 ** (bit_width - 1)) + 1 + elif signed: + value = -(2 ** (bit_width - 1)) + else: + value = 0 + return value + + @staticmethod + def _max_int(signed: bool, bit_width: int) -> int: + """Compute the maximum integer representable by a given number of bits. + (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) + Args: + signed (bool): Indicates whether the represented integer is signed or not. + bit_width (int): Number of bits available for the representation. + Returns: + Tensor: Maximum integer that can be represented according to + the input arguments. + Examples: + >>> max_int(signed=True, bit_width=8) + int(127) + >>> max_int(signed=False, bit_width=8) + int(255) + """ + if not signed: + value = (2**bit_width) - 1 + else: + value = (2 ** (bit_width - 1)) - 1 + return value + + @staticmethod + def _resolve_rounding_mode(mode): + """Resolve the rounding mode of Quant and Trunc ops + to the corresponding numpy functions.""" + if mode == RoundingMode.RND_CONV: + return np.round + # elif mode_string == "CEIL": # not supported + # return np.ceil + elif mode == RoundingMode.TRN: + return np.floor + else: + raise ValueError(f"Rounding mode {mode} not supported.") diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index fc1cd98f19..8c182f4cca 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -8,162 +8,6 @@ from enum import Enum import numpy as np -import tensorflow as tf -from qkeras.quantizers import get_quantizer - -# region Quantizer definition - - -class Quantizer: - """ - Base class for representing quantizers in hls4ml. - - Subclasses of ``Quantizer`` are expected to wrap the quantizers of upstream tools (e.g., QKeras). - - Args: - bits (int): Total number of bits used by the quantizer. - hls_type (NamedType): The hls4ml type used by the quantizer. - """ - - def __init__(self, bits, hls_type): - self.bits = bits - self.hls_type = hls_type - - def __call__(self, data): - raise NotImplementedError - - -class BinaryQuantizer(Quantizer): - """Quantizer that quantizes to 0 and 1 (``bits=1``) or -1 and 1 (``bits==2``). - - Args: - bits (int, optional): Number of bits used by the quantizer. Defaults to 2. 
- - Raises: - Exception: Raised if ``bits>2`` - """ - - def __init__(self, bits=2): - if bits == 1: - hls_type = XnorPrecisionType() - elif bits == 2: - hls_type = IntegerPrecisionType(width=2) - else: - raise Exception(f'BinaryQuantizer suppots 1 or 2 bits, but called with bits={bits}') - super().__init__(bits, hls_type) - - def __call__(self, data): - zeros = np.zeros_like(data) - ones = np.ones_like(data) - quant_data = data - if self.bits == 1: - quant_data = np.where(data > 0, ones, zeros).astype('int') - if self.bits == 2: - quant_data = np.where(data > 0, ones, -ones) - return quant_data - - -class TernaryQuantizer(Quantizer): - """Quantizer that quantizes to -1, 0 and 1.""" - - def __init__(self): - super().__init__(2, IntegerPrecisionType(width=2)) - - def __call__(self, data): - zeros = np.zeros_like(data) - ones = np.ones_like(data) - return np.where(data > 0.5, ones, np.where(data <= -0.5, -ones, zeros)) - - -class QKerasQuantizer(Quantizer): - """Wrapper around QKeras quantizers. - - Args: - config (dict): Config of the QKeras quantizer to wrap. - """ - - def __init__(self, config): - self.quantizer_fn = get_quantizer(config) - self.alpha = config['config'].get('alpha', None) - if config['class_name'] == 'quantized_bits': - self.bits = config['config']['bits'] - self.hls_type = self._get_type(config) - # ! includes stochastic_ternary - elif 'ternary' in config['class_name']: - self.bits = 2 - self.hls_type = IntegerPrecisionType(width=2, signed=True) - # ! includes stochastic_binary - elif 'binary' in config['class_name']: - self.bits = 1 - self.hls_type = XnorPrecisionType() - else: - print("Unsupported quantizer: " + config['class_name']) - self.bits = 16 - self.hls_type = FixedPrecisionType(width=16, integer=6, signed=True) - - def __call__(self, data): - tf_data = tf.convert_to_tensor(data) - return self.quantizer_fn(tf_data).numpy() - # return self.quantizer_fn(data) - - def _get_type(self, quantizer_config): - width = quantizer_config['config']['bits'] - integer = quantizer_config['config'].get('integer', 0) - if quantizer_config['class_name'] == 'quantized_po2': - return ExponentPrecisionType(width=width, signed=True) - if width == integer: - if width == 1: - return XnorPrecisionType() - else: - return IntegerPrecisionType(width=width, signed=True) - else: - return FixedPrecisionType(width=width, integer=integer + 1, signed=True) - - -class QKerasBinaryQuantizer(Quantizer): - """Wrapper around QKeras binary quantizer. - - Args: - config (dict): Config of the QKeras quantizer to wrap. - """ - - def __init__(self, config, xnor=False): - self.bits = 1 if xnor else 2 - self.hls_type = XnorPrecisionType() if xnor else IntegerPrecisionType(width=2, signed=True) - self.alpha = config['config']['alpha'] - # Use the QKeras quantizer to handle any stochastic / alpha stuff - self.quantizer_fn = get_quantizer(config) - # Then we use our BinaryQuantizer to convert to '0,1' format - self.binary_quantizer = BinaryQuantizer(1) if xnor else BinaryQuantizer(2) - - def __call__(self, data): - x = tf.convert_to_tensor(data) - y = self.quantizer_fn(x).numpy() - return self.binary_quantizer(y) - - -class QKerasPO2Quantizer(Quantizer): - """Wrapper around QKeras power-of-2 quantizers. - - Args: - config (dict): Config of the QKeras quantizer to wrap. 
- """ - - def __init__(self, config): - self.bits = config['config']['bits'] - self.quantizer_fn = get_quantizer(config) - self.hls_type = ExponentPrecisionType(width=self.bits, signed=True) - - def __call__(self, data): - # Weights are quantized to nearest power of two - x = tf.convert_to_tensor(data) - y = self.quantizer_fn(x) - if hasattr(y, 'numpy'): - y = y.numpy() - return y - - -# endregion # region Precision types diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 535bffb0da..2c314c13ca 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -88,7 +88,7 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): model = tfc_2w2a_model ishape = (1, 1, 28, 28) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -111,7 +111,7 @@ def test_cnv_2w2a(cnv_2w2a_model, backend): model = cnv_2w2a_model ishape = (1, 32, 32, 3) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -137,7 +137,7 @@ def test_jet_tagging(jettagging_model, backend): # Execute QONNX model inference # TODO make the test bigger ishape = (1, 16) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] From 4d529756337961228216dc788aa1f8f79eb76cb3 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 5 Feb 2024 14:34:21 -0600 Subject: [PATCH 15/59] snapshot before removing redundant precision attributes --- .../model/optimizer/passes/conv_to_convxd.py | 25 +++--- .../optimizer/passes/matmul_const_to_dense.py | 24 +++--- hls4ml/model/optimizer/passes/merge_const.py | 79 ++++++++++--------- hls4ml/model/optimizer/passes/quant_opt.py | 45 +++++------ 4 files changed, 85 insertions(+), 88 deletions(-) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 28f4d4c0bd..efc5f3e89b 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -48,12 +48,13 @@ def transform(self, model, node): """Convert Conv with constant to a Conv1D or Conv2D layer""" weight_node = node.get_input_node(node.inputs[1]) - weight_precision = weight_node.get_attr("quant_precision") + weight_precision = weight_node.get_attr('quant_precision') + weight_data = weight_node.attributes['value'] bias_node = None bias_precision = None if len(node.inputs) == 3: bias_node = node.get_input_node(node.inputs[2]) - bias_precision = bias_node.get_attr("quant_precision") + bias_precision = bias_node.get_attr('quant_precision') # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} @@ -61,24 +62,24 @@ def transform(self, model, node): # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) if node.attributes['n_dim'] == 1: newtype = Conv1D - attributes["weight_data"] = 
np.transpose(weight_node.value, (1, 2, 0)) + attributes['weight_data'] = np.transpose(weight_data, (1, 2, 0)) else: newtype = Conv2D - attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) - attributes["weight_precision"] = weight_precision - attributes["weight_quantizer"] = weight_node.get_attr("quantizer") + attributes['weight_data'] = np.transpose(weight_data, (1, 2, 3, 0)) + attributes['weight_precision'] = weight_precision + attributes['weight_quantizer'] = weight_node.get_attr('quantizer') if bias_node: - attributes["bias_data"] = bias_node.value - attributes["bias_precision"] = bias_precision - attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + attributes['bias_data'] = bias_node.attributes['value'] + attributes['bias_precision'] = bias_precision + attributes['bias_quantizer'] = bias_node.get_attr('quantizer') else: - attributes["bias_data"] = np.zeros(attributes['n_filt']) - attributes["bias_precision"] = IntegerPrecisionType(1, False) + attributes['bias_data'] = np.zeros(attributes['n_filt']) + attributes['bias_precision'] = IntegerPrecisionType(1, False) # making new node new_node = model.make_node( - newtype, f"{newtype.__name__}_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + newtype, f'{newtype.__name__}_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs] ) # removing and replacing old nodes diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 82c7b56313..2a89ea0130 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -26,30 +26,32 @@ def transform(self, model, node): const_node = node.get_input_node(node.inputs[1]) other_var = node.get_input_variable(node.inputs[0]) - weight_precision = const_node.get_attr("quant_precision") - weight_quantizer = const_node.get_attr("quantizer") + weight_data = const_node.attributes['value'] + weight_precision = const_node.get_attr('quant_precision') + weight_quantizer = const_node.get_attr('quantizer') in_shape = other_var.shape n_in = np.prod(in_shape) - out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] + out_shape = list(in_shape[:-1]) + [weight_data.shape[-1]] n_out = np.prod(out_shape) # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update( { - "weight_data": const_node.value, - "weight_precision": weight_precision, - "weight_quantizer": weight_quantizer, - "bias_data": np.zeros(out_shape), - "bias_precision": IntegerPrecisionType(1, False), - "n_in": n_in, - "n_out": n_out, + 'weight_data': weight_data, + 'weight_precision': weight_precision, + 'weight_quantizer': weight_quantizer, + 'bias_data': np.zeros(out_shape), + 'bias_precision': IntegerPrecisionType(1, False), + 'have_bias': False, + 'n_in': n_in, + 'n_out': n_out, } ) # making new node - new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + new_dense = model.make_node(Dense, f'Dense_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) # removing and replacing old nodes model.remove_node(const_node, rewire=False) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index adc7dff093..4b13982259 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -6,8 +6,6 @@ _base_attributes = ('Trace', 
'reuse_factor', 'n_in') -# TODO This doesn't yet support quantization in the constants - class MergeTwoConstants(OptimizerPass): """Merge of two constants makes another constant""" @@ -23,15 +21,18 @@ def match(self, node): def transform(self, model, node): """ - Merge of two constants makes another constant + Merge of two constants makes another constant. + + Note: full precision is used in the calculation, and precision is not propagated. + The precision """ const_node0 = node.get_input_node(node.inputs[0]) const_node1 = node.get_input_node(node.inputs[1]) - val0 = const_node0.value - val1 = const_node1.value + val0 = const_node0.attributes['value'] + val1 = const_node1.attributes['value'] - op = node.attributes["op"] + op = node.attributes['op'] if op in ('add', 'sum'): new_val = val0 + val1 elif op == 'sub': @@ -47,16 +48,18 @@ def transform(self, model, node): elif op == 'min': new_val = np.minimum(val0, val1) else: - raise RuntimeError(f"Unexpected op_type: {op}") + raise RuntimeError(f'Unexpected op_type: {op}') - quantizer = node.get_attr("quantizer") # None if not defined + quantizer = node.get_attr('quantizer') # None if not defined + const_node0.set_attr('quantizer', quantizer) # overwrite the quantizer if quantizer: - const_node0.set_attr("quantizer", quantizer) - const_node0.set_attr("value", new_val) + const_node0.set_attr('quantizer', quantizer) + + const_node0.set_attr('value', new_val) - quant_precision = node.get_attr("quant_precision") + quant_precision = node.get_attr('quant_precision') if quant_precision: - const_node0.set_attr("quant_precision", quant_precision) + const_node0.set_attr('quant_precision', quant_precision) # reinitialize (which also runs quantization if quantizer exists) const_node0.initialize() @@ -75,7 +78,7 @@ class MergeToApplyAlpha(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate + and node.attributes['op'] in ('add', 'sum', 'sub', 'mul') # Div is separate and ( isinstance(node.get_input_node(node.inputs[0]), Constant) != isinstance(node.get_input_node(node.inputs[1]), Constant) @@ -103,21 +106,21 @@ def transform(self, model, node): bias_precision = None bias_quantizer = None - op = node.attributes["op"] + op = node.attributes['op'] if op in ('add', 'sum'): scale = np.array(1) - bias = const_node.value - bias_precision = const_node.get_attr("quant_precision") - bias_quantizer = const_node.get_attr("quantizer") + bias = const_node.attribute['value'] + bias_precision = const_node.get_attr('quant_precision') + bias_quantizer = const_node.get_attr('quantizer') elif op == 'sub': if node1const: scale = np.array(1) - bias = -const_node.value + bias = -const_node.attribute['value'] else: scale = np.array(-1) - bias = const_node.value - bias_precision = const_node.get_attr("quant_precision") - bias_quantizer = const_node.get_attr("quantizer") + bias = const_node.attribute['value'] + bias_precision = const_node.get_attr('quant_precision') + bias_quantizer = const_node.get_attr('quantizer') if bias_precision and not bias_precision.signed: # need to add a bit bias_precision.signed = 1 @@ -126,10 +129,10 @@ def transform(self, model, node): bias_quantizer = QuantNodeQuantizer(bias_precision) elif op == 'mul': - scale = const_node.value + scale = const_node.attribute['value'] bias = np.array(0) - scale_precision = const_node.get_attr("quant_precision") - scale_quantizer = const_node.get_attr("quantizer") + scale_precision = 
const_node.get_attr('quant_precision') + scale_quantizer = const_node.get_attr('quantizer') # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): @@ -140,20 +143,20 @@ def transform(self, model, node): attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update( { - "scale_data": scale, - "bias_data": bias, - "n_in": n_in, - "n_out": n_in, - "n_filt": -1, - "scale_precision": scale_precision, - "scale_quantizer": scale_quantizer, - "bias_precision": bias_precision, - "bias_quantizer": bias_quantizer, + 'scale_data': scale, + 'bias_data': bias, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + 'scale_precision': scale_precision, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'bias_quantizer': bias_quantizer, } ) bn_layer = model.make_node( - ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) @@ -172,7 +175,7 @@ class MergeToApplyAlphaDiv(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes["op"] == 'div' + and node.attributes['op'] == 'div' and isinstance(node.get_input_node(node.inputs[1]), Constant) ) # only second can be const @@ -182,7 +185,7 @@ def transform(self, model, node): input_shape = node.get_input_variable().shape n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) - scale = 1 / const_node.value + scale = 1 / const_node.attribute['value'] bias = np.array(0) # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias @@ -192,9 +195,9 @@ def transform(self, model, node): bias = np.broadcast_to(bias, input_shape) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) + attributes.update({'scale_data': scale, 'bias_data': bias, 'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) - bn_layer = model.make_node(ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) model.replace_node(node, bn_layer) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index dc6deab14b..e49ff99bd7 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -31,10 +31,14 @@ class QuantConstantParameters(OptimizerPass): """Remove Constant from the Qaunt node parameters (but not input[0])""" def match(self, node): - is_match = isinstance(node, Quant) and ( - (node.get_input_node(node.inputs[1]) and isinstance(node.get_input_node(node.inputs[1]), Constant)) - or (node.get_input_node(node.inputs[2]) and isinstance(node.get_input_node(node.inputs[2]), Constant)) - or (node.get_input_node(node.inputs[3]) and isinstance(node.get_input_node(node.inputs[3]), Constant)) + is_match = ( + isinstance(node, Quant) + and len(node.inputs) == 4 + and ( + (node.get_input_node(node.inputs[1]) and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or (node.get_input_node(node.inputs[2]) and 
isinstance(node.get_input_node(node.inputs[2]), Constant)) + or (node.get_input_node(node.inputs[3]) and isinstance(node.get_input_node(node.inputs[3]), Constant)) + ) ) return is_match @@ -67,6 +71,10 @@ def transform(self, model, node): node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) + node.inputs = [inp for inp in node.inputs if inp] + if len(node.inputs) != 1: + raise RuntimeError("hls4ml only supports constant scale, zeropt, and bitwidth values") + return True @@ -83,10 +91,8 @@ def match(self, node): is_match = ( isinstance(node, Quant) + and len(node.inputs) == 1 and not isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) ) # Only match if the scale is power of 2 and the zero-point is 0s @@ -142,11 +148,7 @@ class FuseQuantWithConstant(OptimizerPass): def match(self, node): # only matches after the other inputs are already folded is_match = ( - isinstance(node, Quant) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) + isinstance(node, Quant) and len(node.inputs) == 1 and isinstance(node.get_input_node(node.inputs[0]), Constant) ) # Only match if the scale is power of 2 and the zero-point is 0s @@ -197,7 +199,7 @@ def transform(self, model, node): class QuantToAlphaActivationAlpha(OptimizerPass): """ - This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + This is for the case when scale is not power-of-2 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activatio, ApplyAlpho (to rescale). NOTE: It needs to be scheduled after QuantToActivation (or we need to make the match criteria stricter) @@ -207,10 +209,8 @@ def match(self, node): # only matches after the other inputs are already folded is_match = ( isinstance(node, Quant) + and len(node.inputs) == 1 and not isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) ) return is_match @@ -265,7 +265,7 @@ def transform(self, model, node): class ConstQuantToConstAlpha(OptimizerPass): """ - This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + This is for the case when scale is not power-of-2 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input consts allows for optimization, so the ApplyAlpha (to scale), Activation are optimized away right away. 
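# A rough numpy sketch of the arithmetic the 1:3 Quant decomposition above assumes
# (illustration only; `x`, `scale`, `zeropt` and `n_bits` are placeholder names, and the
# pass's exact narrow-range and rounding-mode handling may differ):
#
#     import numpy as np
#
#     def quant_reference(x, scale, zeropt, n_bits, signed=True):
#         q = np.round(x / scale + zeropt)           # ApplyAlpha (1/scale, +zeropt), then rounding
#         lo = -(2 ** (n_bits - 1)) if signed else 0
#         hi = (2 ** (n_bits - 1)) - 1 if signed else (2 ** n_bits) - 1
#         q = np.clip(q, lo, hi)                     # the linear Activation with the quantized type
#         return q * scale - zeropt * scale          # ApplyAlpha to rescale: scale, -zeropt * scale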
@@ -274,11 +274,7 @@ class ConstQuantToConstAlpha(OptimizerPass): def match(self, node): # only matches after the other inputs are already folded is_match = ( - isinstance(node, Quant) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) + isinstance(node, Quant) and len(node.inputs) == 1 and isinstance(node.get_input_node(node.inputs[0]), Constant) ) if is_match: # to make sure this is a quant node with inputs @@ -292,10 +288,6 @@ def transform(self, model, node): Change Constant + Quant node to Constant, ApplyAlpha """ - # Do the Activation as in the simple case - - n_in = node.get_input_variable().size() - rounding_mode = node.get_attr('rounding_mode') narrow = node.get_attr('narrow') signed = node.get_attr('signed') @@ -318,7 +310,6 @@ def transform(self, model, node): const_node.initialize() attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) rescale = scale rebias = -bias * scale From cf5c9a105f27ffe3d2a81269c5664565e3362ffd Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 7 Feb 2024 10:24:33 -0600 Subject: [PATCH 16/59] snapshot --- hls4ml/model/layers.py | 24 ++- .../model/optimizer/passes/batchnorm_opt.py | 150 +++++++++++++----- hls4ml/model/optimizer/passes/bn_fuse.py | 41 ++++- .../model/optimizer/passes/conv_to_convxd.py | 10 +- .../optimizer/passes/matmul_const_to_dense.py | 5 +- hls4ml/model/optimizer/passes/merge_const.py | 70 +++++--- hls4ml/model/optimizer/passes/quant_opt.py | 6 +- hls4ml/model/quantizers.py | 12 +- hls4ml/model/types.py | 23 ++- 9 files changed, 248 insertions(+), 93 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 7da730b60a..b5d9f492af 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -22,6 +22,7 @@ IntegerPrecisionType, NamedType, TensorVariable, + UnspecifiedPrecisionType, WeightVariable, find_minimum_width, ) @@ -361,7 +362,12 @@ def initialize(self): shape = (1,) self.set_attr('value', np.array([value])) dims = [f'{self.name}_{i}' for i in range(len(shape))] - self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) + quantizer = self.get_attr('quantizer') + + # Should the else clause below be None or UnspecifiedPrecisionType + precision = quantizer.hls_type if quantizer is not None else UnspecifiedPrecisionType() + + self.add_output_variable(shape, dims, var_name=self.name, precision=precision) class Quant(Layer): # The QONNX quantization layer @@ -901,6 +907,7 @@ def initialize(self): self.add_output_variable(shape, dims) +# TODO: We currently seem to ignore the quantizers to mean, variance, etc. 
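# For reference, a minimal sketch of the elementwise transform this layer represents,
# assuming the usual batch-normalization folding (gamma, beta, mean, var and epsilon are
# placeholder arrays here; the surrounding optimizer passes do the actual attribute plumbing):
#
#     scale = gamma / np.sqrt(var + epsilon)
#     bias = beta - mean * scale
#     y = scale * x + bias    # applied per channel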
class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), @@ -943,19 +950,22 @@ def initialize(self): self.add_output_variable(shape, dims) self.set_attr('n_in', inp.size()) + # precision values are ignored if quantizer is not None scale = self.get_attr('scale_data') scale_quantizer = self.get_attr('scale_quantizer') + scale_precision = self.get_attr('scale_precision') bias = self.get_attr('bias_data') bias_quantizer = self.get_attr('bias_quantizer') + bias_precision = self.get_attr('bias_precision') - self.add_weights(scale, quantizer=scale_quantizer) - self.add_bias(bias, quantizer=bias_quantizer) + self.add_weights(scale, quantizer=scale_quantizer, precision=scale_precision) + self.add_bias(bias, quantizer=bias_quantizer, precision=bias_precision) - def add_weights(self, scale, quantizer=None): - self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) + def add_weights(self, scale, quantizer=None, precision=None): + self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer, precision=precision) - def add_bias(self, bias, quantizer=None): - self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) + def add_bias(self, bias, quantizer=None, precision=None): + self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer, precision=precision) class Merge(Layer): diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index a74047676d..3e0984dccb 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,7 +1,9 @@ import numpy as np -from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant +from hls4ml.model.layers import ApplyAlpha, BatchNormalization, BatchNormOnnx, Constant from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType _base_attributes = ('Trace', 'reuse_factor', 'epsilon', 'n_in', 'n_filt') @@ -17,49 +19,55 @@ def match(self, node): def transform(self, model, node): """ Remove Constant from the BatchNormalization node parameters (but not input[0]) + + TODO: Currently the quantizers are not actually used by the underlying layer. 
""" if not (len(node.inputs) == 5 and all(node.inputs)): - raise ValueError(f"All {len.node.inputs} BatchNormOnnnx inputs need to be defined") + raise ValueError(f'All {len.node.inputs} BatchNormOnnnx inputs need to be defined') attributes = {k: node.attributes.get(k, None) for k in _base_attributes} gamma_node = node.get_input_node(node.inputs[1]) if not isinstance(gamma_node, Constant): - raise TypeError("Only consant gammas supported") - gamma = gamma_node.value + raise TypeError('Only consant gammas supported') + gamma = gamma_node.attributes['value'] attributes['gamma_data'] = gamma + attributes['gamma_quantizer'] = gamma_node.get_attr['quantizer'] + node.inputs[1] = '' model.remove_node(gamma_node, rewire=False) beta_node = node.get_input_node(node.inputs[2]) if not isinstance(beta_node, Constant): - raise TypeError("Only consant betas supported") - beta = beta_node.value + raise TypeError('Only consant betas supported') + beta = beta_node.attributes['value'] attributes['beta_data'] = beta + attributes['beta_quantizer'] = beta_node.get_attr['quantizer'] node.inputs[2] = '' model.remove_node(beta_node, rewire=False) moving_mean_node = node.get_input_node(node.inputs[3]) if not isinstance(moving_mean_node, Constant): - raise TypeError("Only consant moving_means supported") - moving_mean = moving_mean_node.value + raise TypeError('Only consant moving_means supported') + moving_mean = moving_mean_node.attributes['value'] attributes['mean_data'] = moving_mean + attributes['mean_quantizer'] = moving_mean_node.get_attr['quantizer'] node.inputs[3] = '' model.remove_node(moving_mean_node, rewire=False) moving_variance_node = node.get_input_node(node.inputs[4]) if not isinstance(moving_variance_node, Constant): - raise TypeError("Only consant moving_variances supported") - moving_variance = moving_variance_node.value + raise TypeError('Only consant moving_variances supported') + moving_variance = moving_variance_node.attributes['value'] attributes['variance_data'] = moving_variance + attributes['variance_quantizer'] = moving_variance_node.get_attr['quantizer'] node.inputs[4] = '' model.remove_node(moving_variance_node, rewire=False) - # scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) - # bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) - # attributes["scale_data"] = scale - # attributes["bias_data"] = bias + node.inputs = [inp for inp in node.inputs if inp] + if len(node.inputs) != 1: + raise RuntimeError('The QONNX batchnomr had unexpected inputs.') new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) @@ -78,7 +86,6 @@ def match(self, node): isinstance(node, BatchNormalization) and not any(node.inputs[1:]) and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[0]).get_attr("quant_precision") ) return is_match @@ -88,13 +95,48 @@ def transform(self, model, node): """ const_node = node.get_input_node(node.inputs[0]) - new_val = const_node.value * node.weights["scale"].data_unquantized + node.weights["bias"].data_unquantized - const_node.set_attr("value", new_val) - const_node.set_attr("quantizer", node.get_attr("quantizer")) # None if not defined - const_node.set_attr("quant_precision", node.get_attr("quant_precision")) - - # reinitialize (which also runs quantization if quantizer exists) - const_node.initialize() + const_prec = const_node.get_output_variable().type.precision + + new_val = const_node.value * 
node.weights['scale'].data_unquantized + node.weights['bias'].data_unquantized + + const_node.set_attr('value', new_val) + const_node.set_attr('quantizer', node.get_attr('quantizer')) # None if not defined + + if isinstance(node.get_output_variable().type.precision, UnspecifiedPrecisionType): + if isinstance(const_prec, UnspecifiedPrecisionType): + pass # leave it as is + else: + const_node.get_output_variable().type.precision = UnspecifiedPrecisionType() # default + # propagate precision + scale_q = node.get_attr('scale_quantizer') + bias_q = node.get_attr('bias_quantizer') + if scale_q and bias_q: + # propagate precsion + scale_prec = scale_q.hls_type + bias_prec = bias_q.hls_type + if scale_prec not in (IntegerPrecisionType, FixedPrecisionType) or bias_prec not in ( + IntegerPrecisionType, + FixedPrecisionType, + ): + print("Warning: output type not propagated for constant merge") + else: + signed_prod = const_prec.signed or scale_prec.signed + w_prod = const_prec.width + scale_prec.width + i_prod = const_prec.integer + scale_prec.integer + signed = signed_prod or bias_prec.signed + i_tot = ( + max( + i_prod + (bias_prec.signed and not signed_prod), + bias_prec.ingeter + (signed_prod and not bias_prec.signed), + ) + + 1 + ) + w_tot = i_tot + max(w_prod - i_prod, bias_prec.width - bias_prec.integer) + new_prec = FixedPrecisionType(w_tot, i_tot, signed) + const_node.set_attr('quantizer', QuantNodeQuantizer(new_prec)) + const_node.get_output_variable().type.precision = new_prec + else: + const_node.get_output_variable().type.precision = node.get_output_variable().type.precision # remove the batch norm node model.remove_node(node, rewire=True) @@ -103,17 +145,21 @@ def transform(self, model, node): class FuseConsecutiveBatchNormalization(OptimizerPass): - ''' + """ OptimizerPass to merge consecutive BatchNormalization layers, only if the earlier one does not have quantization specified - ''' + + Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. + + Note: This optimizer may not be safe if weights are updateable. May need to turn off. 
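    As a worked example of the intended algebra (s0/b0 from the earlier layer, s1/b1
    from the later one; placeholder names):

        y = s1 * (s0 * x + b0) + b1 = (s0 * s1) * x + (s1 * b0 + b1)

    so the merged layer keeps scale = s0 * s1 and bias = s1 * b0 + b1.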
+ """ def match(self, node): prev_node = node.get_input_node(node.inputs[0]) basic_match = ( - isinstance(node, BatchNormalization) - and isinstance(prev_node, BatchNormalization) - and not prev_node.get_attr("quant_precision") + isinstance(node, ApplyAlpha) + and isinstance(prev_node, ApplyAlpha) + and isinstance(prev_node.get_output_variable().type.precision, UnspecifiedPrecisionType) ) # check for compatibility to merge @@ -123,12 +169,12 @@ def match(self, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized scale_compatible = ( - (prev_node.get_attr("scale_quantizer") is None and node.get_attr("scale_quantizer") is None) + (prev_node.get_attr('scale_quantizer') is None and node.get_attr('scale_quantizer') is None) or (s0 == np.ones_like(s0)).all() or (s1 == np.ones_like(s1)).all() ) bias_compatible = ( - (prev_node.get_attr("bias_quantizer") is None and node.get_attr("bias_quantizer") is None) + (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) or (b0 == np.zeros_like(b0)).all() or (b1 == np.zeros_like(b1)).all() ) @@ -139,31 +185,57 @@ def match(self, node): def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) + prev_map = prev_node.get_output_use_map() + if len(prev_map[prev_node.outputs[0]]) > 1: + return False + + # # Not sure why this part is needed + # node_map = node.get_output_use_map() + # if len(node_map[node.outputs[0]]) > 1: + # return False + s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized s_quantizer = ( - node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() else prev_node.get_attr("scale_quantizer") + node.get_attr('scale_quantizer') if (s0 == np.ones_like(s0)).all() else prev_node.get_attr('scale_quantizer') ) b_quantizer = ( - node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr("bias_quantizer") + node.get_attr('bias_quantizer') if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr('bias_quantizer') ) - node.set_attr("scale_quantizer", s_quantizer) - node.set_attr("bias_quantizer", b_quantizer) - if s_quantizer: - node.set_attr("scale_precision", s_quantizer.hls_type) - if b_quantizer: - node.set_attr("bias_precision", b_quantizer.hls_type) + node.set_attr('scale_quantizer', s_quantizer) + node.set_attr('bias_quantizer', b_quantizer) scale_new = s0 * s1 bias_new = s1 * b0 + b1 + # Not sure if this setting of this is useful + s_prec = None + if s_quantizer is None and (scale_new == np.ones_like(scale_new)).all(): + if ( + isinstance(prev_node.weights['scale'].type, IntegerPrecisionType) + and isinstance(node.weights['scale'].type, IntegerPrecisionType) + and prev_node.weights['scale'].type.width == 1 + and node.weights['scale'].type.width == 1 + ): + s_prec = node.weights['scale'].type + + b_prec = None + if b_quantizer is None and (bias_new == np.zeros_like(bias_new)).all(): + if ( + isinstance(prev_node.weights['bias'].type, IntegerPrecisionType) + and isinstance(node.weights['bias'].type, IntegerPrecisionType) + and prev_node.weights['bias'].type.width == 1 + and node.weights['bias'].type.width == 1 + ): + b_prec = node.weights['bias'].type + # call function so that quantizer would be called if needed - node.add_weights_variable(name='scale', var_name='s{index}', data=scale_new) - node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new) + 
node.add_weights_variable(name='scale', var_name='s{index}', data=scale_new, quantizer=s_quantizer, precision=s_prec) + node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new, quantizer=b_quantizer, precision=b_prec) model.remove_node(prev_node, rewire=True) return True diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 02d9b849ed..3d79de7dc8 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -1,25 +1,50 @@ +import numpy as np + from hls4ml.model.layers import BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import UnspecifiedPrecisionType class FuseBatchNormalization(OptimizerPass): def match(self, node): - is_match = ( + prev_node = node.get_input_node(node.inputs[0]) + basic_match = ( isinstance(node, BatchNormalization) - and isinstance(node.get_input_node(), (Dense, Conv1D, Conv2D)) - and node.get_input_node().get_attr('weight_quantizer') is None - and node.get_input_node().get_attr('bias_quantizer') is None + and isinstance(prev_node, (Dense, Conv1D, Conv2D)) + and isinstance(prev_node.get_output_variable().type.precision, UnspecifiedPrecisionType) ) - return is_match + if basic_match: + s0 = prev_node.weights['weight'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + scale_compatible = ( + (prev_node.get_attr('weight_quantizer') is None and node.get_attr('scale_quantizer') is None) + or (s0 == np.ones_like(s0)).all() + or (s1 == np.ones_like(s1)).all() + ) + bias_compatible = ( + (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) + or (b0 == np.zeros_like(b0)).all() + or (b1 == np.zeros_like(b1)).all() + ) + return scale_compatible and bias_compatible + + else: + return False def transform(self, model, node): - # Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values + """Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values.""" parent_node = node.get_input_node() parent_map = parent_node.get_output_use_map() - node_map = node.get_output_use_map() - if len(parent_map[parent_node.name]) > 1 or len(node_map[node.name]) > 1: + if len(parent_map[parent_node.outputs[0]]) > 1: return False + # # Not sure why this part is needed + # node_map = node.get_output_use_map() + # if len(node_map[node.outputs[0]]) > 1: + # return False + parent_weight = parent_node.weights['weight'] parent_bias = parent_node.weights['bias'] diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index efc5f3e89b..b61b0340be 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -2,6 +2,7 @@ from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import IntegerPrecisionType # these are attributes to copy @@ -48,13 +49,10 @@ def transform(self, model, node): """Convert Conv with constant to a Conv1D or Conv2D layer""" weight_node = node.get_input_node(node.inputs[1]) - weight_precision = weight_node.get_attr('quant_precision') weight_data = weight_node.attributes['value'] bias_node = None - bias_precision = None if len(node.inputs) == 3: bias_node = node.get_input_node(node.inputs[2]) - bias_precision = 
bias_node.get_attr('quant_precision') # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} @@ -66,16 +64,16 @@ def transform(self, model, node): else: newtype = Conv2D attributes['weight_data'] = np.transpose(weight_data, (1, 2, 3, 0)) - attributes['weight_precision'] = weight_precision attributes['weight_quantizer'] = weight_node.get_attr('quantizer') if bias_node: attributes['bias_data'] = bias_node.attributes['value'] - attributes['bias_precision'] = bias_precision attributes['bias_quantizer'] = bias_node.get_attr('quantizer') + attributes['have_bias'] = True else: attributes['bias_data'] = np.zeros(attributes['n_filt']) - attributes['bias_precision'] = IntegerPrecisionType(1, False) + attributes['bias_quantizer'] = QuantNodeQuantizer(IntegerPrecisionType(1, False)) + attributes['have_bias'] = False # making new node new_node = model.make_node( diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 2a89ea0130..7eac0ccca3 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -2,6 +2,7 @@ from hls4ml.model.layers import Constant, Dense, MatMul from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import IntegerPrecisionType _base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') @@ -27,7 +28,6 @@ def transform(self, model, node): other_var = node.get_input_variable(node.inputs[0]) weight_data = const_node.attributes['value'] - weight_precision = const_node.get_attr('quant_precision') weight_quantizer = const_node.get_attr('quantizer') in_shape = other_var.shape @@ -40,10 +40,9 @@ def transform(self, model, node): attributes.update( { 'weight_data': weight_data, - 'weight_precision': weight_precision, 'weight_quantizer': weight_quantizer, 'bias_data': np.zeros(out_shape), - 'bias_precision': IntegerPrecisionType(1, False), + 'bias_quantizer': QuantNodeQuantizer(IntegerPrecisionType(1, False)), 'have_bias': False, 'n_in': n_in, 'n_out': n_out, diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 4b13982259..11848c9081 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -3,6 +3,7 @@ from hls4ml.model.layers import ApplyAlpha, Constant, Merge from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.quantizers import QuantNodeQuantizer +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType _base_attributes = ('Trace', 'reuse_factor', 'n_in') @@ -57,10 +58,6 @@ def transform(self, model, node): const_node0.set_attr('value', new_val) - quant_precision = node.get_attr('quant_precision') - if quant_precision: - const_node0.set_attr('quant_precision', quant_precision) - # reinitialize (which also runs quantization if quantizer exists) const_node0.initialize() @@ -101,6 +98,7 @@ def transform(self, model, node): input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape n_in = np.prod(input_shape) + # Note: precision is ignored if quantizer is not None scale_precision = None scale_quantizer = None bias_precision = None @@ -109,30 +107,40 @@ def transform(self, model, node): op = node.attributes['op'] if op in ('add', 'sum'): scale = np.array(1) + scale_precision = IntegerPrecisionType(1, False) bias = const_node.attribute['value'] - 
bias_precision = const_node.get_attr('quant_precision') bias_quantizer = const_node.get_attr('quantizer') elif op == 'sub': + bias_quantizer = const_node.get_attr('quantizer') if node1const: scale = np.array(1) + scale_precision = IntegerPrecisionType(1, False) bias = -const_node.attribute['value'] + if ( + bias_quantizer is not None + and isinstance(bias_quantizer.hls_type, (IntegerPrecisionType, FixedPrecisionType)) + and not bias_quantizer.hls_type.signed + ): + # need to make signed and increas the bit, if unsigned + bias_precision = FixedPrecisionType( + bias_quantizer.hls_type.width + 1, + bias_quantizer.hls_type.integer + 1, + True, + bias_quantizer.hls_type.rounding_mode, + bias_quantizer.hls_type.saturation_mode, + bias_quantizer.hls_type.saturation_bits, + ) + bias_quantizer = QuantNodeQuantizer(bias_precision) else: scale = np.array(-1) + scale_precision = IntegerPrecisionType(2, True) bias = const_node.attribute['value'] - bias_precision = const_node.get_attr('quant_precision') - bias_quantizer = const_node.get_attr('quantizer') - if bias_precision and not bias_precision.signed: - # need to add a bit - bias_precision.signed = 1 - bias_precision.width += 1 - bias_precision.integer += 1 - bias_quantizer = QuantNodeQuantizer(bias_precision) elif op == 'mul': scale = const_node.attribute['value'] - bias = np.array(0) - scale_precision = const_node.get_attr('quant_precision') scale_quantizer = const_node.get_attr('quantizer') + bias = np.array(0) + bias_precision = IntegerPrecisionType(1, False) # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): @@ -155,12 +163,12 @@ def transform(self, model, node): } ) - bn_layer = model.make_node( + aa_layer = model.make_node( ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) - model.replace_node(node, bn_layer) + model.replace_node(node, aa_layer) return True @@ -186,7 +194,23 @@ def transform(self, model, node): n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) scale = 1 / const_node.attribute['value'] + scale_quantizer = const_node.get_attr('quantizer') + if scale_quantizer: + scale_precision = scale_quantizer.hls_type + i_new = 1 + int(scale_precision.signed) + scale_precision.fractional + w_new = 1 + int(scale_precision.signed) + max(scale_precision.fractional, 0) + new_scale_precision = FixedPrecisionType( + w_new, + i_new, + scale_precision.signed, + rounding_mode=scale_precision.rounding_mode, + saturation_mode=scale_precision.saturation_mode, + saturation_bits=scale_precision.saturation_bits, + ) + scale_quantizer = QuantNodeQuantizer(new_scale_precision) + bias = np.array(0) + bias_precision = IntegerPrecisionType(1, False) # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): @@ -195,7 +219,17 @@ def transform(self, model, node): bias = np.broadcast_to(bias, input_shape) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'scale_data': scale, 'bias_data': bias, 'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) + attributes.update( + { + 'scale_data': scale, + 'bias_data': bias, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + } + ) bn_layer = 
model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index e49ff99bd7..79d92ec4d1 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -131,7 +131,7 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) + attributes.update({'activation': 'linear', 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -187,7 +187,6 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) - const_node.set_attr('quant_precision', precision) const_node.set_attr('quantizer', quantizer) const_node.get_output_variable().type.precision = precision @@ -229,7 +228,7 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) + attributes.update({'activation': 'linear', 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -303,7 +302,6 @@ def transform(self, model, node): # caclucate the new value new_val = const_node.get_attr('value') / scale + bias const_node.set_attr('value', new_val) - const_node.set_attr('quant_precision', precision) const_node.set_attr('quantizer', quantizer) # reinitialize (which also runs quantization if quantizer exists) diff --git a/hls4ml/model/quantizers.py b/hls4ml/model/quantizers.py index c0a5869d5b..cadcdbbc3d 100644 --- a/hls4ml/model/quantizers.py +++ b/hls4ml/model/quantizers.py @@ -102,7 +102,7 @@ def __init__(self, config): self.bits = 1 self.hls_type = XnorPrecisionType() else: - print("Unsupported quantizer: " + config['class_name']) + print('Unsupported quantizer: ' + config['class_name']) self.bits = 16 self.hls_type = FixedPrecisionType(width=16, integer=6, signed=True) @@ -177,8 +177,8 @@ class QuantNodeQuantizer(Quantizer): def __init__(self, precision): super().__init__(precision.width, precision) - if not isinstance(precision, FixedPrecisionType): - raise TypeError("QuantNodeQuantizer is only defined for FixedPrecisionType") + if not isinstance(precision, (FixedPrecisionType, IntegerPrecisionType)): + raise TypeError('QuantNodeQuantizer is only defined for FixedPrecisionType and IntegerPrecisionType') def __call__(self, data): """Apply the quantization on the data""" @@ -216,7 +216,7 @@ def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: int(0) """ if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): - raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") + raise ValueError(f'Saturation mode {saturation_mode} not supported. 
Only AP_SAT_SYM, AP_SAT supported') if signed and saturation_mode == SaturationMode.SAT_SYM: value = -(2 ** (bit_width - 1)) + 1 elif signed: @@ -253,9 +253,9 @@ def _resolve_rounding_mode(mode): to the corresponding numpy functions.""" if mode == RoundingMode.RND_CONV: return np.round - # elif mode_string == "CEIL": # not supported + # elif mode_string == 'CEIL': # not supported # return np.ceil elif mode == RoundingMode.TRN: return np.floor else: - raise ValueError(f"Rounding mode {mode} not supported.") + raise ValueError(f'Rounding mode {mode} not supported.') diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 8c182f4cca..f9e75a7d87 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -81,7 +81,6 @@ class IntegerPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) - self.integer = width self.fractional = 0 def __str__(self): @@ -96,6 +95,22 @@ def __eq__(self, other): eq = eq and self.fractional == other.fractional return eq + @property + def integer(self): + return self.width + + @property + def rounding_mode(self): + return RoundingMode.TRN + + @property + def saturation_mode(self): + return SaturationMode.WRAP + + @property + def saturation_bits(self): + return None + class FixedPrecisionType(PrecisionType): """Arbitrary precision fixed-point data type. @@ -114,11 +129,15 @@ class FixedPrecisionType(PrecisionType): def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturation_mode=None, saturation_bits=None): super().__init__(width=width, signed=signed) self.integer = integer - self.fractional = width - integer self.rounding_mode = rounding_mode self.saturation_mode = saturation_mode self.saturation_bits = saturation_bits + # make this a property to avoid inconsistencies + @property + def fractional(self): + self.width - self.integer + @property def rounding_mode(self): return self._rounding_mode From 81f3e53533984ca67e24a1bd485b3135910e9e2e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 7 Feb 2024 19:44:00 -0600 Subject: [PATCH 17/59] bug fixes from attempting to run --- hls4ml/model/layers.py | 2 +- hls4ml/model/optimizer/__init__.py | 1 + .../model/optimizer/passes/batchnorm_opt.py | 29 +++++++++ hls4ml/model/optimizer/passes/bn_fuse.py | 65 +++++++++++++++++-- hls4ml/model/optimizer/passes/linear.py | 12 ++-- .../optimizer/passes/matmul_const_to_dense.py | 1 + hls4ml/model/optimizer/passes/merge_const.py | 13 ++-- hls4ml/model/optimizer/passes/move_scales.py | 4 +- hls4ml/model/quantizers.py | 6 +- hls4ml/model/types.py | 17 +++-- 10 files changed, 124 insertions(+), 26 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index b5d9f492af..ebf7af2124 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -403,7 +403,7 @@ def initialize(self): # for QONNX, remove batch dimension # (onnx cleaning should have removed reshapes not on data path) if isinstance(shape_node, Constant): - target_shape = shape_node.value[1:] + target_shape = shape_node.attributes['value'][1:] else: raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index ebe4934029..bd4da19071 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -69,6 +69,7 @@ 'eliminate_linear_activation', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', + 'remove_nop_batch_normalization', 
'replace_multidimensional_dense_with_conv', 'infer_precision_types', 'set_precision_concat', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 3e0984dccb..f633d763c8 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -194,6 +194,15 @@ def transform(self, model, node): # if len(node_map[node.outputs[0]]) > 1: # return False + # only merge if the types are integer or fixed + if ( + not isinstance(prev_node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(prev_node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) + ): + return False + s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized @@ -239,3 +248,23 @@ def transform(self, model, node): model.remove_node(prev_node, rewire=True) return True + + +class RemoveNopBatchNormalization(OptimizerPass): + """ + OptimizerPass to remove batch normalizations that do nothing (scale 1, bias 0) + + Note: This optimizer may not be safe if weights are updateable. + """ + + def match(self, node): + if isinstance(node, BatchNormalization): + s0 = node.weights['scale'].data_unquantized + b0 = node.weights['bias'].data_unquantized + return (s0 == np.ones_like(s0)).all() and (b0 == np.zeros_like(b0)).all() + else: + return False + + def transform(self, model, node): + model.remove_node(node, rewire=True) + return True diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 3d79de7dc8..a636af2f86 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -2,10 +2,19 @@ from hls4ml.model.layers import BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import UnspecifiedPrecisionType +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType class FuseBatchNormalization(OptimizerPass): + """ + OptimizerPass to merge BatchNormalization layers, + only if the earlier one does not have quantization specified + + Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. + + Note: This optimizer may not be safe if weights are updateable. May need to turn off. 
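    As a rough sketch of the intended algebra (W/b from the Dense or Conv layer, s/B
    from the following scale/bias layer; placeholder names, with s broadcast over the
    output channels):

        y = s * (W @ x + b) + B = (s * W) @ x + (s * b + B)

    so the fused layer keeps weight = s * W and bias = s * b + B.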
+ """ + def match(self, node): prev_node = node.get_input_node(node.inputs[0]) basic_match = ( @@ -51,13 +60,59 @@ def transform(self, model, node): bn_scale = node.weights['scale'] bn_bias = node.weights['bias'] + # only merge if the types are integer or fixed + if ( + not isinstance(parent_weight.type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(parent_bias.type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(bn_scale.type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(bn_bias.type, (IntegerPrecisionType, FixedPrecisionType)) + ): + return False + fused_weight = bn_scale.data * parent_weight.data fused_bias = bn_scale.data * parent_bias.data + bn_bias.data + w_quantizer = ( + node.get_attr('scale_quantizer') + if (parent_weight.data == np.ones_like(parent_weight.data)).all() + else parent_node.get_attr('weight_quantizer') + ) + b_quantizer = ( + node.get_attr('bias_quantizer') + if (parent_bias.data == np.zeros_like(parent_bias.data)).all() + else parent_node.get_attr('bias_quantizer') + ) + + node.set_attr('weight_quantizer', w_quantizer) + node.set_attr('bias_quantizer', b_quantizer) + + # Not sure if this setting of this is useful + w_prec = None + if w_quantizer is None and (fused_weight == np.ones_like(fused_weight)).all(): + if ( + isinstance(parent_weight.type, IntegerPrecisionType) + and isinstance(bn_scale.type, IntegerPrecisionType) + and parent_weight.type.width == 1 + and bn_scale.type.width == 1 + ): + w_prec = node.weights['scale'].type + + b_prec = None + if b_quantizer is None and (fused_bias == np.zeros_like(fused_bias)).all(): + if ( + isinstance(parent_bias.type, IntegerPrecisionType) + and isinstance(bn_bias.type, IntegerPrecisionType) + and parent_bias.type.width == 1 + and bn_bias.type.width == 1 + ): + b_prec = node.weights['bias'].type + + # call function so that quantizer would be called if needed + node.add_weights_variable( + name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer, precision=w_prec + ) + node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer, precision=b_prec) + model.remove_node(node, rewire=True) - parent_weight.data = fused_weight - parent_bias.data = fused_bias - if not parent_node.get_attr('use_bias', True): - parent_bias.update_precision(bn_bias.type.precision) return True diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py index 72d6dade9f..78a808b9a1 100644 --- a/hls4ml/model/optimizer/passes/linear.py +++ b/hls4ml/model/optimizer/passes/linear.py @@ -1,5 +1,6 @@ from hls4ml.model.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import UnspecifiedPrecisionType class EliminateLinearActivation(OptimizerPass): @@ -14,7 +15,6 @@ def transform(self, model, node): return True -# TODO: Move migrate this to auto precisoin check from quant precision check class MergeLinearActivation(OptimizerPass): ''' For many objects it's safe to change the output precision independently of the calculation. 
@@ -27,16 +27,14 @@ def match(self, node): if isinstance(node, Activation) and node.get_attr('activation') == 'linear': parent = node.get_input_node(node.inputs[0]) safe_parent = isinstance(parent, (Dense, Conv1D, Conv2D, BatchNormalization)) - parent_type_fixed = parent.get_attr("quant_precision") - return safe_parent and not parent_type_fixed + return safe_parent and isinstance(parent.get_output_variable().type.precision, UnspecifiedPrecisionType) else: return False def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) - quant_precision = node.get_attr("quant_precision") - prev_node.set_attr("quant_precision", quant_precision) - prev_node.set_attr("quantizer", node.get_attr("quantizer")) - prev_node.update_output_precision(quant_precision) + quantizer = node.get_attr("quantizer") + prev_node.set_attr("quantizer", quantizer) + prev_node.update_output_precision(quantizer.hls_type) model.remove_node(node) return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 7eac0ccca3..889a376cee 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -54,6 +54,7 @@ def transform(self, model, node): # removing and replacing old nodes model.remove_node(const_node, rewire=False) + del node.inputs[1] model.replace_node(node, new_dense) return True diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 11848c9081..8ffe053866 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -91,9 +91,11 @@ def transform(self, model, node): if node1const: const_node = node1 input_node_idx = 0 + const_node_idx = 1 else: const_node = node.get_input_node(node.inputs[0]) input_node_idx = 1 + const_node_idx = 0 input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape n_in = np.prod(input_shape) @@ -108,14 +110,14 @@ def transform(self, model, node): if op in ('add', 'sum'): scale = np.array(1) scale_precision = IntegerPrecisionType(1, False) - bias = const_node.attribute['value'] + bias = const_node.attributes['value'] bias_quantizer = const_node.get_attr('quantizer') elif op == 'sub': bias_quantizer = const_node.get_attr('quantizer') if node1const: scale = np.array(1) scale_precision = IntegerPrecisionType(1, False) - bias = -const_node.attribute['value'] + bias = -const_node.attributes['value'] if ( bias_quantizer is not None and isinstance(bias_quantizer.hls_type, (IntegerPrecisionType, FixedPrecisionType)) @@ -134,10 +136,10 @@ def transform(self, model, node): else: scale = np.array(-1) scale_precision = IntegerPrecisionType(2, True) - bias = const_node.attribute['value'] + bias = const_node.attributes['value'] elif op == 'mul': - scale = const_node.attribute['value'] + scale = const_node.attributes['value'] scale_quantizer = const_node.get_attr('quantizer') bias = np.array(0) bias_precision = IntegerPrecisionType(1, False) @@ -168,6 +170,7 @@ def transform(self, model, node): ) model.remove_node(const_node, rewire=False) + del node.inputs[const_node_idx] model.replace_node(node, aa_layer) return True @@ -193,7 +196,7 @@ def transform(self, model, node): input_shape = node.get_input_variable().shape n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) - scale = 1 / const_node.attribute['value'] + scale = 1 / const_node.attributes['value'] scale_quantizer = 
const_node.get_attr('quantizer')
         if scale_quantizer:
             scale_precision = scale_quantizer.hls_type
diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py
index e97fd89947..fe1acb7f94 100644
--- a/hls4ml/model/optimizer/passes/move_scales.py
+++ b/hls4ml/model/optimizer/passes/move_scales.py
@@ -16,7 +16,7 @@ class ScaleDownMatMul(OptimizerPass):
     def match(self, node):
         '''
         Check to see if we have a MatMul with at least one input ApplyAlpha.
-        Note, if both are this optimition runs twice.
+        Note: if both inputs are ApplyAlpha, this optimizer runs twice.
         '''
         is_match = (
             isinstance(node, MatMul)
@@ -85,7 +85,7 @@ def transform(self, model, node):
             try:
                 np.broadcast_to(scale, output.shape)  # check size compatibility
                 newscale = scale
-                newbias = inp[other_idx].value * bias
+                newbias = inp[other_idx].attributes['value'] * bias
                 np.broadcast_to(newbias, output.shape)
                 can_propagate = True
             except ValueError:
diff --git a/hls4ml/model/quantizers.py b/hls4ml/model/quantizers.py
index cadcdbbc3d..daae66fe45 100644
--- a/hls4ml/model/quantizers.py
+++ b/hls4ml/model/quantizers.py
@@ -215,8 +215,10 @@ def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int:
         >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8)
         int(0)
     """
-    if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT):
-        raise ValueError(f'Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported')
+    if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT, SaturationMode.WRAP):
+        raise ValueError(
+            f'Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported, WRAP partially'
+        )
     if signed and saturation_mode == SaturationMode.SAT_SYM:
         value = -(2 ** (bit_width - 1)) + 1
     elif signed:
diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py
index f9e75a7d87..9fe6867262 100644
--- a/hls4ml/model/types.py
+++ b/hls4ml/model/types.py
@@ -81,12 +81,12 @@ class IntegerPrecisionType(PrecisionType):
 
     def __init__(self, width=16, signed=True):
         super().__init__(width=width, signed=signed)
-        self.fractional = 0
 
     def __str__(self):
         typestring = '{signed}int<{width}>'.format(signed='u' if not self.signed else '', width=self.width)
         return typestring
 
+    # Does this need to make sure other is also an IntegerPrecisionType?
I could see a match between Fixed and Integer def __eq__(self, other): eq = self.width == other.width eq = eq and self.signed == other.signed @@ -99,6 +99,10 @@ def __eq__(self, other): def integer(self): return self.width + @property + def fractional(self): + return 0 + @property def rounding_mode(self): return RoundingMode.TRN @@ -134,9 +138,10 @@ def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturat self.saturation_bits = saturation_bits # make this a property to avoid inconsistencies + @property def fractional(self): - self.width - self.integer + return self.width - self.integer @property def rounding_mode(self): @@ -144,7 +149,9 @@ def rounding_mode(self): @rounding_mode.setter def rounding_mode(self, mode): - if isinstance(mode, str): + if mode is None: + self._rounding_mode = RoundingMode.TRN + elif isinstance(mode, str): self._rounding_mode = RoundingMode.from_string(mode) else: self._rounding_mode = mode @@ -155,7 +162,9 @@ def saturation_mode(self): @saturation_mode.setter def saturation_mode(self, mode): - if isinstance(mode, str): + if mode is None: + self._saturation_mode = SaturationMode.WRAP + elif isinstance(mode, str): self._saturation_mode = SaturationMode.from_string(mode) else: self._saturation_mode = mode From 9a74e46e33a715054496b408870675a35d4e19df Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 11 Feb 2024 18:07:24 -0600 Subject: [PATCH 18/59] fix some bugs from qonnx pytest --- .../model/optimizer/passes/batchnorm_opt.py | 19 ++++++++++--------- hls4ml/model/optimizer/passes/merge_const.py | 2 ++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index f633d763c8..ee00ecfa46 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,6 +1,6 @@ import numpy as np -from hls4ml.model.layers import ApplyAlpha, BatchNormalization, BatchNormOnnx, Constant +from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType @@ -33,7 +33,7 @@ def transform(self, model, node): raise TypeError('Only consant gammas supported') gamma = gamma_node.attributes['value'] attributes['gamma_data'] = gamma - attributes['gamma_quantizer'] = gamma_node.get_attr['quantizer'] + attributes['gamma_quantizer'] = gamma_node.get_attr('quantizer') node.inputs[1] = '' model.remove_node(gamma_node, rewire=False) @@ -43,7 +43,7 @@ def transform(self, model, node): raise TypeError('Only consant betas supported') beta = beta_node.attributes['value'] attributes['beta_data'] = beta - attributes['beta_quantizer'] = beta_node.get_attr['quantizer'] + attributes['beta_quantizer'] = beta_node.get_attr('quantizer') node.inputs[2] = '' model.remove_node(beta_node, rewire=False) @@ -52,7 +52,7 @@ def transform(self, model, node): raise TypeError('Only consant moving_means supported') moving_mean = moving_mean_node.attributes['value'] attributes['mean_data'] = moving_mean - attributes['mean_quantizer'] = moving_mean_node.get_attr['quantizer'] + attributes['mean_quantizer'] = moving_mean_node.get_attr('quantizer') node.inputs[3] = '' model.remove_node(moving_mean_node, rewire=False) @@ -61,13 +61,13 @@ def transform(self, model, node): raise TypeError('Only consant moving_variances supported') 
moving_variance = moving_variance_node.attributes['value'] attributes['variance_data'] = moving_variance - attributes['variance_quantizer'] = moving_variance_node.get_attr['quantizer'] + attributes['variance_quantizer'] = moving_variance_node.get_attr('quantizer') node.inputs[4] = '' model.remove_node(moving_variance_node, rewire=False) node.inputs = [inp for inp in node.inputs if inp] if len(node.inputs) != 1: - raise RuntimeError('The QONNX batchnomr had unexpected inputs.') + raise RuntimeError('The QONNX batchnorm had unexpected inputs.') new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) @@ -76,6 +76,7 @@ def transform(self, model, node): return True +# Most likely this case is removed by qonnx cleaning class ConstantBatchNormFusion(OptimizerPass): """ Merge BatchNorm into Const (after parameters have already been merged in BatchNormalization) @@ -149,7 +150,7 @@ class FuseConsecutiveBatchNormalization(OptimizerPass): OptimizerPass to merge consecutive BatchNormalization layers, only if the earlier one does not have quantization specified - Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. + Note: Consider restricting this to ApplyAlpha. Batch Normalization-style quantization seems to be ignored. Note: This optimizer may not be safe if weights are updateable. May need to turn off. """ @@ -157,8 +158,8 @@ class FuseConsecutiveBatchNormalization(OptimizerPass): def match(self, node): prev_node = node.get_input_node(node.inputs[0]) basic_match = ( - isinstance(node, ApplyAlpha) - and isinstance(prev_node, ApplyAlpha) + isinstance(node, BatchNormalization) + and isinstance(prev_node, BatchNormalization) and isinstance(prev_node.get_output_variable().type.precision, UnspecifiedPrecisionType) ) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 8ffe053866..25bd59bda6 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -8,6 +8,7 @@ _base_attributes = ('Trace', 'reuse_factor', 'n_in') +# This should generally not happen because of qonnx cleaning class MergeTwoConstants(OptimizerPass): """Merge of two constants makes another constant""" @@ -237,6 +238,7 @@ def transform(self, model, node): bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) + del node.inputs[1] model.replace_node(node, bn_layer) return True From 60a74bb49e906149f64401678bcb7f0ba4e4eff4 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 12 Feb 2024 09:59:48 -0600 Subject: [PATCH 19/59] fix assertion of not matching the number of inputs when replacing node --- hls4ml/model/optimizer/passes/conv_to_convxd.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index b61b0340be..e54c98c1d7 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -81,9 +81,11 @@ def transform(self, model, node): ) # removing and replacing old nodes - model.remove_node(weight_node, rewire=False) if bias_node: model.remove_node(bias_node, rewire=False) + del node.inputs[2] + model.remove_node(weight_node, rewire=False) + del node.inputs[1] model.replace_node(node, new_node) return True From 88a8d351b158145ef2d1f6d0a9daed9b159a7241 Mon 
Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Thu, 29 Feb 2024 16:36:54 -0600
Subject: [PATCH 20/59] update some precision inference

---
 .../model/optimizer/passes/infer_precision.py | 121 ++++++++++++++++--
 1 file changed, 109 insertions(+), 12 deletions(-)

diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py
index 6f6a72097f..c660647d3b 100644
--- a/hls4ml/model/optimizer/passes/infer_precision.py
+++ b/hls4ml/model/optimizer/passes/infer_precision.py
@@ -1,11 +1,12 @@
 import math
-from copy import deepcopy
 
 import numpy as np
 
 from hls4ml.model.optimizer import ConfigurableOptimizerPass
 from hls4ml.model.types import FixedPrecisionType, UnspecifiedPrecisionType
 
+# TODO: The code assumes everything is Fixed or Integer precision. Need to add checks
+
 
 class InferPrecisionTypes(ConfigurableOptimizerPass):
     def __init__(self):
@@ -36,7 +37,7 @@ def _infer_precision(self, node, types_to_infer):
         if node_class in ['Dense']:
             return self._infer_dense_precision(node, types_to_infer)
 
-        if node_class in ['BatchNormalization']:
+        if node_class in ['BatchNormalization', 'ApplyAlpha']:
            return self._infer_bn_precision(node, types_to_infer)
 
         if node_class in ['Conv1D', 'Conv2D', 'PointwiseConv1D', 'PointwiseConv2D', 'Conv2DBatchnorm']:
@@ -51,9 +52,15 @@ def _infer_precision(self, node, types_to_infer):
         if node_class in ['Clone', 'Reshape', 'Resize', 'Transpose', 'ZeroPadding1D', 'ZeroPadding2D']:
             return self._infer_output_matching_precision(node, types_to_infer)
 
-        if node_class in ['Concatenate', 'Merge']:
+        if node_class in ['Merge']:
             return self._infer_merge_precision(node, types_to_infer)
 
+        if node_class in ['Concatenate']:
+            return self._infer_cat_precision(node, types_to_infer)
+
+        if node_class in ['Dot']:
+            return self._infer_dot_precision(node, types_to_infer)
+
         # What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent
         # this in config_from_* functions
 
@@ -124,6 +131,7 @@ def _infer_common_precision(self, node, types_to_infer, n_ops):
             bitwidth = integers + max(frac, bias_width - bias_integers)
             signed = signed or bias_signed
 
+        # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form.
         new_type = FixedPrecisionType(bitwidth, integers, signed)
 
         if 'accum_t' in types_to_infer:
@@ -225,6 +233,11 @@ def _infer_sepconv_precision(self, node, types_to_infer):
         return inferred_types
 
     def _infer_bn_precision(self, node, types_to_infer):
+        """
+        The batch normalization precision here is the more implementation-focused version. It propagates
+ """ + inferred_types = [] if 'scale_t' in types_to_infer: @@ -238,16 +251,28 @@ def _infer_bn_precision(self, node, types_to_infer): inferred_types.append('bias_t') if 'result_t' in types_to_infer: + input_precision = node.get_input_variable().type.precision scale_precision = node.types['scale_t'].precision bias_precision = node.types['bias_t'].precision - out_precision = deepcopy(node.get_input_node().get_output_variable().type.precision) - out_precision.integer += scale_precision.integer - out_precision.fractional = max(out_precision.fractional, scale_precision.fractional) + after_scale_signed = scale_precision.signed or input_precision.signed + after_scale_width = input_precision.width + scale_precision.width + after_scale_integer = input_precision.integer + scale_precision.integer + + out_precision_signed = after_scale_signed or bias_precision.signed + out_precision_integer = ( + max( + after_scale_integer + (bias_precision.signed and not after_scale_signed), + bias_precision.integer + (after_scale_signed and not bias_precision.signed), + ) + + 1 + ) + out_precision_width = out_precision_integer + max( + after_scale_width - after_scale_integer, bias_precision.fractional + ) - out_precision.integer = max(out_precision.integer, bias_precision.integer) + 1 - out_precision.fractional = max(out_precision.fractional, bias_precision.fractional) - out_precision.width = out_precision.fractional + out_precision.integer + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. + out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision @@ -288,10 +313,82 @@ def _infer_merge_precision(self, node, types_to_infer): input_1 = node.get_input_variable(node.inputs[0]).type.precision input_2 = node.get_input_variable(node.inputs[1]).type.precision - new_width = max(input_1.fractional, input_2.fractional) + max(input_1.integer, input_2.integer) - new_int = max(input_1.integer, input_2.integer) + op = node.get_attr('op').lower() + if op in ('add', 'subtract', 'average'): + new_signed = input_1.signed or input_2.signed or op == 'subtract' + new_int = ( + max( + input_1.integer + (input_2.signed and not input_1.signed), + input_2.integer + (input_1.signed and not input_2.signed), + ) + + 1 + ) + new_width = new_int + max(input_1.fractional, input_2.fractional) + + elif op == 'multiply': + new_signed = input_1.signed or input_2.signed + new_int = input_1.integer + input_2.integer + new_width = input_1.width + input_2.width + elif op in ('maximum', 'minimum'): + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + + return ['result_t'] + + def _infer_cat_precision(self, node, types_to_infer): + assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + + input_1 = node.get_input_variable(node.inputs[0]).type.precision + input_2 = 
node.get_input_variable(node.inputs[1]).type.precision + + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + + return ['result_t'] + + def _infer_dot_precision(self, node, types_to_infer): + assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + + input_1 = node.get_input_variable(node.inputs[0]).type.precision + input_2 = node.get_input_variable(node.inputs[1]).type.precision + + n_in = node.get_input_variable(node.inputs[0]).shape[0] + + new_signed = input_1.signed or input_2.signed + new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) + new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) - out_precision = FixedPrecisionType(new_width, new_int) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision From 10a3c500b79ad1b4fded24c860f41ad9732a4afb Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 29 Feb 2024 17:33:24 -0600 Subject: [PATCH 21/59] extract bitwidth from size 1 array in quant node --- hls4ml/model/optimizer/passes/quant_opt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 79d92ec4d1..0d02124bc6 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -13,6 +13,7 @@ and Linear nodes are immediately merged into the Constant. 
""" + import math # prefer to use math.ceil for scalar values import numpy as np @@ -67,7 +68,7 @@ def transform(self, model, node): bitwidth = bitwidth_node.get_attr('value') if bitwidth.size != 1: raise RuntimeError('Only scalar bitwidth values are supporeted by the Quant node') - node.set_attr('bitwidth', bitwidth) + node.set_attr('bitwidth', bitwidth[0]) node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) From ab8d67b2ce9318106203d99dba12533570f0494d Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 1 Mar 2024 19:55:08 -0600 Subject: [PATCH 22/59] update automatic onnx configuration --- hls4ml/converters/__init__.py | 1 + hls4ml/converters/onnx_to_hls.py | 38 +++++++++++++----- hls4ml/model/graph.py | 3 +- hls4ml/utils/config.py | 66 ++++++++++++++++++++++++++++++-- test/pytest/test_qonnx.py | 6 +-- 5 files changed, 98 insertions(+), 16 deletions(-) diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py index 0bc7ccdbe7..b7bcb05b9e 100644 --- a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -10,6 +10,7 @@ from hls4ml.converters.keras_to_hls import get_supported_keras_layers # noqa: F401 from hls4ml.converters.keras_to_hls import parse_keras_model # noqa: F401 from hls4ml.converters.keras_to_hls import keras_to_hls, register_keras_layer_handler +from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 from hls4ml.model import ModelGraph from hls4ml.utils.config import create_config from hls4ml.utils.symbolic_utils import LUTFunction diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 8f6c7461fb..75850fa93e 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -162,26 +162,23 @@ def get_out_layer_name(graph): return [node.name for node in graph.node if node.output[0] in output_index_list] -def onnx_to_hls(config): - """Convert onnx model to hls model from configuration. +def parse_onnx_model(onnx_model): + """Parses the onnx model, both for configuration building and general processing. Args: - config (dict): ONNX configuration from yaml file or passed through API. + onnx_model: an ONNX model object. Raises: Exception: Raised if an unsupported operation is found in the ONNX model. Returns: - ModelGraph: hls4ml model object + layer_list (list): The onnx layers + input_layers (list): The input layers + output_layers (list): The output layers """ # This is a list of dictionaries to hold all the layer info we need to generate HLS layer_list = [] - # Extract model architecture - print('Interpreting Model ...') - - onnx_model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] - # We don't infer the shapes because the qonnx package preprocessing does it. # Obtain list of input/ouput layers @@ -257,6 +254,29 @@ def onnx_to_hls(config): print(f"Layer name: {layer['name']}, layer type: {layer['class_name']}, current shape: {input_shapes}") layer_list.append(layer) + return layer_list, input_layers, output_layers + + +def onnx_to_hls(config): + """Convert onnx model to hls model from configuration. + + Args: + config (dict): ONNX configuration from yaml file or passed through API. + + Raises: + Exception: Raised if an unsupported operation is found in the ONNX model. 
+ + Returns: + ModelGraph: hls4ml model object + """ + + # Extract model architecture + print('Interpreting Model ...') + + onnx_model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] + + layer_list, input_layers, output_layers = parse_onnx_model(onnx_model) + ################# # Generate HLS ################# diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index a6b5c29e89..f0d29237b7 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -99,7 +99,8 @@ def get_precision(self, layer, var='default'): type_name = layer.name.lower() + '_' + var + '_t' if precision is None: precision = self.layer_name_precision.get(layer.name.lower() + '_default') - type_name = layer.name.lower() + '_default_t' + # I think it is better to keep these unique still to avoid inadvertent updates + # type_name = layer.name.lower() + '_default_t' if precision is None: precision = self.layer_type_precision.get(layer.class_name.lower() + '_' + var) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 5d7ca1ae72..5c41a52a82 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -321,7 +321,7 @@ def config_from_pytorch_model( def config_from_onnx_model( - model, granularity='model', backend=None, default_precision='ap_fixed<16,6>', default_reuse_factor=1 + model, granularity='name', backend=None, default_precision='ap_fixed<16,6>', default_reuse_factor=1 ): """Create an HLS conversion config given the ONNX model. @@ -331,8 +331,8 @@ def config_from_onnx_model( Args: model: ONNX model - granularity (str, optional): Granularity of the created config. Defaults to 'model'. - Can be set to 'model', 'type' and 'layer'. + granularity (str, optional): Granularity of the created config. Defaults to 'name'. + Can be set to 'model', 'type' and 'name'. Granularity can be used to generate a more verbose config that can be fine-tuned. The default granularity ('model') will generate config keys that apply to the whole @@ -351,6 +351,16 @@ def config_from_onnx_model( [dict]: The created config. """ + if granularity.lower() not in ['model', 'type', 'name']: + raise Exception( + f'Invalid configuration granularity specified, expected "model", "type" or "name" got "{granularity}"' + ) + + if backend is not None: + backend = hls4ml.backends.get_backend(backend) + elif granularity.lower() != 'model': + print('Warning: it is recommended to pass the backend to "config_from_onnx_model"') + config = {} model_config = {} @@ -360,4 +370,54 @@ def config_from_onnx_model( config['Model'] = model_config + layer_list, _, _ = hls4ml.converters.parse_onnx_model(model) + + def make_layer_config(layer): + cls_name = layer['class_name'] + + layer_cls = hls4ml.model.layers.layer_map[cls_name] + if backend is not None: + layer_cls = backend.create_layer_class(layer_cls) + + layer_config = {} + + # set the default precision of the layer to auto? 
+ # (not really necessary if we set the backend appropriately) + # layer_config['Precision'] = {'default': 'auto'} + + config_attrs = [a for a in layer_cls.expected_attributes if a.configurable] + for attr in config_attrs: + if isinstance(attr, hls4ml.model.attributes.TypeAttribute): + precision_cfg = layer_config.setdefault('Precision', {}) + name = attr.name + if name.endswith('_t'): + name = name[:-2] + if attr.default is None: + precision_cfg[name] = 'auto' + else: + precision_cfg[name] = str(attr.default) + else: + if attr.default is not None: + layer_config[attr.config_name] = attr.default + + return layer_config + + if granularity.lower() == 'type': + type_config = {} + for layer in layer_list: + if layer['class_name'] in type_config: + continue + layer_config = make_layer_config(layer) + type_config[layer['class_name']] = layer_config + + config['LayerType'] = type_config + + elif granularity.lower() == 'name': + name_config = {} + for layer in layer_list: + layer_config = make_layer_config(layer) + name_config[layer['name']] = layer_config + + config['LayerName'] = name_config + return config diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 2c314c13ca..529a5adebc 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -93,7 +93,7 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) + config = hls4ml.utils.config_from_onnx_model(model, backend=backend) # Some hand-derived config config['LayerName'] = {} config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} @@ -116,7 +116,7 @@ def test_cnv_2w2a(cnv_2w2a_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, default_precision='fixed<32,16>') + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_cnv-2w2a_{backend}'), @@ -142,7 +142,7 @@ def test_jet_tagging(jettagging_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) + config = hls4ml.utils.config_from_onnx_model(model, backend=backend) hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config From 0a863adcc25b5facae2b9b375bf4c7fa1bc41ecc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sat, 2 Mar 2024 12:49:43 -0600 Subject: [PATCH 23/59] standardize on merge operators --- hls4ml/converters/onnx/merge.py | 14 +++++++++++- .../model/optimizer/passes/infer_precision.py | 12 ++++++++-- hls4ml/model/optimizer/passes/merge_const.py | 22 +++++++++---------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/hls4ml/converters/onnx/merge.py b/hls4ml/converters/onnx/merge.py index 2309cc213f..420f077ec2 100644 --- a/hls4ml/converters/onnx/merge.py +++ b/hls4ml/converters/onnx/merge.py @@ -2,13 +2,25 @@ merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] +op_map = { + 'Add': 'add', + 'Sub': 'subtract', + 'Mul': 'multiply', + 'Div': 'divide', + 'Average': 'average', + 'Max': 'maximum', + 
'Min': 'minimum', + 'Sum': 'add', + 'Concat': 'concat', +} + @onnx_handler(*merge_layers) def parse_merge_layer(node, input_names, input_shapes, graph): layer = {} layer['class_name'] = node.op_type layer['name'] = node.name - layer['op'] = layer['class_name'].lower() + layer['op'] = op_map[node.op_type] layer['inputs'] = input_names layer['outputs'] = list(node.output) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index c660647d3b..d1629a7a13 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -66,6 +66,10 @@ def _infer_precision(self, node, types_to_infer): return [] + def _get_default_precision(self, node): + model_config = node.model.config + return model_config.backend.convert_precision_string(model_config.model_precision['default']) + def _infer_default_type(self, node, type_name): model_config = node.model.config default_precision = model_config.backend.convert_precision_string(model_config.model_precision['default']) @@ -324,11 +328,12 @@ def _infer_merge_precision(self, node, types_to_infer): + 1 ) new_width = new_int + max(input_1.fractional, input_2.fractional) - + out_precision = FixedPrecisionType(new_width, new_int, new_signed) elif op == 'multiply': new_signed = input_1.signed or input_2.signed new_int = input_1.integer + input_2.integer new_width = input_1.width + input_2.width + out_precision = FixedPrecisionType(new_width, new_int, new_signed) elif op in ('maximum', 'minimum'): new_signed = input_1.signed or input_2.signed @@ -343,8 +348,11 @@ def _infer_merge_precision(self, node, types_to_infer): new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) new_int = max(input_1_integer, input_2_integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + print(f'Warning: not propagating weights for type {op}') + out_precision = self._get_default_precision(node) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 25bd59bda6..54f275d9ec 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -35,19 +35,19 @@ def transform(self, model, node): val1 = const_node1.attributes['value'] op = node.attributes['op'] - if op in ('add', 'sum'): + if op == 'add': new_val = val0 + val1 - elif op == 'sub': + elif op == 'subtract': new_val = val0 - val1 - elif op == 'mul': + elif op == 'multiply': new_val = val0 * val1 - elif op == 'div': + elif op == 'divide': new_val = val0 / val1 elif op == 'average': new_val = np.mean(np.array([val0, val1]), axis=0) - elif op == 'max': + elif op == 'maximum': new_val = np.maximum(val0, val1) - elif op == 'min': + elif op == 'minimum': new_val = np.minimum(val0, val1) else: raise RuntimeError(f'Unexpected op_type: {op}') @@ -76,7 +76,7 @@ class MergeToApplyAlpha(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes['op'] in ('add', 'sum', 'sub', 'mul') # Div is separate + and node.attributes['op'] in ('add', 'subtract', 'multiply') # Div is separate and ( isinstance(node.get_input_node(node.inputs[0]), Constant) != isinstance(node.get_input_node(node.inputs[1]), Constant) @@ -108,12 +108,12 @@ def transform(self, model, node): 
bias_quantizer = None op = node.attributes['op'] - if op in ('add', 'sum'): + if op == 'add': scale = np.array(1) scale_precision = IntegerPrecisionType(1, False) bias = const_node.attributes['value'] bias_quantizer = const_node.get_attr('quantizer') - elif op == 'sub': + elif op == 'subtract': bias_quantizer = const_node.get_attr('quantizer') if node1const: scale = np.array(1) @@ -139,7 +139,7 @@ def transform(self, model, node): scale_precision = IntegerPrecisionType(2, True) bias = const_node.attributes['value'] - elif op == 'mul': + elif op == 'multiply': scale = const_node.attributes['value'] scale_quantizer = const_node.get_attr('quantizer') bias = np.array(0) @@ -187,7 +187,7 @@ class MergeToApplyAlphaDiv(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes['op'] == 'div' + and node.attributes['op'] == 'divide' and isinstance(node.get_input_node(node.inputs[1]), Constant) ) # only second can be const From bfe6a3f6650705ac2a845949b654f48fdb86acfa Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 7 Mar 2024 19:52:01 -0600 Subject: [PATCH 24/59] snapshot of current work --- hls4ml/model/graph.py | 61 +++++++++------- hls4ml/model/layers.py | 5 -- hls4ml/model/optimizer/__init__.py | 17 +++-- .../model/optimizer/passes/batchnorm_opt.py | 2 +- hls4ml/model/optimizer/passes/bn_fuse.py | 49 ++++--------- .../model/optimizer/passes/conv_to_convxd.py | 20 +++--- .../model/optimizer/passes/infer_precision.py | 12 +++- hls4ml/model/optimizer/passes/linear.py | 8 ++- .../optimizer/passes/matmul_const_to_dense.py | 32 ++++----- hls4ml/model/optimizer/passes/merge_const.py | 72 ++++++++++--------- hls4ml/model/optimizer/passes/quant_opt.py | 65 ++++++++++++----- hls4ml/model/types.py | 26 +------ 12 files changed, 185 insertions(+), 184 deletions(-) diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index f0d29237b7..33b367a929 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -94,6 +94,11 @@ def get_layer_config(self, layer): return layer_config + def set_name_config(self, name, config): + hls_config = self.config['HLSConfig'] + layer_config = hls_config.setdefault('LayerName', {}) + layer_config[name] = config + def get_precision(self, layer, var='default'): precision = self.layer_name_precision.get(layer.name.lower() + '_' + var) type_name = layer.name.lower() + '_' + var + '_t' @@ -178,6 +183,35 @@ def get_compression(self, layer): return compression + def parse_name_config(self, layer_name, layer_cfg): + """This is used by _parse_hls_config below, but also in optimizers when a new layer config is created""" + precision_cfg = layer_cfg.get('Precision') + if isinstance(precision_cfg, dict): + for var, precision in precision_cfg.items(): + self.layer_name_precision[layer_name.lower() + '_' + var] = precision + else: + self.layer_name_precision[layer_name.lower() + '_default'] = precision_cfg + + rf = layer_cfg.get('ReuseFactor') + if rf is not None: + self.layer_name_rf[layer_name.lower()] = rf + + targ_cycles = layer_cfg.get('TargetCycles') + if targ_cycles is not None: + self.layer_name_targ_cycles[layer_name.lower()] = targ_cycles + + strategy = layer_cfg.get('Strategy') + if strategy is not None: + self.layer_name_strategy[layer_name.lower()] = strategy + + conv_implementation = layer_cfg.get('ConvImplementation') + if conv_implementation is not None: + self.layer_name_conv_implementation[layer_name.lower()] = conv_implementation + + compression = layer_cfg.get('Compression') + if compression is not None: + 
self.layer_name_compression[layer_name.lower()] = bool(compression) + def _parse_hls_config(self): hls_config = self.config['HLSConfig'] @@ -250,32 +284,7 @@ def _parse_hls_config(self): layer_name_cfg = hls_config.get('LayerName') if layer_name_cfg is not None: for layer_name, layer_cfg in layer_name_cfg.items(): - precision_cfg = layer_cfg.get('Precision') - if isinstance(precision_cfg, dict): - for var, precision in precision_cfg.items(): - self.layer_name_precision[layer_name.lower() + '_' + var] = precision - else: - self.layer_name_precision[layer_name.lower() + '_default'] = precision_cfg - - rf = layer_cfg.get('ReuseFactor') - if rf is not None: - self.layer_name_rf[layer_name.lower()] = rf - - targ_cycles = layer_cfg.get('TargetCycles') - if targ_cycles is not None: - self.layer_name_targ_cycles[layer_name.lower()] = targ_cycles - - strategy = layer_cfg.get('Strategy') - if strategy is not None: - self.layer_name_strategy[layer_name.lower()] = strategy - - conv_implementation = layer_cfg.get('ConvImplementation') - if conv_implementation is not None: - self.layer_name_conv_implementation[layer_name.lower()] = conv_implementation - - compression = layer_cfg.get('Compression') - if compression is not None: - self.layer_name_compression[layer_name.lower()] = bool(compression) + self.parse_name_config(layer_name, layer_cfg) def _validate_hls_config(self): use_dataflow = False diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index d0ac7e5561..a5130fa7bb 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -249,11 +249,6 @@ def add_output_variable( self.set_attr(out_name, out) - def update_output_precision(self, precision, output_name=None): - if output_name is None: - output_name = self.outputs[0] - self.variables[output_name].type.precision = precision - def add_weights(self, quantizer=None, compression=False): self.add_weights_variable( name='weight', var_name='w{index}', data='weight', quantizer=quantizer, compression=compression diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index bd4da19071..f05f8e3e04 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,11 +33,6 @@ register_flow( 'convert', [ - 'infer_precision_types', - 'channels_last_converter', - 'fuse_bias_add', - 'remove_useless_transpose', - 'expand_layer_group', 'reshape_constant', 'quant_constant_parameters', 'quant_to_activation', @@ -56,10 +51,17 @@ 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', + 'fuse_consecutive_batch_normalization', # needs to be before infer_precision_types + 'merge_linear_activation', # needs to be before infer_precision_types + 'fuse_batch_normalization', # needs to be before infer_precision_types + 'infer_precision_types', + 'channels_last_converter', + 'fuse_bias_add', + 'remove_useless_transpose', + 'expand_layer_group', 'output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', - 'fuse_consecutive_batch_normalization', ], ) # TODO Maybe not all QKeras optmizers belong here? 
@@ -67,13 +69,10 @@ 'optimize', [ 'eliminate_linear_activation', - 'fuse_consecutive_batch_normalization', - 'fuse_batch_normalization', 'remove_nop_batch_normalization', 'replace_multidimensional_dense_with_conv', 'infer_precision_types', 'set_precision_concat', - 'merge_linear_activation', ], requires=['convert'], ) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index ee00ecfa46..1800b33056 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -5,7 +5,7 @@ from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType -_base_attributes = ('Trace', 'reuse_factor', 'epsilon', 'n_in', 'n_filt') +_base_attributes = ('epsilon', 'n_in', 'n_filt') class BatchNormOnnxConstantParameters(OptimizerPass): diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index a636af2f86..c84430f13f 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -29,13 +29,13 @@ def match(self, node): b1 = node.weights['bias'].data_unquantized scale_compatible = ( (prev_node.get_attr('weight_quantizer') is None and node.get_attr('scale_quantizer') is None) - or (s0 == np.ones_like(s0)).all() - or (s1 == np.ones_like(s1)).all() + or ((s0 == np.ones_like(s0)).all() and prev_node.get_attr('weight_quantizer') is None) + or ((s1 == np.ones_like(s1)).all() and node.get_attr('scale_quantizer') is None) ) bias_compatible = ( (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) - or (b0 == np.zeros_like(b0)).all() - or (b1 == np.zeros_like(b1)).all() + or ((b0 == np.zeros_like(b0)).all() and prev_node.get_attr('bias_quantizer') is None) + or ((b1 == np.zeros_like(b1)).all() and node.get_attr('bias_quantizer') is None) ) return scale_compatible and bias_compatible @@ -60,12 +60,14 @@ def transform(self, model, node): bn_scale = node.weights['scale'] bn_bias = node.weights['bias'] + allowed_precisions = (IntegerPrecisionType, FixedPrecisionType, UnspecifiedPrecisionType) + # only merge if the types are integer or fixed if ( - not isinstance(parent_weight.type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(parent_bias.type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(bn_scale.type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(bn_bias.type, (IntegerPrecisionType, FixedPrecisionType)) + not isinstance(parent_weight.type.precision, allowed_precisions) + or not isinstance(parent_bias.type.precision, allowed_precisions) + or not isinstance(bn_scale.type.precision, allowed_precisions) + or not isinstance(bn_bias.type.precision, allowed_precisions) ): return False @@ -74,44 +76,21 @@ def transform(self, model, node): w_quantizer = ( node.get_attr('scale_quantizer') - if (parent_weight.data == np.ones_like(parent_weight.data)).all() + if node.get_attr('scale_quantizer') is not None else parent_node.get_attr('weight_quantizer') ) b_quantizer = ( node.get_attr('bias_quantizer') - if (parent_bias.data == np.zeros_like(parent_bias.data)).all() + if node.get_attr('bias_quantizer') is not None else parent_node.get_attr('bias_quantizer') ) node.set_attr('weight_quantizer', w_quantizer) node.set_attr('bias_quantizer', b_quantizer) - # Not sure if this setting of this is useful - w_prec = None - if w_quantizer is None and (fused_weight == 
np.ones_like(fused_weight)).all(): - if ( - isinstance(parent_weight.type, IntegerPrecisionType) - and isinstance(bn_scale.type, IntegerPrecisionType) - and parent_weight.type.width == 1 - and bn_scale.type.width == 1 - ): - w_prec = node.weights['scale'].type - - b_prec = None - if b_quantizer is None and (fused_bias == np.zeros_like(fused_bias)).all(): - if ( - isinstance(parent_bias.type, IntegerPrecisionType) - and isinstance(bn_bias.type, IntegerPrecisionType) - and parent_bias.type.width == 1 - and bn_bias.type.width == 1 - ): - b_prec = node.weights['bias'].type - # call function so that quantizer would be called if needed - node.add_weights_variable( - name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer, precision=w_prec - ) - node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer, precision=b_prec) + node.add_weights_variable(name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer) + node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer) model.remove_node(node, rewire=True) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index e54c98c1d7..6fb88ad0d0 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -2,13 +2,9 @@ from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.quantizers import QuantNodeQuantizer -from hls4ml.model.types import IntegerPrecisionType # these are attributes to copy _base_attributes = ( - 'Trace', - 'reuse_factor', 'in_width', 'out_width', 'n_chan', @@ -25,7 +21,6 @@ 'filt_height', 'stride_height', 'dilation_height', - 'strategy', 'data_format', ) @@ -69,16 +64,19 @@ def transform(self, model, node): if bias_node: attributes['bias_data'] = bias_node.attributes['value'] attributes['bias_quantizer'] = bias_node.get_attr('quantizer') - attributes['have_bias'] = True + attributes['use_bias'] = True else: attributes['bias_data'] = np.zeros(attributes['n_filt']) - attributes['bias_quantizer'] = QuantNodeQuantizer(IntegerPrecisionType(1, False)) - attributes['have_bias'] = False + attributes['use_bias'] = False + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'{newtype.__name__}_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) # making new node - new_node = model.make_node( - newtype, f'{newtype.__name__}_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs] - ) + new_node = model.make_node(newtype, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) # removing and replacing old nodes if bias_node: diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index d1629a7a13..4de58a18c2 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -12,8 +12,18 @@ class InferPrecisionTypes(ConfigurableOptimizerPass): def __init__(self): # The option, infer_no_bias, allows you to tailor for the given weights, in particular, zero bias self.infer_no_bias = False + self.count = 0 + self.MAX_COUNT = 1000 def match(self, node): + input_var = node.get_input_variable() + if input_var is not None and isinstance(input_var.type, UnspecifiedPrecisionType): + # need to wait for the input to update + # 
but check for infinite loops + self.count += 1 + if self.count == self.MAX_COUNT: + raise RuntimeError("There is an infinite loop in the precision inference.") + return False for layer_type in node.types.values(): if isinstance(layer_type.precision, UnspecifiedPrecisionType): return True @@ -30,7 +40,7 @@ def transform(self, model, node): if type_name not in inferred_types: self._infer_default_type(node, type_name) - return False # No model graph changes made + return True # May need to rerun def _infer_precision(self, node, types_to_infer): node_class = node.class_name diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py index 78a808b9a1..1b8e3d9686 100644 --- a/hls4ml/model/optimizer/passes/linear.py +++ b/hls4ml/model/optimizer/passes/linear.py @@ -15,6 +15,9 @@ def transform(self, model, node): return True +_safe_parents = (Dense, Conv1D, Conv2D, BatchNormalization, Activation) + + class MergeLinearActivation(OptimizerPass): ''' For many objects it's safe to change the output precision independently of the calculation. @@ -26,7 +29,7 @@ def match(self, node): ''' if isinstance(node, Activation) and node.get_attr('activation') == 'linear': parent = node.get_input_node(node.inputs[0]) - safe_parent = isinstance(parent, (Dense, Conv1D, Conv2D, BatchNormalization)) + safe_parent = isinstance(parent, _safe_parents) return safe_parent and isinstance(parent.get_output_variable().type.precision, UnspecifiedPrecisionType) else: return False @@ -35,6 +38,7 @@ def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) quantizer = node.get_attr("quantizer") prev_node.set_attr("quantizer", quantizer) - prev_node.update_output_precision(quantizer.hls_type) + prev_node.types['result_t'] = quantizer.hls_type + prev_node.get_output_variable().type.precision = quantizer.hls_type model.remove_node(node) return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 889a376cee..4c48944eb3 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -2,10 +2,6 @@ from hls4ml.model.layers import Constant, Dense, MatMul from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.quantizers import QuantNodeQuantizer -from hls4ml.model.types import IntegerPrecisionType - -_base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') class MatmulConstToDense(OptimizerPass): @@ -30,27 +26,29 @@ def transform(self, model, node): weight_data = const_node.attributes['value'] weight_quantizer = const_node.get_attr('quantizer') + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'Dense_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) + in_shape = other_var.shape n_in = np.prod(in_shape) out_shape = list(in_shape[:-1]) + [weight_data.shape[-1]] n_out = np.prod(out_shape) # creating the attributes - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update( - { - 'weight_data': weight_data, - 'weight_quantizer': weight_quantizer, - 'bias_data': np.zeros(out_shape), - 'bias_quantizer': QuantNodeQuantizer(IntegerPrecisionType(1, False)), - 'have_bias': False, - 'n_in': n_in, - 'n_out': n_out, - } - ) + attributes = { + 'weight_data': weight_data, + 'weight_quantizer': weight_quantizer, + 'bias_data': np.zeros(out_shape), + 'use_bias': False, 
+ 'n_in': n_in, + 'n_out': n_out, + } # making new node - new_dense = model.make_node(Dense, f'Dense_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) + new_dense = model.make_node(Dense, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) # removing and replacing old nodes model.remove_node(const_node, rewire=False) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 54f275d9ec..78591d203c 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -5,8 +5,6 @@ from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType -_base_attributes = ('Trace', 'reuse_factor', 'n_in') - # This should generally not happen because of qonnx cleaning class MergeTwoConstants(OptimizerPass): @@ -56,12 +54,10 @@ def transform(self, model, node): const_node0.set_attr('quantizer', quantizer) # overwrite the quantizer if quantizer: const_node0.set_attr('quantizer', quantizer) - + const_node0.types['result_t'] = quantizer.hls_type + const_node0.get_output_variable().type.precision = quantizer.hls_type const_node0.set_attr('value', new_val) - # reinitialize (which also runs quantization if quantizer exists) - const_node0.initialize() - model.remove_node(const_node1, rewire=False) # remove the batch norm node @@ -151,23 +147,26 @@ def transform(self, model, node): if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): bias = np.broadcast_to(bias, input_shape) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update( - { - 'scale_data': scale, - 'bias_data': bias, - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1, - 'scale_precision': scale_precision, - 'scale_quantizer': scale_quantizer, - 'bias_precision': bias_precision, - 'bias_quantizer': bias_quantizer, - } - ) + attributes = { + 'scale_data': scale, + 'bias_data': bias, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + 'scale_precision': scale_precision, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'bias_quantizer': bias_quantizer, + } + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'bn_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) aa_layer = model.make_node( - ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ApplyAlpha, new_name, attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) @@ -222,20 +221,23 @@ def transform(self, model, node): if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): bias = np.broadcast_to(bias, input_shape) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update( - { - 'scale_data': scale, - 'bias_data': bias, - 'scale_quantizer': scale_quantizer, - 'bias_precision': bias_precision, - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1, - } - ) - - bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) + attributes = { + 'scale_data': scale, + 'bias_data': bias, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + } + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = 
f'bn_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) + + bn_layer = model.make_node(ApplyAlpha, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) del node.inputs[1] diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 0d02124bc6..ed7f9701a2 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -14,6 +14,7 @@ """ +import copy import math # prefer to use math.ceil for scalar values import numpy as np @@ -25,8 +26,6 @@ _ALSO_MATCH_PO2 = True -_base_attributes = ('Trace', 'reuse_factor') - class QuantConstantParameters(OptimizerPass): """Remove Constant from the Qaunt node parameters (but not input[0])""" @@ -131,11 +130,17 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quantizer': quantizer}) + attributes = {'activation': 'linear', 'quantizer': quantizer} + + # update the configuration + config = model.config.get_layer_config(node) + prec_config = config.setdefault('Precision', {}) + prec_config['result'] = str(precision) + new_name = f'{node.name}_act' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) - new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) - new_node.get_output_variable().type.precision = precision + new_node = model.make_node(Activation, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) model.replace_node(node, new_node) return True @@ -189,8 +194,11 @@ def transform(self, model, node): const_node = node.get_input_node(node.inputs[0]) const_node.set_attr('quantizer', quantizer) + const_node.set_attr('result_t', precision) const_node.get_output_variable().type.precision = precision + # Should we update the configuration to reflect the new precision? 
I don't think it's necessary + # remove the Quant node model.remove_node(node, rewire=True) @@ -228,11 +236,18 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quantizer': quantizer}) + activation_attributes = {'activation': 'linear', 'quantizer': quantizer} + + # update the configuration + config = model.config.get_layer_config(node) + act_config = copy.deepcopy(config) + prec_config = act_config.setdefault('Precision', {}) + prec_config['result'] = str(precision) + act_name = f'{node.name}_act' + model.config.set_name_config(act_name, act_config) + model.config.parse_name_config(act_name, act_config) - new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) - new_node.get_output_variable().type.precision = precision + new_node = model.make_node(Activation, act_name, activation_attributes, [node.inputs[0]], [x for x in node.outputs]) model.replace_node(node, new_node) # but now add the ApplyAlhpas before and after @@ -240,16 +255,25 @@ def transform(self, model, node): scale = node.get_attr('scale') bias = node.get_attr('zeropt') - attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_scale = {} + attributes_rescale = {} - attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + scale_config = copy.deepcopy(config) + scale_name = f'{node.name}_scale' + model.config.set_name_config(scale_name, scale_config) + model.config.parse_name_config(scale_name, scale_config) + + rescale_config = config # no need to deep copy the last + rescale_name = f'{node.name}_rescale' + model.config.set_name_config(rescale_name, rescale_config) + model.config.parse_name_config(rescale_name, rescale_config) firstscale = 1 / scale firstbias = bias attributes_scale['scale_data'] = firstscale attributes_scale['bias_data'] = firstbias - scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]]) + scale_node = model.make_node(ApplyAlpha, scale_name, attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) rescale = scale @@ -257,7 +281,7 @@ def transform(self, model, node): attributes_rescale['scale_data'] = rescale attributes_rescale['bias_data'] = rebias - rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) + rescale_node = model.make_node(ApplyAlpha, rescale_name, attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) return True @@ -305,10 +329,15 @@ def transform(self, model, node): const_node.set_attr('value', new_val) const_node.set_attr('quantizer', quantizer) - # reinitialize (which also runs quantization if quantizer exists) - const_node.initialize() + const_node.types['result_t'].precision = precision + const_node.get_output_variable().type.precision = precision + + attributes_rescale = {} - attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + rescale_config = copy.deepcopy(model.config.get_layer_config(node)) + rescale_name = f'{node.name}_rescale' + model.config.set_name_config(rescale_name, rescale_config) + model.config.parse_name_config(rescale_name, rescale_config) rescale = scale rebias = -bias * scale @@ -316,7 +345,7 @@ def transform(self, model, node): attributes_rescale['bias_data'] = rebias rescale_node = 
model.make_node( - ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] + ApplyAlpha, rescale_name, attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] ) model.replace_node(node, rescale_node) diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 05617ba124..9fb257a1ef 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -115,26 +115,6 @@ def saturation_mode(self): def saturation_bits(self): return 0 - @property - def integer(self): - return self.width - - @property - def fractional(self): - return 0 - - @property - def rounding_mode(self): - return RoundingMode.TRN - - @property - def saturation_mode(self): - return SaturationMode.WRAP - - @property - def saturation_bits(self): - return None - class FixedPrecisionType(PrecisionType): """Arbitrary precision fixed-point data type. @@ -159,10 +139,6 @@ def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturat # make this a property to avoid inconsistencies - @property - def fractional(self): - return self.width - self.integer - @property def fractional(self): return self.width - self.integer @@ -231,6 +207,7 @@ def __init__(self): super().__init__(width=1, signed=False) self.integer = 1 + # TODO: this should really be a specific type def __str__(self): typestring = 'uint<1>' return typestring @@ -245,6 +222,7 @@ class ExponentPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) + # TODO: this should really be a specific type, not int def __str__(self): typestring = '{signed}int<{width}>'.format(signed='u' if not self.signed else '', width=self.width) return typestring From 25849ef435731679961b5ba7068abbede1d9f02a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 10 Mar 2024 15:30:36 -0500 Subject: [PATCH 25/59] Fix bug in FuseBatchNormalization --- hls4ml/model/optimizer/passes/bn_fuse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index c84430f13f..b3e8e454c8 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -89,8 +89,8 @@ def transform(self, model, node): node.set_attr('bias_quantizer', b_quantizer) # call function so that quantizer would be called if needed - node.add_weights_variable(name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer) - node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer) + parent_node.add_weights_variable(name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer) + parent_node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer) model.remove_node(node, rewire=True) From 4485bf3154ed5f4fbdabea1888b122ba84d2df80 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 10 Mar 2024 20:23:58 -0500 Subject: [PATCH 26/59] fix issue with configuration setup of test --- test/pytest/test_qonnx.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 529a5adebc..426df8f2e0 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -93,10 +93,7 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = 
hls4ml.utils.config_from_onnx_model(model, backend=backend) - # Some hand-derived config - config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_tfc-2w2a_{backend}'), backend=backend, hls_config=config ) From 52067c32e5de07af94322815b3fd02db5f8a5efa Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 10 Mar 2024 22:25:15 -0500 Subject: [PATCH 27/59] fix bug in FuseConsecutiveBatchNormalization --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 1800b33056..26292d7e2a 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -197,10 +197,10 @@ def transform(self, model, node): # only merge if the types are integer or fixed if ( - not isinstance(prev_node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(prev_node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) + not isinstance(prev_node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(prev_node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) ): return False From 24d6245660d2b601301dc800e4401a5098b39c2a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 11 Mar 2024 18:54:41 -0500 Subject: [PATCH 28/59] add missing header --- .../quartus/firmware/nnet_utils/nnet_conv2d_resource.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h index 73ad45592f..f5ce781739 100644 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h +++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h @@ -1,6 +1,8 @@ #ifndef NNET_CONV2D_RESOURCE_H_ #define NNET_CONV2D_RESOURCE_H_ +#include + #include "nnet_common.h" #include "nnet_dense.h" #include "nnet_helpers.h" From 835af4e0a2c1ce403c74342f873fe727f01d99c0 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 11 Mar 2024 18:55:41 -0500 Subject: [PATCH 29/59] attempt to make qonnx tests match better --- test/pytest/test_qonnx.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 426df8f2e0..b955608b88 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -88,7 +88,9 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): model = tfc_2w2a_model ishape = (1, 1, 28, 28) - X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + idict = {model.graph.input[0].name: X} 
y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -105,15 +107,20 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) def test_cnv_2w2a(cnv_2w2a_model, backend): + """ + This tests a convolution model. Note: the batch normalizations weights not quantized, so it + is difficult to make this match perfectly. It is also a slow test. + """ model = cnv_2w2a_model ishape = (1, 32, 32, 3) - X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**6) * 2**-6).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,6>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_cnv-2w2a_{backend}'), @@ -134,12 +141,13 @@ def test_jet_tagging(jettagging_model, backend): # Execute QONNX model inference # TODO make the test bigger ishape = (1, 16) - X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, backend=backend) + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config From 2bcec04b12ad10dcb689536cada563caafea5faf Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 12 Mar 2024 09:08:35 -0500 Subject: [PATCH 30/59] fix pre-commit --- hls4ml/model/optimizer/passes/move_scales.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index fe1acb7f94..cec69af5e8 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -4,6 +4,7 @@ TODO: Check that biases are properly handled. 
(Attempt to do it via Merge) ''' + import numpy as np from hls4ml.model.layers import ApplyAlpha, Constant, Conv, MatMul, Merge From b3facd25975ac61b02270d04b60efb1fe3e455de Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Apr 2024 09:59:54 -0500 Subject: [PATCH 31/59] remove count, become more selective on when True is returned --- hls4ml/model/optimizer/passes/infer_precision.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 4de58a18c2..ee585c42d6 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -12,17 +12,11 @@ class InferPrecisionTypes(ConfigurableOptimizerPass): def __init__(self): # The option, infer_no_bias, allows you to tailor for the given weights, in particular, zero bias self.infer_no_bias = False - self.count = 0 - self.MAX_COUNT = 1000 def match(self, node): input_var = node.get_input_variable() if input_var is not None and isinstance(input_var.type, UnspecifiedPrecisionType): - # need to wait for the input to update - # but check for infinite loops - self.count += 1 - if self.count == self.MAX_COUNT: - raise RuntimeError("There is an infinite loop in the precision inference.") + # only infer types if the input type is known return False for layer_type in node.types.values(): if isinstance(layer_type.precision, UnspecifiedPrecisionType): @@ -40,7 +34,9 @@ def transform(self, model, node): if type_name not in inferred_types: self._infer_default_type(node, type_name) - return True # May need to rerun + # if the return type was set, this may allow InferPrecisionTypes to be run + # on layers it was not previously able to + return 'result_t' in types_to_infer def _infer_precision(self, node, types_to_infer): node_class = node.class_name From 0d8108eaeacc504a213a3795b1d1482cf621c4cf Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 19 Apr 2024 14:59:03 -0500 Subject: [PATCH 32/59] fix optimizer issue when quantizer is None --- hls4ml/model/optimizer/passes/linear.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py index 1b8e3d9686..b1aee7adc7 100644 --- a/hls4ml/model/optimizer/passes/linear.py +++ b/hls4ml/model/optimizer/passes/linear.py @@ -37,8 +37,10 @@ def match(self, node): def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) quantizer = node.get_attr("quantizer") - prev_node.set_attr("quantizer", quantizer) - prev_node.types['result_t'] = quantizer.hls_type - prev_node.get_output_variable().type.precision = quantizer.hls_type + # if the activation has a quantizer (usually from a QONNX Quant node), set the previous node's output precision + if quantizer is not None: + prev_node.set_attr("quantizer", quantizer) + prev_node.types['result_t'] = quantizer.hls_type + prev_node.get_output_variable().type.precision = quantizer.hls_type model.remove_node(node) return True From 1fa59dcd947c99851a5c5bce4301e3ef52407bdc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 16 May 2024 11:32:10 -0500 Subject: [PATCH 33/59] update pytest image to 0.5.6 --- test/pytest/ci-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pytest/ci-template.yml b/test/pytest/ci-template.yml index afaf90da4d..f6aa700415 100644 --- a/test/pytest/ci-template.yml +++ b/test/pytest/ci-template.yml @@ -1,6 +1,6 @@ 
.pytest:
   stage: test
-  image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.5.5.base
+  image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.5.6.base
   tags:
     - k8s-default
   before_script:

From c5841a2d1754bc1b179b9a70f8bdd7463fd14f1b Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Tue, 25 Jun 2024 18:03:12 -0500
Subject: [PATCH 34/59] separate out parse_qonnx flow

---
 hls4ml/model/optimizer/__init__.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
index 712dc3822c..eb53ed7925 100644
--- a/hls4ml/model/optimizer/__init__.py
+++ b/hls4ml/model/optimizer/__init__.py
@@ -31,7 +31,7 @@
 del optimizers
 
 register_flow(
-    'convert',
+    'parse_qonnx',
     [
         'reshape_constant',
         'quant_constant_parameters',
@@ -51,9 +51,16 @@
         'merge_to_apply_alpha_div',
         'matmul_const_to_dense',
         'conv_to_conv_x_d',
-        'fuse_consecutive_batch_normalization',  # needs to be before infer_precision_types
-        'merge_linear_activation',  # needs to be before infer_precision_types
-        'fuse_batch_normalization',  # needs to be before infer_precision_types
+    ],
+)
+
+register_flow(
+    'convert',
+    [
+        'fuse_consecutive_batch_normalization',
+        'merge_linear_activation',
+        'fuse_batch_normalization',
+        # The ones above here need to be before infer_precision_types
         'infer_precision_types',
         'channels_last_converter',
         'remove_transpose_before_flatten',
@@ -65,6 +72,7 @@
         'qkeras_factorize_alpha',
         'extract_ternary_threshold',
     ],
+    requires=['parse_qonnx'],
 )  # TODO Maybe not all QKeras optmizers belong here?
 register_flow(

From de790ca3f889d777a19da3f802a2708e8cc53788 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Wed, 26 Jun 2024 14:01:46 -0500
Subject: [PATCH 35/59] Again allow for None in target shape--for pytorch

---
 hls4ml/model/layers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py
index 0db82ff411..d40d0f04b1 100644
--- a/hls4ml/model/layers.py
+++ b/hls4ml/model/layers.py
@@ -402,9 +402,9 @@ def initialize(self):
             else:
                 raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.")
 
-        # nones should not exist here
+        # remove Nones -- Seems to be used by pytorch parser
         if target_shape[0] is None:
-            raise RuntimeError(f"Unexpectedly have a None in {target_shape=}")
+            target_shape = target_shape[1:]
 
         # take care of -1 shapes
         shape = self._infer_output_shape(input_shape, target_shape)

From 2909d154dd49f81f0ada629f2e9bd45786a24ebf Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Thu, 18 Jul 2024 14:14:57 -0500
Subject: [PATCH 36/59] Following what seems to be done in the main branch

---
 hls4ml/model/optimizer/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
index eb53ed7925..282561e11e 100644
--- a/hls4ml/model/optimizer/__init__.py
+++ b/hls4ml/model/optimizer/__init__.py
@@ -60,6 +60,7 @@
         'fuse_consecutive_batch_normalization',
         'merge_linear_activation',
         'fuse_batch_normalization',
+        'eliminate_linear_activation',
         # The ones above here need to be before infer_precision_types
         'infer_precision_types',
         'channels_last_converter',

From c9693da8106ee9bf34ce52c9003684d825a7d0e7 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Fri, 19 Jul 2024 10:18:41 -0500
Subject: [PATCH 37/59] update infer_precision based on changes in keras-config-auto

---
 .../model/optimizer/passes/infer_precision.py | 298 +++++++++++++-----
 1 file
changed, 211 insertions(+), 87 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 51422c534e..5c1801156f 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -1,9 +1,10 @@ import math +from typing import Iterable import numpy as np from hls4ml.model.optimizer import ConfigurableOptimizerPass -from hls4ml.model.types import FixedPrecisionType, UnspecifiedPrecisionType +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, PrecisionType, UnspecifiedPrecisionType # TODO: The code assumes everything is Fixed or Integer precision. Need to add checks @@ -67,6 +68,12 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Dot']: return self._infer_dot_precision(node, types_to_infer) + if node_class in ['Embedding']: + return self._infer_embedding_precision(node, types_to_infer) + + if node_class in ['SimpleRNN', 'LSTM', 'GRU']: + return self._infer_rnn_precision(node, types_to_infer) + # What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent # this in config_from_* functions @@ -76,6 +83,20 @@ def _get_default_precision(self, node): model_config = node.model.config return model_config.backend.convert_precision_string(model_config.model_precision['default']) + def _get_maximum_precision(self, node): + model_config = node.model.config + if 'maximum' in model_config.model_precision: + return model_config.backend.convert_precision_string(model_config.model_precision['maximum']) + else: + return None + + def _all_supported_types(self, types: Iterable[PrecisionType]): + """Are all the types supported for inference--currently Integer or Fixed""" + for tp in types: + if not isinstance(tp, (IntegerPrecisionType, FixedPrecisionType)): + return False + return True + def _infer_default_type(self, node, type_name): model_config = node.model.config default_precision = model_config.backend.convert_precision_string(model_config.model_precision['default']) @@ -96,9 +117,6 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): inferred_types = [] input_precision = node.get_input_variable().type.precision - input_width = input_precision.width - input_integers = input_precision.integer - input_signed = input_precision.signed if 'weight_t' in types_to_infer: weight_quantizer = node.get_attr('weight_quantizer', None) @@ -110,10 +128,6 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): node.weights['weight'].update_precision(node.types['weight_t'].precision) inferred_types.append('weight_t') - weight_width = node.types['weight_t'].precision.width - weight_integers = node.types['weight_t'].precision.integer - weight_signed = node.types['weight_t'].precision.signed - if 'bias_t' in types_to_infer: bias_quantizer = node.get_attr('bias_quantizer', None) if bias_quantizer is not None: @@ -124,25 +138,42 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): node.weights['bias'].update_precision(node.types['bias_t'].precision) inferred_types.append('bias_t') - bias_width = node.types['bias_t'].precision.width - bias_integers = node.types['bias_t'].precision.integer - bias_signed = node.types['bias_t'].precision.signed - no_bias = node.weights['bias'].nonzeros == 0 and self.infer_no_bias # no bias + if self._all_supported_types((input_precision, node.types['weight_t'].precision, node.types['bias_t'].precision)): + input_width = input_precision.width + 
input_integers = input_precision.integer + input_signed = input_precision.signed - # using math.ceil instead of np.ceil because it returns an int - bitwidth = weight_width + input_width + math.ceil(np.log2(n_ops)) - integers = weight_integers + input_integers + math.ceil(np.log2(n_ops)) - signed = weight_signed or input_signed + weight_width = node.types['weight_t'].precision.width + weight_integers = node.types['weight_t'].precision.integer + weight_signed = node.types['weight_t'].precision.signed - frac = bitwidth - integers + bias_width = node.types['bias_t'].precision.width + bias_integers = node.types['bias_t'].precision.integer + bias_signed = node.types['bias_t'].precision.signed + no_bias = node.weights['bias'].nonzeros == 0 and self.infer_no_bias # no bias + + # using math.ceil instead of np.ceil because it returns an int + bitwidth = weight_width + input_width + math.ceil(np.log2(n_ops)) + integers = weight_integers + input_integers + math.ceil(np.log2(n_ops)) + signed = weight_signed or input_signed + + frac = bitwidth - integers - if not no_bias: - integers = max(integers + (bias_signed and not signed), bias_integers + (signed and not bias_signed)) + 1 - bitwidth = integers + max(frac, bias_width - bias_integers) - signed = signed or bias_signed + if not no_bias: + integers = max(integers + (bias_signed and not signed), bias_integers + (signed and not bias_signed)) + 1 + bitwidth = integers + max(frac, bias_width - bias_integers) + signed = signed or bias_signed - # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. - new_type = FixedPrecisionType(bitwidth, integers, signed) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + bitwidth = min(bitwidth, max_precision.width) + integers = min(integers, max_precision.integer) + + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. 
+ new_type = FixedPrecisionType(bitwidth, integers, signed) + else: + new_type = self._get_default_precision(node) if 'accum_t' in types_to_infer: node.types['accum_t'].name = node.name + '_accum_t' @@ -166,6 +197,7 @@ def _infer_conv_precision(self, node, types_to_infer): n_ops = node.get_attr('n_chan') * node.get_attr('filt_height', 1) * node.get_attr('filt_width') return self._infer_common_precision(node, types_to_infer, n_ops) + # This function is ignored because we will split sepconv in the future def _infer_sepconv_precision(self, node, types_to_infer): inferred_types = [] @@ -265,24 +297,35 @@ def _infer_bn_precision(self, node, types_to_infer): scale_precision = node.types['scale_t'].precision bias_precision = node.types['bias_t'].precision - after_scale_signed = scale_precision.signed or input_precision.signed - after_scale_width = input_precision.width + scale_precision.width - after_scale_integer = input_precision.integer + scale_precision.integer + if self._all_supported_types((input_precision, scale_precision, bias_precision)): + + after_scale_signed = scale_precision.signed or input_precision.signed + after_scale_width = input_precision.width + scale_precision.width + after_scale_integer = input_precision.integer + scale_precision.integer - out_precision_signed = after_scale_signed or bias_precision.signed - out_precision_integer = ( - max( - after_scale_integer + (bias_precision.signed and not after_scale_signed), - bias_precision.integer + (after_scale_signed and not bias_precision.signed), + out_precision_signed = after_scale_signed or bias_precision.signed + out_precision_integer = ( + max( + after_scale_integer + (bias_precision.signed and not after_scale_signed), + bias_precision.integer + (after_scale_signed and not bias_precision.signed), + ) + + 1 + ) + out_precision_width = out_precision_integer + max( + after_scale_width - after_scale_integer, bias_precision.fractional ) - + 1 - ) - out_precision_width = out_precision_integer + max( - after_scale_width - after_scale_integer, bias_precision.fractional - ) - # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. - out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + out_precision_width = min(out_precision_width, max_precision.width) + out_precision_integer = min(out_precision_integer, max_precision.integer) + + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. + out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) + + else: + out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision @@ -298,20 +341,29 @@ def _infer_pooling_precision(self, node, types_to_infer): input_precision = node.get_input_variable().type.precision pool_op = node.attributes['pool_op'].lower() - width = input_precision.width - integer = input_precision.integer - signed = input_precision.signed + if pool_op == 'max': + # This has the benefit of working for xnor types. 
I don't think "copy" is needed + accum_type = input_precision + + elif pool_op == 'average': + if self._all_supported_types((input_precision,)): + width = input_precision.width + integer = input_precision.integer + signed = input_precision.signed + + pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') + extra_bits = int(np.ceil(np.log2(pool_size))) + + # for now ignore max precision in this case + accum_type = FixedPrecisionType( + width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed + ) + else: + accum_type = self._get_default_precision(node) - pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') - if pool_op == 'average': - extra_bits = int(np.ceil(np.log2(pool_size))) - elif pool_op == 'max': - extra_bits = 0 else: raise ValueError(f'Unknown pooling operation: {pool_op}') - accum_type = FixedPrecisionType(width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed) - node.types['accum_t'].name = node.name + '_accum_t' node.types['accum_t'].precision = accum_type @@ -331,22 +383,76 @@ def _infer_merge_precision(self, node, types_to_infer): op = node.get_attr('op').lower() if op in ('add', 'subtract', 'average'): - new_signed = input_1.signed or input_2.signed or op == 'subtract' - new_int = ( - max( - input_1.integer + (input_2.signed and not input_1.signed), - input_2.integer + (input_1.signed and not input_2.signed), + if self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed or op == 'subtract' + new_int = ( + max( + input_1.integer + (input_2.signed and not input_1.signed), + input_2.integer + (input_1.signed and not input_2.signed), + ) + + 1 ) - + 1 - ) - new_width = new_int + max(input_1.fractional, input_2.fractional) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + new_width = new_int + max(input_1.fractional, input_2.fractional) + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) elif op == 'multiply': - new_signed = input_1.signed or input_2.signed - new_int = input_1.integer + input_2.integer - new_width = input_1.width + input_2.width - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + if self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + new_int = input_1.integer + input_2.integer + new_width = input_1.width + input_2.width + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) elif op in ('maximum', 'minimum'): + if input_1 == input_2: + # can handle binary and potentially others + out_precision = input_1 # I assume copy is not necessary + elif self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 
1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) + else: + print(f'Warning: not propagating weights for type {op}') + out_precision = self._get_default_precision(node) + + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + + return ['result_t'] + + def _infer_cat_precision(self, node, types_to_infer): + assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + + input_1 = node.get_input_variable(node.inputs[0]).type.precision + input_2 = node.get_input_variable(node.inputs[1]).type.precision + + if input_1 == input_2: + # can handle binary and potentially others + out_precision = input_1 # I assume copy is not necessary + elif self._all_supported_types((input_1, input_2)): new_signed = input_1.signed or input_2.signed input_1_integer = input_1.integer @@ -360,9 +466,15 @@ def _infer_merge_precision(self, node, types_to_infer): new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) new_int = max(input_1_integer, input_2_integer) + + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) else: - print(f'Warning: not propagating weights for type {op}') out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' @@ -370,46 +482,58 @@ def _infer_merge_precision(self, node, types_to_infer): return ['result_t'] - def _infer_cat_precision(self, node, types_to_infer): + def _infer_dot_precision(self, node, types_to_infer): assert 'result_t' in types_to_infer and len(types_to_infer) == 1 input_1 = node.get_input_variable(node.inputs[0]).type.precision input_2 = node.get_input_variable(node.inputs[1]).type.precision - new_signed = input_1.signed or input_2.signed + if self._all_supported_types((input_1, input_2)): + n_in = node.get_input_variable(node.inputs[0]).shape[0] - input_1_integer = input_1.integer - input_2_integer = input_2.integer - - # add one to integer if unsigned while new is signed - if new_signed and not input_1.signed: - input_1_integer += 1 - if new_signed and not input_2.signed: - input_2_integer += 1 + new_signed = input_1.signed or input_2.signed + new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) + new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) - new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) - new_int = max(input_1_integer, input_2_integer) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision return ['result_t'] - def 
_infer_dot_precision(self, node, types_to_infer): - assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + def _infer_embedding_precision(self, node, types_to_infer): + inferred_types = [] - input_1 = node.get_input_variable(node.inputs[0]).type.precision - input_2 = node.get_input_variable(node.inputs[1]).type.precision + if 'embeddings_t' in types_to_infer: + self._infer_default_type(node, 'embeddings_t') + node.weights['embeddings'].update_precision(node.types['embeddings_t'].precision) + inferred_types.append('embeddings_t') + + if 'result_t' in types_to_infer: + out_precision = self._get_default_precision(node) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + inferred_types.append('result_t') - n_in = node.get_input_variable(node.inputs[0]).shape[0] + return inferred_types - new_signed = input_1.signed or input_2.signed - new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) - new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) + # TODO: This is just a placeholder + def _infer_rnn_precision(self, node, types_to_infer): + inferred_types = [] - out_precision = FixedPrecisionType(new_width, new_int, new_signed) - node.types['result_t'].name = node.name + '_result_t' - node.types['result_t'].precision = out_precision + # for now just do the weights and leave the rest for the default catch + for weightvar in ('weight', 'bias', 'recurrent_weight', 'recurrent_bias'): + if f'{weightvar}_t' in types_to_infer: + self._infer_default_type(node, f'{weightvar}_t') + node.weights[weightvar].update_precision(node.types[f'{weightvar}_t'].precision) + inferred_types.append(f'{weightvar}_t') - return ['result_t'] + return inferred_types From aaaa2fcfe01a8aed2efb09707f2eb423366dac1e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 19 Jul 2024 10:20:26 -0500 Subject: [PATCH 38/59] loosen batchnorm merging restrictions, fix ternary handling --- hls4ml/model/optimizer/__init__.py | 4 +-- .../model/optimizer/passes/batchnorm_opt.py | 32 +++++++++---------- hls4ml/model/optimizer/passes/qkeras.py | 10 +++++- test/pytest/test_qkeras.py | 4 ++- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 282561e11e..c6270d8f28 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -61,6 +61,8 @@ 'merge_linear_activation', 'fuse_batch_normalization', 'eliminate_linear_activation', + 'qkeras_factorize_alpha', + 'extract_ternary_threshold', # The ones above here need to be before infer_precision_types 'infer_precision_types', 'channels_last_converter', @@ -70,8 +72,6 @@ 'fuse_bias_add', 'expand_layer_group', 'output_rounding_saturation_mode', - 'qkeras_factorize_alpha', - 'extract_ternary_threshold', ], requires=['parse_qonnx'], ) # TODO Maybe not all QKeras optmizers belong here? 
diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 26292d7e2a..94a9a32d70 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -170,12 +170,12 @@ def match(self, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized scale_compatible = ( - (prev_node.get_attr('scale_quantizer') is None and node.get_attr('scale_quantizer') is None) + (prev_node.get_attr('scale_quantizer') is None or node.get_attr('scale_quantizer') is None) or (s0 == np.ones_like(s0)).all() or (s1 == np.ones_like(s1)).all() ) bias_compatible = ( - (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) + (prev_node.get_attr('bias_quantizer') is None or node.get_attr('bias_quantizer') is None) or (b0 == np.zeros_like(b0)).all() or (b1 == np.zeros_like(b1)).all() ) @@ -195,26 +195,24 @@ def transform(self, model, node): # if len(node_map[node.outputs[0]]) > 1: # return False - # only merge if the types are integer or fixed - if ( - not isinstance(prev_node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(prev_node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - ): - return False - s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized - s_quantizer = ( - node.get_attr('scale_quantizer') if (s0 == np.ones_like(s0)).all() else prev_node.get_attr('scale_quantizer') - ) - b_quantizer = ( - node.get_attr('bias_quantizer') if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr('bias_quantizer') - ) + if (s0 == np.ones_like(s0)).all(): + s_quantizer = node.get_attr('scale_quantizer') + elif (s1 == np.ones_like(s1)).all(): + s_quantizer = prev_node.get_attr('scale_quantizer') + else: + s_quantizer = None + + if (b0 == np.ones_like(b0)).all(): + b_quantizer = node.get_attr('bias_quantizer') + elif (b1 == np.ones_like(b1)).all(): + b_quantizer = prev_node.get_attr('bias_quantizer') + else: + b_quantizer = None node.set_attr('scale_quantizer', s_quantizer) node.set_attr('bias_quantizer', b_quantizer) diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index a97438832d..03690bed0d 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -163,8 +163,16 @@ def transform(self, model, node): else: n_in = node.get_attr('n_out') + # the name of the new ApplyAlpha node + alpha_name = node.get_attr('name') + '_alpha' + + # make the precision auto + alpha_precision = {'Precision': 'auto'} + model.config.set_name_config(alpha_name, alpha_precision) + model.config.parse_name_config(alpha_name, alpha_precision) + attrs = { - 'name': node.get_attr('name') + '_alpha', + 'name': alpha_name, 'class_name': 'Alpha', 'inputs': node.outputs, 'n_in': n_in, diff --git a/test/pytest/test_qkeras.py b/test/pytest/test_qkeras.py index 45d015807b..5f62475d1a 100644 --- a/test/pytest/test_qkeras.py +++ b/test/pytest/test_qkeras.py @@ -356,8 +356,10 @@ def test_relu_negative_slope(randX_1000_1, quantizer, backend, io_type): ], ) def test_qactivation_kwarg(randX_100_10, 
activation_quantizer, weight_quantizer):
-    if activation_quantizer in ['binary', 'ternary']:
+    if activation_quantizer in ['binary']:
         name = 'bnbt_qdense_alpha'
+    elif activation_quantizer in ['ternary']:
+        name = 'bnbt_qdense_ternary_scale'
     else:
         name = f'qdense_{eval(activation_quantizer).__class__.__name__}'

From a2b88f4a1a9f6c4ddb06bdf50c5e5e8d21dd0eb4 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Fri, 19 Jul 2024 17:24:52 -0500
Subject: [PATCH 39/59] remove some backends from slow qonnx test

---
 test/pytest/test_qonnx.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py
index b955608b88..5b7b9d95c9 100644
--- a/test/pytest/test_qonnx.py
+++ b/test/pytest/test_qonnx.py
@@ -105,11 +105,11 @@ def test_tfc_2w2a(tfc_2w2a_model, backend):
     np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1)
 
 
-@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus'])
+@pytest.mark.parametrize('backend', ['Vitis'])
 def test_cnv_2w2a(cnv_2w2a_model, backend):
     """
-    This tests a convolution model. Note: the batch normalizations weights not quantized, so it
-    is difficult to make this match perfectly. It is also a slow test.
+    This tests a convolution model. Note: the batch normalization weights are not quantized, so it is
+    difficult to make this match perfectly. It is also a slow test, which is why only Vitis is tested.
     """
     model = cnv_2w2a_model

From ef02b4f4a45ae4c032d8ea49fc9854e8d4de7bc7 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Wed, 21 Aug 2024 11:15:05 -0500
Subject: [PATCH 40/59] move multi_dense to conv above inferring precision types

---
 hls4ml/model/optimizer/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
index e311eb96cf..64be9903ad 100644
--- a/hls4ml/model/optimizer/__init__.py
+++ b/hls4ml/model/optimizer/__init__.py
@@ -63,6 +63,7 @@
         'eliminate_linear_activation',
         'qkeras_factorize_alpha',
         'extract_ternary_threshold',
+        'replace_multidimensional_dense_with_conv',
         'seperable_to_depthwise_and_conv',
         # The ones above here need to be before infer_precision_types
         'infer_precision_types',
@@ -74,7 +75,6 @@
         'expand_layer_group',
         'output_rounding_saturation_mode',
         'fuse_consecutive_batch_normalization',
-        'replace_multidimensional_dense_with_conv',
         'enforce_proxy_model_embedded_config',
     ],
     requires=['parse_qonnx'],

From c3ffa7bf5fde0c54b4d514ff2a18c5c1228e9549 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Wed, 21 Aug 2024 11:38:40 -0500
Subject: [PATCH 41/59] fix the default reuse factor

---
 hls4ml/utils/config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py
index 51e87244e4..78f033c28c 100644
--- a/hls4ml/utils/config.py
+++ b/hls4ml/utils/config.py
@@ -401,6 +401,8 @@ def make_layer_config(layer):
                     precision_cfg[name] = 'auto'
                 else:
                     precision_cfg[name] = str(attr.default)
+            elif attr.name == 'reuse_factor':
+                layer_config[attr.config_name] = default_reuse_factor
             else:
                 if attr.default is not None:
                     layer_config[attr.config_name] = attr.default

From cc7652de36847360b54c99c6fb9cad3665760943 Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Tue, 3 Sep 2024 11:15:53 -0500
Subject: [PATCH 42/59] Pre-commit fix

---
 hls4ml/converters/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py
index 40515afea6..c5ff82703c 100644
---
a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -11,6 +11,7 @@ from hls4ml.converters.keras_to_hls import parse_keras_model # noqa: F401 from hls4ml.converters.keras_to_hls import keras_to_hls, register_keras_layer_handler from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 + # from hls4ml.converters.pytorch_to_hls import parse_pytorch_model # noqa: F401 from hls4ml.model import ModelGraph from hls4ml.utils.config import create_config From b36fe4ff2eadd2c023550dec580a64e0dbb2b5ef Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 Sep 2024 19:19:48 -0500 Subject: [PATCH 43/59] fix qonnx review suggestions --- hls4ml/converters/__init__.py | 2 -- hls4ml/converters/onnx/core.py | 14 ------------- .../model/optimizer/passes/batchnorm_opt.py | 21 ++++++++----------- hls4ml/model/optimizer/passes/bn_fuse.py | 11 ++++------ 4 files changed, 13 insertions(+), 35 deletions(-) diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py index c5ff82703c..13e90df687 100644 --- a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -11,8 +11,6 @@ from hls4ml.converters.keras_to_hls import parse_keras_model # noqa: F401 from hls4ml.converters.keras_to_hls import keras_to_hls, register_keras_layer_handler from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 - -# from hls4ml.converters.pytorch_to_hls import parse_pytorch_model # noqa: F401 from hls4ml.model import ModelGraph from hls4ml.utils.config import create_config from hls4ml.utils.symbolic_utils import LUTFunction diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index c6aaa6009c..d84ba98a95 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -29,7 +29,6 @@ def parse_matmul_layer(node, input_names, input_shapes, graph): 'Softmax', 'Softsign', 'Softplus', - # 'Clip', ] activation_map = { @@ -45,7 +44,6 @@ def parse_matmul_layer(node, input_names, input_shapes, graph): 'Softmax': 'Softmax', 'Softsign': 'Activation', 'Softplus': 'Activation', - # 'Clip': 'Clip', } # --------- @@ -69,18 +67,6 @@ def parse_activation_layer(node, input_names, input_shapes, graph): layer['activation'] = layer['class_name'] layer['activ_param'] = get_onnx_attribute(node, 'alpha', 0.01) - # # Don't yet support Clip - # elif layer['class_name'] == 'Clip': - # clip_min_node = [x for x in graph.initializer if x.name in input_names] - # clip_min = clip_min_node[0].float_data[0] - - # # Check if it's relu or not - # if clip_min == 0.0: - # layer['class_name'] = 'Activation' - # layer['activation'] = 'ReLU' - # else: - # raise Exception('Clip with min != 0 is not supported yet!') - else: layer['activation'] = layer['class_name'] layer['class_name'] = 'Activation' diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 94a9a32d70..50bbf96e04 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -30,7 +30,7 @@ def transform(self, model, node): gamma_node = node.get_input_node(node.inputs[1]) if not isinstance(gamma_node, Constant): - raise TypeError('Only consant gammas supported') + raise TypeError('Only constant gammas supported') gamma = gamma_node.attributes['value'] attributes['gamma_data'] = gamma attributes['gamma_quantizer'] = gamma_node.get_attr('quantizer') @@ -40,7 +40,7 @@ def transform(self, model, node): beta_node = node.get_input_node(node.inputs[2]) if not isinstance(beta_node, Constant): 
- raise TypeError('Only consant betas supported') + raise TypeError('Only constant betas supported') beta = beta_node.attributes['value'] attributes['beta_data'] = beta attributes['beta_quantizer'] = beta_node.get_attr('quantizer') @@ -49,7 +49,7 @@ def transform(self, model, node): moving_mean_node = node.get_input_node(node.inputs[3]) if not isinstance(moving_mean_node, Constant): - raise TypeError('Only consant moving_means supported') + raise TypeError('Only constant moving_means supported') moving_mean = moving_mean_node.attributes['value'] attributes['mean_data'] = moving_mean attributes['mean_quantizer'] = moving_mean_node.get_attr('quantizer') @@ -58,7 +58,7 @@ def transform(self, model, node): moving_variance_node = node.get_input_node(node.inputs[4]) if not isinstance(moving_variance_node, Constant): - raise TypeError('Only consant moving_variances supported') + raise TypeError('Only constant moving_variances supported') moving_variance = moving_variance_node.attributes['value'] attributes['variance_data'] = moving_variance attributes['variance_quantizer'] = moving_variance_node.get_attr('quantizer') @@ -147,12 +147,14 @@ def transform(self, model, node): class FuseConsecutiveBatchNormalization(OptimizerPass): """ - OptimizerPass to merge consecutive BatchNormalization layers, - only if the earlier one does not have quantization specified + OptimizerPass to merge consecutive BatchNormalization layers, only if the earlier one does not have the output type + specified. There is a further check on the compatibility to merge: except in cases when merging a scale of 1 or a + bias of 0, this does not merge when both scales or both biases are quantized. Note: Consider restricting this to ApplyAlpha. Batch Normalization-style quantization seems to be ignored. - Note: This optimizer may not be safe if weights are updateable. May need to turn off. + Note: This optimizer may not be safe if weights are updateable, in particular if a scale can go from ones to other + values or if a bias can go from zeros to other values. """ def match(self, node): @@ -190,11 +192,6 @@ def transform(self, model, node): if len(prev_map[prev_node.outputs[0]]) > 1: return False - # # Not sure why this part is needed - # node_map = node.get_output_use_map() - # if len(node_map[node.outputs[0]]) > 1: - # return False - s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index b3e8e454c8..000d8380ce 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -7,8 +7,10 @@ class FuseBatchNormalization(OptimizerPass): """ - OptimizerPass to merge BatchNormalization layers, - only if the earlier one does not have quantization specified + OptimizerPass to merge a BatchNormalization layer with Dense or Conv layer, only if the Dense or Conv layer does not + have the output type specified. There is a further check on the compatibility to merge: except in cases when merging a + weight/scale of 1 or a bias of 0, this optimizer does not merge nodes when both the weight and scale or both biases + are quantized. Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. 
@@ -49,11 +51,6 @@ def transform(self, model, node): if len(parent_map[parent_node.outputs[0]]) > 1: return False - # # Not sure why this part is needed - # node_map = node.get_output_use_map() - # if len(node_map[node.outputs[0]]) > 1: - # return False - parent_weight = parent_node.weights['weight'] parent_bias = parent_node.weights['bias'] From c37d953181f64396d079c5d4b5f51dabceae8e2e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 Sep 2024 19:22:44 -0500 Subject: [PATCH 44/59] fix qonnx review suggestions (part 2) --- hls4ml/model/optimizer/passes/merge_const.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 78591d203c..a75ed27aca 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -67,7 +67,7 @@ def transform(self, model, node): class MergeToApplyAlpha(OptimizerPass): - """Convert Add, Sub, Mul, or Div Merges with consant to ApplyAlpha""" + """Convert Add, Sub, Mul, or Div Merges with constant to ApplyAlpha""" def match(self, node): is_match = ( @@ -178,7 +178,7 @@ def transform(self, model, node): class MergeToApplyAlphaDiv(OptimizerPass): """ - Convert Div Merges with consant to ApplyAlpha + Convert Div Merges with constant to ApplyAlpha TODO: propagate precision """ From 23825ded13fb418516af29fa6a4768c97bc98ba8 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 4 Sep 2024 12:10:19 -0500 Subject: [PATCH 45/59] fix error message --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 50bbf96e04..cd238092c8 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -24,7 +24,7 @@ def transform(self, model, node): """ if not (len(node.inputs) == 5 and all(node.inputs)): - raise ValueError(f'All {len.node.inputs} BatchNormOnnnx inputs need to be defined') + raise ValueError('All 5 BatchNormOnnnx inputs need to be defined') attributes = {k: node.attributes.get(k, None) for k in _base_attributes} From cad06fa9361810fb006061a65fbea2b49feee50b Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 9 Sep 2024 14:50:21 -0500 Subject: [PATCH 46/59] change order of qonnx optimizers --- hls4ml/model/optimizer/__init__.py | 2 +- hls4ml/model/optimizer/passes/batchnorm_opt.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 64be9903ad..fee180b0c5 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -37,8 +37,8 @@ 'quant_constant_parameters', 'quant_to_activation', 'fuse_quant_with_constant', - 'quant_to_alpha_activation_alpha', 'const_quant_to_const_alpha', + 'quant_to_alpha_activation_alpha', 'batch_norm_onnx_constant_parameters', 'constant_batch_norm_fusion', 'merge_two_constants', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index cd238092c8..0dde6b77a9 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant @@ -94,11 +96,14 @@ def transform(self, model, node): """ Remove the batch norm """ + 
warnings.warn('ConstantBatchNormFusion should probably not be triggered. Check the optimizer order.', stacklevel=2)
 
         const_node = node.get_input_node(node.inputs[0])
 
         const_prec = const_node.get_output_variable().type.precision
-        new_val = const_node.value * node.weights['scale'].data_unquantized + node.weights['bias'].data_unquantized
+        new_val = (
+            const_node.attributes['value'] * node.weights['scale'].data_unquantized + node.weights['bias'].data_unquantized
+        )
 
         const_node.set_attr('value', new_val)
         const_node.set_attr('quantizer', node.get_attr('quantizer'))  # None if not defined

From 51c80f96e4c6caf0f3c5310a6b334ddd008e9c1d Mon Sep 17 00:00:00 2001
From: Jovan Mitrevski
Date: Thu, 12 Sep 2024 12:16:43 -0500
Subject: [PATCH 47/59] make the optimizer order more similar to the main branch

---
 hls4ml/model/optimizer/__init__.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
index fee180b0c5..840d42ebf2 100644
--- a/hls4ml/model/optimizer/__init__.py
+++ b/hls4ml/model/optimizer/__init__.py
@@ -57,25 +57,24 @@
 register_flow(
     'convert',
     [
-        'fuse_consecutive_batch_normalization',
+        'channels_last_converter',
         'merge_linear_activation',
-        'fuse_batch_normalization',
-        'eliminate_linear_activation',
-        'qkeras_factorize_alpha',
-        'extract_ternary_threshold',
-        'replace_multidimensional_dense_with_conv',
         'seperable_to_depthwise_and_conv',
-        # The ones above here need to be before infer_precision_types
-        'infer_precision_types',
-        'channels_last_converter',
         'remove_transpose_before_flatten',
         'remove_nop_transpose',
         'remove_single_channel_transpose',
         'fuse_bias_add',
         'expand_layer_group',
         'output_rounding_saturation_mode',
+        'qkeras_factorize_alpha',
+        'extract_ternary_threshold',
         'fuse_consecutive_batch_normalization',
+        'fuse_batch_normalization',
+        'replace_multidimensional_dense_with_conv',
         'enforce_proxy_model_embedded_config',
+        'eliminate_linear_activation',
+        # many of the above optimizers need to be done before this
+        'infer_precision_types',
     ],
     requires=['parse_qonnx'],
 )  # TODO Maybe not all QKeras optmizers belong here?
@@ -83,10 +82,7 @@ register_flow( 'optimize', [ - 'eliminate_linear_activation', 'remove_nop_batch_normalization', - 'infer_precision_types', - 'set_precision_concat', ], requires=['convert'], ) From 8eaf10a1557fb56c8abd97966e86488357dde1b7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 19 Sep 2024 16:16:16 -0500 Subject: [PATCH 48/59] fix dimensions when moving scales --- hls4ml/model/layers.py | 2 +- hls4ml/model/optimizer/passes/move_scales.py | 71 +++++++++++--------- hls4ml/model/optimizer/passes/quant_opt.py | 22 +++--- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index e5ceaca28d..bc3bc2b1c3 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -976,7 +976,7 @@ def initialize(self): class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), - Attribute('n_filt', default=0), + Attribute('n_filt', default=-1), WeightAttribute('scale'), WeightAttribute('bias'), TypeAttribute('scale'), diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index cec69af5e8..3776a6d202 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -67,15 +67,16 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not bias.shape and bias == 0: # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - np.broadcast_to(scale, output.shape) # check size compatibility - newscale = scale - newbias = np.array(0) + newscale = np.broadcast_to(scale, output.shape) # check size compatibility + newbias = np.zeros(output.shape) can_propagate = True except ValueError: can_propagate = False @@ -84,10 +85,9 @@ def transform(self, model, node): if not can_propagate and isinstance(inp[other_idx], Constant): # can handle nonzero bias in some cases if other value is a Constant try: - np.broadcast_to(scale, output.shape) # check size compatibility - newscale = scale - newbias = inp[other_idx].attributes['value'] * bias - np.broadcast_to(newbias, output.shape) + newscale = np.broadcast_to(scale, output.shape) # check size compatibility + newbias = np.broadcast_to(inp[other_idx].attributes['value'] * bias, output.shape) + new_attrs.pop('bias_precision', None) # remove special bias precision settings can_propagate = True except ValueError: can_propagate = False @@ -97,9 +97,10 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -136,9 +137,11 @@ def transform(self, model, node): model.remove_node(in0) model.remove_node(in1) - new_node = model.make_node('ApplyAlpha', in0.name, in0.attributes, [x for x in node.outputs]) - new_node.add_weights(scale) - new_node.add_bias(bias) + new_attrs = in0.attributes + new_attrs['scale_data'] = scale + new_attrs['bias_data'] = bias + + new_node = model.make_node('ApplyAlpha', in0.name, new_attrs, [x for x in node.outputs]) 
model.insert_node(new_node) return True @@ -170,15 +173,16 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not bias.shape and bias == 0: # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - np.broadcast_to(scale, output.shape) # check broadcastable - newscale = scale - newbias = np.array(0) + newscale = np.broadcast_to(scale, output.shape) # check broadcastable + newbias = np.zeros(output.shape) can_propagate = True except ValueError: can_propagate = False @@ -188,9 +192,10 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -224,15 +229,16 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not bias.shape and bias == 0: # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - np.broadcast_to(scale, output.shape) # make sure broadcastable - newscale = scale - newbias = np.array(0) + newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable + newbias = np.zeros(output.shape) can_propagate = True except ValueError: can_propagate = False @@ -242,9 +248,10 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -278,14 +285,15 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not scale.shape and scale == 1: # No scale, just additional bias try: - np.broadcast_to(bias, output.shape) - newscale = np.array(1) - newbias = bias + newscale = np.ones(output.shape) + newbias = np.broadcast_to(bias, output.shape) can_propagate = True except ValueError: can_propagate = False @@ -295,8 +303,9 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index ed7f9701a2..69e9ca7685 
100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -252,11 +252,13 @@ def transform(self, model, node): # but now add the ApplyAlhpas before and after + inshape = node.get_input_variable().shape + scale = node.get_attr('scale') bias = node.get_attr('zeropt') - attributes_scale = {} - attributes_rescale = {} + attributes_scale = {'n_filt': -1} + attributes_rescale = {'n_filt': -1} scale_config = copy.deepcopy(config) scale_name = f'{node.name}_scale' @@ -270,16 +272,16 @@ def transform(self, model, node): firstscale = 1 / scale firstbias = bias - attributes_scale['scale_data'] = firstscale - attributes_scale['bias_data'] = firstbias + attributes_scale['scale_data'] = np.broadcast_to(firstscale, inshape) + attributes_scale['bias_data'] = np.broadcast_to(firstbias, inshape) scale_node = model.make_node(ApplyAlpha, scale_name, attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) rescale = scale rebias = -bias * scale - attributes_rescale['scale_data'] = rescale - attributes_rescale['bias_data'] = rebias + attributes_rescale['scale_data'] = np.broadcast_to(rescale, inshape) + attributes_rescale['bias_data'] = np.broadcast_to(rebias, inshape) rescale_node = model.make_node(ApplyAlpha, rescale_name, attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) @@ -332,7 +334,9 @@ def transform(self, model, node): const_node.types['result_t'].precision = precision const_node.get_output_variable().type.precision = precision - attributes_rescale = {} + inshape = node.get_input_variable().shape + + attributes_rescale = {'n_filt': -1} rescale_config = copy.deepcopy(model.config.get_layer_config(node)) rescale_name = f'{node.name}_rescale' @@ -341,8 +345,8 @@ def transform(self, model, node): rescale = scale rebias = -bias * scale - attributes_rescale['scale_data'] = rescale - attributes_rescale['bias_data'] = rebias + attributes_rescale['scale_data'] = np.broadcast_to(rescale, inshape) + attributes_rescale['bias_data'] = np.broadcast_to(rebias, inshape) rescale_node = model.make_node( ApplyAlpha, rescale_name, attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] From d80dc3b410d2a2578a79aae905530a92e7b732a1 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 19 Sep 2024 21:44:22 -0500 Subject: [PATCH 49/59] Added support and some missing parts for `Depthwise` and `Pointwise` Convolutions from QONNX --- hls4ml/converters/onnx/convolution.py | 11 ++- hls4ml/model/optimizer/__init__.py | 1 + .../model/optimizer/passes/conv_to_convxd.py | 16 ++-- .../passes/conv_to_depthwiseconvxd.py | 94 +++++++++++++++++++ 4 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index 85dc0ca804..d84fb855a8 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -21,13 +21,18 @@ def parse_conv_layer(node, input_names, input_shapes, graph): if dilations is None: dilations = [1] * len(layer['kernel_shape']) - if get_onnx_attribute(node, 'group') != 1: - raise ValueError("Only 1 group supported corrently") - layer['in_width'] = input_shapes[0][-2] layer['n_chan'] = input_shapes[0][-1] layer['n_filt'] = input_shapes[1][0] + layer['group'] = int(get_onnx_attribute(node, 'group')) + if layer['group'] != 1: + layer['depth_multiplier'] = get_onnx_attribute(node, 'group') / layer['n_chan'] + if not 
layer['depth_multiplier'].is_integer(): + raise ValueError('Depth multiplier must be an integer') + else: + layer['depth_multiplier'] = int(layer['depth_multiplier']) + layer['n_dim'] = len(input_shapes[0]) - 2 # 2 comes from channels and batch dimentions if layer['n_dim'] not in (1, 2): raise ValueError("Only 1D and 2D convolutions are supported") diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 840d42ebf2..10f652345f 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -51,6 +51,7 @@ 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', + 'conv_to_depthwise_conv_x_d', ], ) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 6fb88ad0d0..25ac50ba40 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -29,12 +29,16 @@ class ConvToConvXD(OptimizerPass): """Convert Conv with constant to a Conv1D or Conv2D layer""" def match(self, node): - is_match = isinstance(node, Conv) and ( - (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) - or ( - len(node.inputs) == 3 - and isinstance(node.get_input_node(node.inputs[1]), Constant) - and isinstance(node.get_input_node(node.inputs[2]), Constant) + is_match = ( + isinstance(node, Conv) + and node.get_attr('group') == 1 + and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) ) ) diff --git a/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py b/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py new file mode 100644 index 0000000000..26603c6a64 --- /dev/null +++ b/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py @@ -0,0 +1,94 @@ +import numpy as np + +from hls4ml.model.layers import Constant, Conv, DepthwiseConv1D, DepthwiseConv2D +from hls4ml.model.optimizer import OptimizerPass + +# these are attributes to copy +_base_attributes = ( + 'in_width', + 'out_width', + 'n_chan', + 'n_filt', + 'pad_left', + 'pad_right', + 'filt_width', + 'stride_width', + 'dilation_width', + 'in_height', + 'out_height', + 'pad_top', + 'pad_bottom', + 'filt_height', + 'stride_height', + 'dilation_height', + 'data_format', +) + + +class ConvToDepthwiseConvXD(OptimizerPass): + """Convert Conv with constant to a DepthwiseConv1D or DepthwiseConv2D layer""" + + def match(self, node): + is_match = ( + isinstance(node, Conv) + and node.get_attr('group') == node.get_attr('n_chan') + and (node.get_attr('group') != 1) + and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) + ) + ) + + return is_match + + def transform(self, model, node): + """Convert Conv with constant to a DepthwiseConv1D or DepthwiseConv2D layer""" + + weight_node = node.get_input_node(node.inputs[1]) + weight_data = weight_node.attributes['value'] + bias_node = None + if len(node.inputs) == 3: + bias_node = node.get_input_node(node.inputs[2]) + + # creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + + # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) + if 
node.attributes['n_dim'] == 1: + newtype = DepthwiseConv1D + attributes['depthwise_data'] = np.transpose(weight_data, (1, 2, 0)) + else: + newtype = DepthwiseConv2D + attributes['depthwise_data'] = np.transpose(weight_data, (1, 2, 3, 0)) + attributes['depthwise_quantizer'] = weight_node.get_attr('quantizer') + + if bias_node: + attributes['bias_data'] = bias_node.attributes['value'] + attributes['bias_quantizer'] = bias_node.get_attr('quantizer') + attributes['use_bias'] = True + else: + attributes['bias_data'] = np.zeros(attributes['n_filt']) + attributes['use_bias'] = False + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'{newtype.__name__}_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) + + # making new node + new_node = model.make_node(newtype, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) + + # removing and replacing old nodes + if bias_node: + model.remove_node(bias_node, rewire=False) + del node.inputs[2] + model.remove_node(weight_node, rewire=False) + del node.inputs[1] + model.replace_node(node, new_node) + + return True From fae647d6df5e2256591eed362ebb22375e4f4efc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 23 Sep 2024 16:51:00 -0500 Subject: [PATCH 50/59] add seperable conv to test --- example-models | 2 +- test/pytest/test_qonnx.py | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/example-models b/example-models index ff74f73dbc..a81e36eb16 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 +Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 5b7b9d95c9..e9ef37578f 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -14,6 +14,23 @@ import hls4ml test_root_path = Path(__file__).parent +example_model_path = (test_root_path / '../../example-models').resolve() + + +@pytest.fixture(scope='module') +def sep_conv_model(): + """ + Load separabale conv model + """ + dl_file = str(example_model_path / "onnx/separable_conv_model_ch_last.onnx") + assert os.path.isfile(dl_file) + out_file = str(test_root_path / "separable_conv_model_ch_last_clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + return model @pytest.fixture(scope='module') @@ -83,6 +100,33 @@ def jettagging_model(): return model +@pytest.mark.parametrize('backend', ['Vitis']) +def test_sep_conv(sep_conv_model, backend): + model = sep_conv_model + ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) + X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) + # X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + config = hls4ml.utils.config.config_from_onnx_model( + model, granularity='name', backend=backend, default_precision='fixed<16,6>' + ) + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / f'hls4mlprj_qonnx_sep_conv_{backend}'), + io_type='io_stream', + backend=backend, + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(np.ascontiguousarray(X)) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + print('test') + + 
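The separable-conv test above exercises the new depthwise path end to end (a separable convolution is a depthwise convolution followed by a pointwise one). The dispatch implemented by the updated ConvToConvXD and the new ConvToDepthwiseConvXD passes can be summarized in a few lines; this is only a sketch, with names mirroring the 'group' and 'n_chan' attributes set by parse_conv_layer:

# Sketch of the routing implied by the match() conditions of the two passes above.
def classify_onnx_conv(group: int, n_chan: int) -> str:
    if group == 1:
        return 'ConvXD'            # standard convolution, handled by ConvToConvXD
    if group == n_chan:
        return 'DepthwiseConvXD'   # depthwise convolution, handled by ConvToDepthwiseConvXD
    return 'unsupported'           # other grouped convolutions are not converted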
@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) def test_tfc_2w2a(tfc_2w2a_model, backend): model = tfc_2w2a_model From 56c85a442e0aee27fae8fa457fa273e0ec111a95 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 24 Sep 2024 00:15:47 -0500 Subject: [PATCH 51/59] fix pointwise with naming, quant_opt --- hls4ml/backends/catapult/passes/pointwise.py | 8 ++++---- hls4ml/backends/quartus/passes/pointwise.py | 6 ++---- hls4ml/backends/vivado/passes/pointwise.py | 9 +++++---- hls4ml/model/optimizer/passes/quant_opt.py | 2 +- test/pytest/test_qonnx.py | 7 ++----- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/hls4ml/backends/catapult/passes/pointwise.py b/hls4ml/backends/catapult/passes/pointwise.py index 0141d7f108..fd464ef172 100755 --- a/hls4ml/backends/catapult/passes/pointwise.py +++ b/hls4ml/backends/catapult/passes/pointwise.py @@ -1,5 +1,3 @@ -from copy import copy - from hls4ml.backends.catapult.passes.convolution_templates import ( Conv1DConfigTemplate, Conv1DFunctionTemplate, @@ -75,8 +73,10 @@ def match(self, node): def transform(self, model, node): dim = node.__class__.__name__[-2:] # '1D' or '2D' - pw_node = model.make_node('PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy()) - pw_node.weights['bias'].data = node.weights['bias'].data + new_attrs = {k: v for k, v in node.attributes.items() if k not in ('trace', 'precision', 'reuse_factor')} + pw_node = model.make_node( + 'PointwiseConv' + dim, node.name, new_attrs, node.inputs.copy(), outputs=node.outputs.copy() + ) # Set strategy to ensure lowercase string is passed to the template if model.config.is_resource_strategy(pw_node): pw_node.set_attr('strategy', 'resource') diff --git a/hls4ml/backends/quartus/passes/pointwise.py b/hls4ml/backends/quartus/passes/pointwise.py index 0f7f6821ae..d65ab22569 100644 --- a/hls4ml/backends/quartus/passes/pointwise.py +++ b/hls4ml/backends/quartus/passes/pointwise.py @@ -1,5 +1,3 @@ -from copy import copy - from hls4ml.backends.fpga.fpga_layers import PointwiseConv1D, PointwiseConv2D from hls4ml.backends.quartus.passes.convolution_templates import ( Conv1DConfigTemplate, @@ -81,10 +79,10 @@ def match(self, node): def transform(self, model, node): dim = node.__class__.__name__[-2:] # '1D' or '2D' + new_attrs = {k: v for k, v in node.attributes.items() if k not in ('trace', 'precision', 'reuse_factor')} pw_node = model.make_node( - 'PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy(), outputs=node.outputs.copy() + 'PointwiseConv' + dim, node.name, new_attrs, node.inputs.copy(), outputs=node.outputs.copy() ) - pw_node.weights['bias'].data = node.weights['bias'].data model.replace_node(node, pw_node) return True diff --git a/hls4ml/backends/vivado/passes/pointwise.py b/hls4ml/backends/vivado/passes/pointwise.py index 85d2635cb8..34568b09f7 100644 --- a/hls4ml/backends/vivado/passes/pointwise.py +++ b/hls4ml/backends/vivado/passes/pointwise.py @@ -1,5 +1,3 @@ -from copy import copy - from hls4ml.backends.fpga.fpga_layers import PointwiseConv1D, PointwiseConv2D from hls4ml.backends.vivado.passes.convolution_templates import ( Conv1DConfigTemplate, @@ -75,8 +73,11 @@ def match(self, node): def transform(self, model, node): dim = node.__class__.__name__[-2:] # '1D' or '2D' - pw_node = model.make_node('PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy()) - pw_node.weights['bias'].data = node.weights['bias'].data + # to remove warning, since these get set again + new_attrs = {k: v for k, v in 
node.attributes.items() if k not in ('trace', 'precision', 'reuse_factor')} + pw_node = model.make_node( + 'PointwiseConv' + dim, node.name, new_attrs, node.inputs.copy(), outputs=node.outputs.copy() + ) # Set strategy to ensure lowercase string is passed to the template if model.config.is_resource_strategy(pw_node): pw_node.set_attr('strategy', 'resource') diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 69e9ca7685..cac29b5040 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -187,7 +187,7 @@ def transform(self, model, node): integer = bitwidth scale = node.get_attr('scale') if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): - _, exp = np.frexp(np.squeeze(scale)) + _, exp = np.frexp(scale[0]) # know that np.all(scale[0] == scale) must be true integer = bitwidth + exp - 1 precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index e9ef37578f..58d8b68fe2 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -20,15 +20,12 @@ @pytest.fixture(scope='module') def sep_conv_model(): """ - Load separabale conv model + Load separabale conv model, already channels-last and cleaned """ dl_file = str(example_model_path / "onnx/separable_conv_model_ch_last.onnx") assert os.path.isfile(dl_file) - out_file = str(test_root_path / "separable_conv_model_ch_last_clean.onnx") - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) - model = ModelWrapper(out_file) + model = ModelWrapper(dl_file) return model From b0efdd6275a02eb9c18b82c29f90f30f380ac693 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 24 Sep 2024 10:45:59 -0500 Subject: [PATCH 52/59] fix ConstantBatchNormFusion --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 0dde6b77a9..b6c21c7267 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -89,6 +89,9 @@ def match(self, node): isinstance(node, BatchNormalization) and not any(node.inputs[1:]) and isinstance(node.get_input_node(node.inputs[0]), Constant) + and isinstance( + node.get_input_node(node.inputs[0]).get_output_variable().type.precision, UnspecifiedPrecisionType + ) ) return is_match From 14da6f5d2be0feb6a65b1c0c626631a19b70041e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 25 Sep 2024 09:13:01 -0500 Subject: [PATCH 53/59] update broadcasting for moving scales for conv --- hls4ml/model/optimizer/passes/move_scales.py | 27 ++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index 3776a6d202..1197480eaa 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -237,9 +237,21 @@ def transform(self, model, node): # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable + if scale.ndim > 1: + # undo any broadcast_to + reduced_scale = _remove_redundant_dims(scale) + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = 
np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable + can_propagate = True newbias = np.zeros(output.shape) - can_propagate = True except ValueError: can_propagate = False @@ -309,3 +321,14 @@ def transform(self, model, node): new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True + + +def _remove_redundant_dims(X): + """This is somewhat of the inverse of broadcast-to. It sets the dimension size to 1 if all values are identical""" + + shape = X.shape + for i in range(len(shape)): + reduced = np.expand_dims(np.take(X, 0, axis=i), axis=i) + if np.all(reduced == X): + X = reduced + return X From 0333d36894e4378081639c76f0c0d7ac0f9d3d52 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 09:18:38 -0500 Subject: [PATCH 54/59] snapshot of current development --- hls4ml/model/optimizer/__init__.py | 1 + hls4ml/model/optimizer/passes/move_scales.py | 274 +++++++++++-------- 2 files changed, 168 insertions(+), 107 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 10f652345f..d82d45668d 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -43,6 +43,7 @@ 'constant_batch_norm_fusion', 'merge_two_constants', 'scale_down_add', + 'bias_down_add', 'scale_down_mat_mul', 'scale_down_weight_conv', 'scale_down_bias_conv', diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index 1197480eaa..ecf1099ee5 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -56,15 +56,7 @@ def transform(self, model, node): scale = apply_alpha.weights['scale'].data_unquantized bias = apply_alpha.weights['bias'].data_unquantized - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) + scale, bias = _make_scalar(scale, bias) output = node.get_output_variable() # to remove warning, since these get set again @@ -146,73 +138,54 @@ def transform(self, model, node): return True -class ScaleDownConv(OptimizerPass): - '''Shift an ApplyAlpha on input below a Conv''' +class BiasDownAdd(OptimizerPass): + '''Shift a ApplyAlpha with only bias below a Merge (Add)''' def match(self, node): - '''Shift an ApplyAlpha from the Weight''' - is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - + '''Match if there is only one ApplyAlpha. If there are two, if the scale of both is 0, they would + match the ScaleDownAdd, so this optimizer does not need to handle that case. 
+ ''' + is_match = isinstance(node, Merge) and len(node.inputs) == 2 and node.attributes["op"] == "add" + if is_match: + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + is_match = ( + (isinstance(in0, ApplyAlpha) + or isinstance(in1, ApplyAlpha)) + and not (isinstance(in0, ApplyAlpha) + and isinstance(in1, ApplyAlpha)) + ) # only one ApplyAlpha return is_match def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[0]) - - # Check if we can move - scale = apply_alpha.weights['scale'].data_unquantized - bias = apply_alpha.weights['bias'].data_unquantized - - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) - output = node.get_output_variable() - # to remove warning, since these get set again - new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} + alpha_node = in0 if isinstance(in0, ApplyAlpha) else in1 - can_propagate = False - if not bias.shape and bias == 0: - # zero bias, propagate through, if possible - # (always possible if scale is scalar) - try: - newscale = np.broadcast_to(scale, output.shape) # check broadcastable - newbias = np.zeros(output.shape) - can_propagate = True - except ValueError: - can_propagate = False + # Check if we can move + scale = alpha_node.weights['scale'].data_unquantized - if not can_propagate: + if (scale == 0).all(): + model.remove_node(alpha_node) + new_node = model.make_node('ApplyAlpha', alpha_node.name, alpha_node.attributes, [x for x in node.outputs]) + model.insert_node(new_node) + return True + else: return False - model.remove_node(apply_alpha) - - new_attrs['scale_data'] = newscale - new_attrs['bias_data'] = newbias - - new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) - model.insert_node(new_node) - return True - -class ScaleDownWeightConv(OptimizerPass): - '''Shift an ApplyAlpha weight (from conv side) below a Conv''' +class ScaleDownConv(OptimizerPass): + '''Shift an ApplyAlpha on input below a Conv''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' - is_match = ( - isinstance(node, Conv) and len(node.inputs) > 1 and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) - ) + is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) return is_match def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[1]) + apply_alpha = node.get_input_node(node.inputs[0]) # Check if we can move scale = apply_alpha.weights['scale'].data_unquantized @@ -237,21 +210,9 @@ def transform(self, model, node): # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - if scale.ndim > 1: - # undo any broadcast_to - reduced_scale = _remove_redundant_dims(scale) - if reduced_scale.shape[-1] == 1: - reduced_scale = reduced_scale[..., 0] - if node.attributes['n_dim'] == 1: - scale_trans = np.transpose(reduced_scale, (1, 0)) - else: - scale_trans = np.transpose(reduced_scale, (1, 2, 0)) - newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable - can_propagate = True - else: - newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable - can_propagate = True + newscale = np.broadcast_to(scale, output.shape) # check 
broadcastable newbias = np.zeros(output.shape) + can_propagate = True except ValueError: can_propagate = False @@ -268,57 +229,141 @@ def transform(self, model, node): return True -class ScaleDownBiasConv(OptimizerPass): - '''Shift an ApplyAlpha bias (from conv side) below a Conv''' +class ScaleDownConv(OptimizerPass): + '''Shift an ApplyAlpha on a Conv with 2-3 inputs''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' is_match = ( - isinstance(node, Conv) and len(node.inputs) > 2 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha) + isinstance(node, Conv) and len(node.inputs) > 1 and + (isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + or (len(node.inputs) == 3 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha))) ) - return is_match def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[2]) - - # Check if we can move - scale = apply_alpha.weights['scale'].data_unquantized - bias = apply_alpha.weights['bias'].data_unquantized - - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + in2 = node.get_input_node(node.inputs[2]) if len(node.inputs) == 3 else None + + aa0 = isinstance(in0, ApplyAlpha) + aa1 = isinstance(in1, ApplyAlpha) + aa2 = isinstance(in2, ApplyAlpha) if len(node.inputs) == 3 else False + + if not isinstance(in1, (Constant, ApplyAlpha)): + raise RuntimeError("The weight node needs to be ApplyAlpha or Constant") + if len(node.inputs) == 3 and not isinstance(in2, (Constant, ApplyAlpha)): + raise RuntimeError("The bias node needs to be ApplyAlpha or Constant") + + scale0 = in0.weights['scale'].data_unquantized if aa0 else None + bias0 = in0.weights['bias'].data_unquantized if aa0 else None + scale1 = in1.weights['scale'].data_unquantized if aa1 else None + bias1 = in1.weights['bias'].data_unquantized if aa1 else None + scale2 = in2.weights['scale'].data_unquantized if aa2 else None + bias2 = in2.weights['bias'].data_unquantized if aa2 else None + + # If possible, make scale and bias have scalar values + if aa0: + scale0, bias0 = _make_scalar(scale0, bias0) + if aa1: + scale1, bias1 = _make_scalar(scale1, bias1) + if aa2: + scale2, bias2 = _make_scalar(scale2, bias2) output = node.get_output_variable() - # to remove warning, since these get set again - new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} - - can_propagate = False - if not scale.shape and scale == 1: - # No scale, just additional bias - try: - newscale = np.ones(output.shape) - newbias = np.broadcast_to(bias, output.shape) - can_propagate = True - except ValueError: - can_propagate = False + if (aa0 and not aa1 and not aa2): + # only datapath has a scale + bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 + conv_nobias = np.all(bias == 0) + + can_propagate = False + if not bias0.shape and bias0 == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + if conv_nobias: + try: + newscale = np.broadcast_to(_remove_redundant_dims(scale0), output.shape) # check broadcastable + newbias = np.zeros(output.shape) + can_propagate = True + except ValueError: + can_propagate = False + elif not scale0.shape: + # scalar scale0 + try: + newscale = 
np.broadcast_to(scale0, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale0), output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in0.name + model.remove_node(in0) + elif (not aa0 and aa1 and not aa2): + # only weights have a scale + bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 + conv_nobias = np.all(bias == 0) + + can_propagate = False + if not bias1.shape and bias1 == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + if scale1.ndim > 1: + # undo any broadcast_to + reduced_scale = _remove_redundant_dims(scale1) + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + can_propagate = True + newbias = np.zeros(output.shape) + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in1) + + elif (not aa0 and not aa1 and aa2): + # only bias has a scale + + can_propagate = False + if not scale2.shape and scale2 == 1: + # No scale, just additional bias + try: + newscale = np.ones(output.shape) + newbias = np.broadcast_to(bias2, output.shape) + can_propagate = True + except ValueError: + can_propagate = False - if not can_propagate: - return False + if not can_propagate: + return False - model.remove_node(apply_alpha) + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in2.attributes.items() if k not in ('trace', 'precision')} + new_name = in2.name + model.remove_node(in2) new_attrs['scale_data'] = newscale new_attrs['bias_data'] = newbias - new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) + new_node = model.make_node('ApplyAlpha', new_name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -332,3 +377,18 @@ def _remove_redundant_dims(X): if np.all(reduced == X): X = reduced return X + + +def _make_scalar(scale, bias): + """Make the scale and bias scalar if possible""" + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + return scale, bias From 80184d21514ab617bf4950c0476aac34964616ab Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 12:09:47 -0500 Subject: [PATCH 55/59] snapshot working through scale downs --- example-models | 2 +- hls4ml/model/optimizer/passes/move_scales.py | 189 ++++++++++--------- 2 files changed, 103 insertions(+), 88 deletions(-) diff --git a/example-models b/example-models index a81e36eb16..ff74f73dbc 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 +Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 diff --git 
a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index ecf1099ee5..0ccdf07c61 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -149,11 +149,8 @@ def match(self, node): if is_match: in0 = node.get_input_node(node.inputs[0]) in1 = node.get_input_node(node.inputs[1]) - is_match = ( - (isinstance(in0, ApplyAlpha) - or isinstance(in1, ApplyAlpha)) - and not (isinstance(in0, ApplyAlpha) - and isinstance(in1, ApplyAlpha)) + is_match = (isinstance(in0, ApplyAlpha) or isinstance(in1, ApplyAlpha)) and not ( + isinstance(in0, ApplyAlpha) and isinstance(in1, ApplyAlpha) ) # only one ApplyAlpha return is_match @@ -175,70 +172,19 @@ def transform(self, model, node): return False -class ScaleDownConv(OptimizerPass): - '''Shift an ApplyAlpha on input below a Conv''' - - def match(self, node): - '''Shift an ApplyAlpha from the Weight''' - is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - - return is_match - - def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[0]) - - # Check if we can move - scale = apply_alpha.weights['scale'].data_unquantized - bias = apply_alpha.weights['bias'].data_unquantized - - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) - - output = node.get_output_variable() - # to remove warning, since these get set again - new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} - - can_propagate = False - if not bias.shape and bias == 0: - # zero bias, propagate through, if possible - # (always possible if scale is scalar) - try: - newscale = np.broadcast_to(scale, output.shape) # check broadcastable - newbias = np.zeros(output.shape) - can_propagate = True - except ValueError: - can_propagate = False - - if not can_propagate: - return False - - model.remove_node(apply_alpha) - - new_attrs['scale_data'] = newscale - new_attrs['bias_data'] = newbias - - new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) - model.insert_node(new_node) - return True - - class ScaleDownConv(OptimizerPass): '''Shift an ApplyAlpha on a Conv with 2-3 inputs''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' is_match = ( - isinstance(node, Conv) and len(node.inputs) > 1 and - (isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) - or (len(node.inputs) == 3 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha))) + isinstance(node, Conv) + and len(node.inputs) > 1 + and ( + isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + or (len(node.inputs) == 3 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha)) + ) ) return is_match @@ -272,14 +218,14 @@ def transform(self, model, node): scale2, bias2 = _make_scalar(scale2, bias2) output = node.get_output_variable() - if (aa0 and not aa1 and not aa2): + if aa0 and not aa1 and not aa2: # only datapath has a scale bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 conv_nobias = np.all(bias == 0) can_propagate = False if not bias0.shape and bias0 == 0: - # zero bias, propagate through, if possible + # No zero offset, propagate through, if possible # 
(always possible if scale is scalar) if conv_nobias: try: @@ -303,34 +249,58 @@ def transform(self, model, node): new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} new_name = in0.name model.remove_node(in0) - elif (not aa0 and aa1 and not aa2): + + elif not aa0 and aa1 and not aa2: # only weights have a scale bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 conv_nobias = np.all(bias == 0) - + can_propagate = False if not bias1.shape and bias1 == 0: - # zero bias, propagate through, if possible + # No zero offset, propagate through, if possible # (always possible if scale is scalar) - try: - if scale1.ndim > 1: - # undo any broadcast_to - reduced_scale = _remove_redundant_dims(scale1) - if reduced_scale.shape[-1] == 1: - reduced_scale = reduced_scale[..., 0] - if node.attributes['n_dim'] == 1: - scale_trans = np.transpose(reduced_scale, (1, 0)) - else: - scale_trans = np.transpose(reduced_scale, (1, 2, 0)) - newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + if conv_nobias: + try: + if scale1.ndim > 1: + # undo any broadcast_to + reduced_scale0 = _remove_redundant_dims(scale0) if scale0.ndim > 1 else scale0 + reduced_scale1 = _remove_redundant_dims(scale1) + reduced_scale = reduced_scale0 @ reduced_scale1 + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + elif scale0.ndim > 1: + # scale1 is scalar + # undo any broadcast_to + reduced_scale0 = _remove_redundant_dims(scale0) + reduced_scale = scale1 * reduced_scale0 + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # make sure broadcastable can_propagate = True - else: - newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + newbias = np.zeros(output.shape) + except ValueError: + can_propagate = False + elif not scale0.shape and not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale0 * scale1), output.shape) can_propagate = True - newbias = np.zeros(output.shape) - except ValueError: - can_propagate = False - + except ValueError: + can_propagate = False if not can_propagate: return False @@ -339,9 +309,9 @@ def transform(self, model, node): new_name = in1.name model.remove_node(in1) - elif (not aa0 and not aa1 and aa2): + elif not aa0 and not aa1 and aa2: # only bias has a scale - + can_propagate = False if not scale2.shape and scale2 == 1: # No scale, just additional bias @@ -360,6 +330,51 @@ def transform(self, model, node): new_name = in2.name model.remove_node(in2) + elif aa0 and aa1 and not aa2: + # dataflow and weights have an ApplyAlpha + bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 + conv_nobias = np.all(bias == 0) + + can_propagate = False + if not bias0.shape and bias0 == 0 and not bias1.shape and bias1 == 0: + # zero bias, propagate through, if possible + # (always possible if 
scale is scalar) + if conv_nobias: + try: + if scale1.ndim > 1: + # undo any broadcast_to + reduced_scale = _remove_redundant_dims(scale1) + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + can_propagate = True + newbias = np.zeros(output.shape) + except ValueError: + can_propagate = False + elif not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale1), output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in1) + + # after the big if-else above new_attrs['scale_data'] = newscale new_attrs['bias_data'] = newbias From 6bb08172a7f9dfeccc0ba6d6e72df21fbc0059d1 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 16:03:34 -0500 Subject: [PATCH 56/59] finish making the various cases --- hls4ml/model/optimizer/passes/move_scales.py | 131 +++++++++++++++---- 1 file changed, 104 insertions(+), 27 deletions(-) diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index 0ccdf07c61..b2470f2839 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -251,7 +251,7 @@ def transform(self, model, node): model.remove_node(in0) elif not aa0 and aa1 and not aa2: - # only weights have a scale + # only weights have an ApplyAlpha bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 conv_nobias = np.all(bias == 0) @@ -263,22 +263,7 @@ def transform(self, model, node): try: if scale1.ndim > 1: # undo any broadcast_to - reduced_scale0 = _remove_redundant_dims(scale0) if scale0.ndim > 1 else scale0 - reduced_scale1 = _remove_redundant_dims(scale1) - reduced_scale = reduced_scale0 @ reduced_scale1 - if reduced_scale.shape[-1] == 1: - reduced_scale = reduced_scale[..., 0] - if node.attributes['n_dim'] == 1: - scale_trans = np.transpose(reduced_scale, (1, 0)) - else: - scale_trans = np.transpose(reduced_scale, (1, 2, 0)) - newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable - can_propagate = True - elif scale0.ndim > 1: - # scale1 is scalar - # undo any broadcast_to - reduced_scale0 = _remove_redundant_dims(scale0) - reduced_scale = scale1 * reduced_scale0 + reduced_scale = _remove_redundant_dims(scale1) if reduced_scale.shape[-1] == 1: reduced_scale = reduced_scale[..., 0] if node.attributes['n_dim'] == 1: @@ -288,16 +273,16 @@ def transform(self, model, node): newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable can_propagate = True else: - newscale = np.broadcast_to(scale0 * scale1, output.shape) # make sure broadcastable + newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable can_propagate = True newbias = np.zeros(output.shape) except ValueError: can_propagate = False - elif not scale0.shape and not scale1.shape: + elif not scale1.shape: # scalar scale1 try: - newscale = np.broadcast_to(scale0 * 
scale1, output.shape) # check broadcastable - newbias = np.broadcast_to(bias * (1 - scale0 * scale1), output.shape) + newscale = np.broadcast_to(scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale1), output.shape) can_propagate = True except ValueError: can_propagate = False @@ -305,7 +290,7 @@ def transform(self, model, node): return False # to remove warning, since these get set again - new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} new_name = in1.name model.remove_node(in1) @@ -337,13 +322,28 @@ def transform(self, model, node): can_propagate = False if not bias0.shape and bias0 == 0 and not bias1.shape and bias1 == 0: - # zero bias, propagate through, if possible + # No zero offset, propagate through, if possible # (always possible if scale is scalar) if conv_nobias: try: if scale1.ndim > 1: # undo any broadcast_to - reduced_scale = _remove_redundant_dims(scale1) + reduced_scale0 = _remove_redundant_dims(scale0) if scale0.ndim > 1 else scale0 + reduced_scale1 = _remove_redundant_dims(scale1) + reduced_scale = reduced_scale0 @ reduced_scale1 + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + elif scale0.ndim > 1: + # scale1 is scalar + # undo any broadcast_to + reduced_scale0 = _remove_redundant_dims(scale0) + reduced_scale = scale1 * reduced_scale0 if reduced_scale.shape[-1] == 1: reduced_scale = reduced_scale[..., 0] if node.attributes['n_dim'] == 1: @@ -353,16 +353,93 @@ def transform(self, model, node): newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable can_propagate = True else: - newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + newscale = np.broadcast_to(scale0 * scale1, output.shape) # make sure broadcastable can_propagate = True newbias = np.zeros(output.shape) except ValueError: can_propagate = False - elif not scale1.shape: + elif not scale0.shape and not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale0 * scale1), output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in0) + model.remove_node(in1) + + elif aa0 and not aa1 and aa2: + # datapath and bias have a scale + + can_propagate = False + if not bias0.shape and bias0 == 0 and not scale2.shape and not scale0.shape and scale2 == scale0: + # scalar scale0, no bais0 and scale2. 
+ try: + newscale = np.broadcast_to(scale0, output.shape) # check broadcastable + newbias = np.broadcast_to(bias2, output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in0.name + model.remove_node(in0) + model.remove_node(in2) + + elif not aa0 and aa1 and aa2: + # only weights and bias have an ApplyAlpha + + can_propagate = False + if not bias1.shape and bias1 == 0 and not scale2.shape and not scale1.shape and scale2 == scale1: + # No zero offset, propagate through, if possible + # (always possible if scale is scalar) + if not scale1.shape: # scalar scale1 try: newscale = np.broadcast_to(scale1, output.shape) # check broadcastable - newbias = np.broadcast_to(bias * (1 - scale1), output.shape) + newbias = np.broadcast_to(bias2, output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in1) + + elif aa0 and aa1 and aa2: + # have all + + can_propagate = False + if ( + not bias0.shape + and bias0 == 0 + and not bias1.shape + and bias1 == 0 + and not scale2.shape + and not scale1.shape + and not scale0.shape + and scale2 == scale1 * scale0 + ): + # No zero offset, propagate through, if possible + # (always possible if scale is scalar) + if not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias2, output.shape) can_propagate = True except ValueError: can_propagate = False From 766a14cf0775bfa52eb5d10a1a3cc27a4ab42d37 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 16:11:36 -0500 Subject: [PATCH 57/59] accidentally reverted the example models --- example-models | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example-models b/example-models index ff74f73dbc..a81e36eb16 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 +Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 From 5ff1373d3db86239b9912a96b1a040958643790f Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 16:35:56 -0500 Subject: [PATCH 58/59] some bug fixes --- example-models | 2 +- hls4ml/model/optimizer/__init__.py | 2 -- hls4ml/model/optimizer/passes/move_scales.py | 7 +++++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/example-models b/example-models index a81e36eb16..3cfbcfd062 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 +Subproject commit 3cfbcfd062f60492507d21ff0e91559b3bdd6550 diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index d82d45668d..0edd549b29 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -45,8 +45,6 @@ 'scale_down_add', 'bias_down_add', 'scale_down_mat_mul', - 'scale_down_weight_conv', - 'scale_down_bias_conv', 'scale_down_conv', 'merge_to_apply_alpha', 'merge_to_apply_alpha_div', diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index b2470f2839..43fcaa0da7 100644 --- 
a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -418,6 +418,7 @@ def transform(self, model, node): new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} new_name = in1.name model.remove_node(in1) + model.remove_node(in2) elif aa0 and aa1 and aa2: # have all @@ -447,9 +448,11 @@ def transform(self, model, node): return False # to remove warning, since these get set again - new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} - new_name = in1.name + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in0.name + model.remove_node(in0) model.remove_node(in1) + model.remove_node(in2) # after the big if-else above new_attrs['scale_data'] = newscale From 86abdd236f74ce39af96a6f0fc868bc7246f49f2 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 29 Sep 2024 15:01:20 -0500 Subject: [PATCH 59/59] update qonnx sepconv test --- test/pytest/test_qonnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 58d8b68fe2..75c6c95c3f 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -102,12 +102,12 @@ def test_sep_conv(sep_conv_model, backend): model = sep_conv_model ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) - # X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] config = hls4ml.utils.config.config_from_onnx_model( - model, granularity='name', backend=backend, default_precision='fixed<16,6>' + model, granularity='name', backend=backend, default_precision='fixed<32,16>' ) hls_model = hls4ml.converters.convert_from_onnx_model(