Fix QAT model conversion #2190

Open · wants to merge 2 commits into main
13 changes: 13 additions & 0 deletions tests/test_backend.py
@@ -3150,6 +3150,19 @@ def func(x):
             return tf.identity(x_, name=_TFOUTPUT)
         _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
 
+    @check_tf_min_version("1.15")
+    @check_opset_min_version(10, "quantize_and_dequantize")
+    def test_qdq_signed_input_narrow_range(self):
+        x_shape = [3, 3, 2]
+        x_val = np.arange(-np.prod(x_shape)/2, np.prod(x_shape)/2).astype("float32").reshape(x_shape)
+        min_x = np.min(x_val)
+        max_x = np.max(x_val)
+        def func(x):
+            x_ = quantize_and_dequantize(x, min_x, max_x, signed_input=True, narrow_range=True, range_given=True)
+            return tf.identity(x_, name=_TFOUTPUT)
+        _ = self._run_test_case(func, [_OUTPUT], {_INPUT: x_val})
+
+
     @check_tf_min_version("1.15")
     @check_opset_min_version(10, "quantize_and_dequantize")
     def test_qdq_optimizer(self):
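Note on what the new test exercises: with signed_input=True and narrow_range=True, quantize_and_dequantize maps values onto the symmetric int8 range [-127, 127] (the -128 code is left unused), so zero is representable exactly. Below is a minimal NumPy sketch of that round trip, added here for review context only; the helper name is made up, and the choice of scale = amax / 127 mirrors the converter change further down rather than TF's exact range handling.

```python
import numpy as np

def narrow_range_qdq(x, amax, num_bits=8):
    """Symmetric (narrow-range) quantize -> dequantize round trip."""
    qmax = 2 ** (num_bits - 1) - 1            # 127 for 8 bits; -128 is never used
    scale = amax / qmax                       # zero point is exactly 0
    q = np.clip(np.round(x / scale), -qmax, qmax).astype(np.int8)
    return q.astype(np.float32) * scale

x_val = np.arange(-9, 9, dtype=np.float32)    # same values as the test's x_val, flattened
print(narrow_range_qdq(x_val, amax=x_val.max()))
```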
34 changes: 20 additions & 14 deletions tf2onnx/onnx_opset/quantize.py
@@ -39,33 +39,39 @@ def version_10(cls, ctx, node, **kwargs):
         narrow_range = node.get_attr("narrow_range").i
         num_bits = node.get_attr("num_bits").i
 
-        make_sure(
-            not narrow_range,
-            "Unable to convert node FakeQuantWithMinMaxArgs with narrow_range=%r",
+        logger.debug(
+            "Convert node FakeQuantWithMinMaxArgs with narrow_range=%r",
             narrow_range)
         make_sure(
             num_bits == 8,
             "Unable to convert node FakeQuantWithMinMaxArgs with "
             "num_bits=%r", num_bits)
 
-        scale = (amax - amin) / (2 ** num_bits - 1)
-        min_adj = np.around(amin / scale)
+        # Allow narrow_range since TensorRT requires quantized range to be (-127, 127)
+        if narrow_range:
+            scale = amax / (2**(num_bits-1)-1)
+            idtype = TensorProto.INT8
+            zero = np.zeros(np.array(amin).shape, dtype=np.int8)
+        else:
+            scale = (amax - amin) / (2 ** num_bits - 1)
+            min_adj = np.around(amin / scale)
+            idtype = TensorProto.UINT8
+            zero = np.array(-min_adj, dtype=np.uint8)
+            make_sure(
+                zero == -min_adj,
+                "Cannot convert %s node %s with "
+                "min=%r max=%r numbits=%r because zero_scale=%r "
+                "is outside uint8 boundary",
+                node.type, node.name, amin, amax, num_bits, -min_adj)
 
         dtype = ctx.get_dtype(node.input[0])
         shape = ctx.get_shape(node.input[0])
         axis = 1
-        idtype = TensorProto.UINT8
-
+
+
         pb_scale = ctx.make_const(
             utils.make_name("{}_scaley".format(node.name)),
             np.array(scale, dtype=np.float32))
-        zero = np.array(-min_adj, dtype=np.uint8)
-        make_sure(
-            zero == -min_adj,
-            "Cannot convert %s node %s with "
-            "min=%r max=%r numbits=%r because zero_scale=%r "
-            "is outside uint8 boundary",
-            node.type, node.name, amin, amax, num_bits, -min_adj)
 
         zero_point = ctx.make_const(
             utils.make_name("{}_zpy".format(node.name)), zero)
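In short, the patched handler now has two paths: with narrow_range=True it emits symmetric int8 parameters (scale = amax / 127, zero_point = 0, the range TensorRT requires), and otherwise it keeps the original asymmetric uint8 parameters (scale = (amax - amin) / 255, zero_point = round(-amin / scale), which must fit in uint8). Below is a standalone sketch of that parameter selection in plain NumPy, assuming num_bits = 8 as the handler enforces; the function name is illustrative, not part of the PR.

```python
import numpy as np

def choose_quant_params(amin, amax, num_bits=8, narrow_range=False):
    """Scale / zero-point selection matching the branch added in quantize.py."""
    if narrow_range:
        # Symmetric signed int8: quantized range [-127, 127], zero point fixed at 0.
        scale = amax / (2 ** (num_bits - 1) - 1)
        zero_point = np.int8(0)
    else:
        # Asymmetric uint8: quantized range [0, 255].
        scale = (amax - amin) / (2 ** num_bits - 1)
        min_adj = np.around(amin / scale)
        zero_point = np.uint8(-min_adj)       # handler asserts this fits in uint8
    return np.float32(scale), zero_point

# With the test's range (amin = -9.0, amax = 8.0):
print(choose_quant_params(-9.0, 8.0, narrow_range=True))   # (~0.0630, 0)
print(choose_quant_params(-9.0, 8.0, narrow_range=False))  # (~0.0667, 135)
```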
2 changes: 1 addition & 1 deletion tf2onnx/tf_utils.py
@@ -222,7 +222,7 @@ def is_huge_shape(x):
                 progress = True
             can_fold = node.type not in ['Enter', 'Placeholder', 'PlaceholderWithDefault', 'Switch', 'Merge',
                                          'NextIteration', 'Exit', 'QuantizeAndDequantizeV2', 'QuantizeAndDequantizeV3',
-                                         'QuantizeAndDequantizeV4']
+                                         'QuantizeAndDequantizeV4', 'FakeQuantWithMinMaxVars']
             can_fold = can_fold and not node.type.startswith('Random')
             can_fold = can_fold and len(input_names) > 0 and all(inp in outputs_to_values for inp in input_names)
             # We can only fold nodes with a single output