diff --git a/hls4ml/optimization/fused_dotp/dotp_unroll.py b/hls4ml/optimization/fused_dotp/dotp_unroll.py
index e0830f17c..c99283e68 100644
--- a/hls4ml/optimization/fused_dotp/dotp_unroll.py
+++ b/hls4ml/optimization/fused_dotp/dotp_unroll.py
@@ -234,7 +234,7 @@ def nadd_max(n, w_c):
     return int(arr[idx]), int(idx + 1)
 
 
-def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
+def _compile_dense(kernel: np.ndarray, inp: np.ndarray):
     "Compile a matmul operation with MAC Tree"
     ch_in, ch_out = kernel.shape
     assert ch_out == 1, 'Only single output channel is supported for each unrolled operation'
@@ -246,7 +246,7 @@ def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
         inp = inp[np.arange(len(inp)), signs]
         kernel = np.abs(kernel)
     # ==============================================================
-    if minmal_latency:
+    if _global_config.minimal_latency_compile:
         return [_min_latency_compile_dense(kernel, inp)]
 
     r: list[float | Variable | list[Variable]] = np.empty((ch_out, 0), dtype=object).tolist()
@@ -297,18 +297,18 @@ def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
         r[i] = x
 
     if kernel2 is not None:
-        return [r] + _compile_dense(kernel2, inp, minmal_latency)
+        return [r] + _compile_dense(kernel2, inp)
     else:
         return [r]
 
 
-def compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
+def compile_dense(kernel: np.ndarray, inp: np.ndarray):
     out = []
     if not _global_config.use_ternary and np.any(kernel):
         inp = np.stack([-inp, inp], axis=1)
 
     for _kernel in kernel.T:  # ch_in, 1
-        r = _compile_dense(_kernel[:, None], inp, minmal_latency=minmal_latency)
+        r = _compile_dense(_kernel[:, None], inp)
         r = balanced_reduction([x[0] for x in r])
         out.append(r)
     return np.array(out).T
@@ -324,4 +324,4 @@ def compile_conv(kernel: np.ndarray, inp: list | np.ndarray, minimal_latency=Non
     ch_in = int(np.prod(_ch_in))
     inp = np.reshape(inp, ch_in)
     kernel = np.reshape(kernel, (ch_in, ch_out))
-    return compile_dense(kernel, inp, minmal_latency=minimal_latency)
+    return compile_dense(kernel, inp)
diff --git a/hls4ml/optimization/fused_dotp/resoure_surrogate.py b/hls4ml/optimization/fused_dotp/resoure_surrogate.py
index 4149695bf..99d90cba2 100644
--- a/hls4ml/optimization/fused_dotp/resoure_surrogate.py
+++ b/hls4ml/optimization/fused_dotp/resoure_surrogate.py
@@ -125,7 +125,9 @@ def trace(self, r: list | np.ndarray, name: str, pf: int = 1):
             return
         if len(arr) > 0:
             depth = max(v.depth for v in arr if isinstance(v, Variable))
+            n_depth = sum(v.n_depth for v in arr if isinstance(v, Variable))
             params['depth'] = depth
+            params['n_depth'] = n_depth
         params['pf'] = pf
         self.layers[name] = params
 
diff --git a/hls4ml/optimization/fused_dotp/symbolic_variable.py b/hls4ml/optimization/fused_dotp/symbolic_variable.py
index b57f2f7e7..a7e0f0dad 100644
--- a/hls4ml/optimization/fused_dotp/symbolic_variable.py
+++ b/hls4ml/optimization/fused_dotp/symbolic_variable.py
@@ -35,6 +35,7 @@ def __init__(
         const: float | int = 0,
         id: str | None = None,
         depth=0,
+        n_depth=0,
     ):
         """
         precision: precision of the variable. If it is a number, the Variable will define a constant.
@@ -64,6 +65,7 @@ def __init__(
         self.const = const
         self.children: tuple[Variable, ...] = ()
         self.depth = depth
+        self.n_depth = n_depth
 
         self._proper_precision = False
 
@@ -123,7 +125,7 @@ def __add__(self, other) -> 'Variable':
         const = other
         precision = self.precision + other
 
-        return Variable(precision, ancestors, operation, const, depth=self.depth + 1)
+        return Variable(precision, ancestors, operation, const, n_depth=self.n_depth, depth=self.depth)
 
     @__add__.register(VariableBase)
     def _(self, other: 'Variable'):
@@ -151,8 +153,13 @@ def _(self, other: 'Variable'):
         ancestors = (self, other)
         const = 0
 
-        depth = max(self.depth, other.depth) + 1
-        return Variable(precision, ancestors, 'add', const, depth=depth)
+        p1, p2 = self.precision, other.precision
+        I1, I2 = p1.I, p2.I
+        f1, f2 = p1.f, p2.f
+        ddepth = max(I1, I2) + max(f1, f2)
+        n_depth = max(self.n_depth, other.n_depth) + 1
+        depth = max(self.depth, other.depth) + ddepth
+        return Variable(precision, ancestors, 'add', const, depth=depth, n_depth=n_depth)
 
     @singledispatchmethod
     def __mul__(self, other) -> 'Variable|float|int':
@@ -230,13 +237,13 @@ def __rmul__(self, other):
     def __neg__(self) -> 'Variable':
        if self.operation == 'neg':
            return self.ancestors[0]
-        return Variable(-self.precision, (self,), 'neg', depth=self.depth)
+        return Variable(-self.precision, (self,), 'neg', depth=self.depth + self.precision.b, n_depth=self.n_depth + 1)
 
     def __sub__(self, other) -> 'Variable':
-        if not isinstance(other, Variable):
-            return self + (-other)
-        depth = max(self.depth, other.depth) + 1
-        return Variable(self.precision - other.precision, (self, other), 'sub', depth=depth)
+        # if not isinstance(other, Variable):
+        return self + (-other)
+        # depth = max(self.depth, other.depth) + 1
+        # return Variable(self.precision - other.precision, (self, other), 'sub', depth=depth)
 
     def __rsub__(self, other) -> 'Variable':
         return -self + other
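
With this patch, `depth` tracks the bit-level (full-adder) length of the critical path rather than the number of operations, while the new `n_depth` keeps the old adder-stage count. A minimal standalone sketch of the addition rule from `__add__` above, assuming a toy fixed-point type with `I`/`f`/`b` attributes in place of the real precision class (the `FxP` and `add_depth` names are illustrative, not part of this patch):

# Illustrative sketch only; mirrors the depth/n_depth update rule for 'add'.
from dataclasses import dataclass


@dataclass
class FxP:
    I: int  # integer bits (incl. sign)
    f: int  # fractional bits

    @property
    def b(self) -> int:
        return self.I + self.f


def add_depth(p1: FxP, p2: FxP, d1: int, d2: int, n1: int, n2: int) -> tuple[int, int]:
    # depth grows by the ripple-carry length of the aligned operands;
    # n_depth counts adder stages on the critical path.
    ddepth = max(p1.I, p2.I) + max(p1.f, p2.f)
    return max(d1, d2) + ddepth, max(n1, n2) + 1


# Two chained additions of <4,4> fixed-point values:
d, n = add_depth(FxP(4, 4), FxP(4, 4), 0, 0, 0, 0)  # d = 8,  n = 1
d, n = add_depth(FxP(5, 4), FxP(4, 4), d, 0, n, 0)  # d = 17, n = 2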