diff --git a/hls4ml/optimization/fused_dotp/dotp_unroll.py b/hls4ml/optimization/fused_dotp/dotp_unroll.py
index e0830f17c..c99283e68 100644
--- a/hls4ml/optimization/fused_dotp/dotp_unroll.py
+++ b/hls4ml/optimization/fused_dotp/dotp_unroll.py
@@ -234,7 +234,7 @@ def nadd_max(n, w_c):
     return int(arr[idx]), int(idx + 1)
 
 
-def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
+def _compile_dense(kernel: np.ndarray, inp: np.ndarray):
     "Compile a matmul operation with MAC Tree"
     ch_in, ch_out = kernel.shape
     assert ch_out == 1, 'Only single output channel is supported for each unrolled operation'
@@ -246,7 +246,7 @@ def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
         inp = inp[np.arange(len(inp)), signs]
         kernel = np.abs(kernel)
     # ==============================================================
-    if minmal_latency:
+    if _global_config.minimal_latency_compile:
         return [_min_latency_compile_dense(kernel, inp)]
 
     r: list[float | Variable | list[Variable]] = np.empty((ch_out, 0), dtype=object).tolist()
@@ -297,18 +297,18 @@ def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
         r[i] = x
 
     if kernel2 is not None:
-        return [r] + _compile_dense(kernel2, inp, minmal_latency)
+        return [r] + _compile_dense(kernel2, inp)
     else:
         return [r]
 
 
-def compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
+def compile_dense(kernel: np.ndarray, inp: np.ndarray):
     out = []
     if not _global_config.use_ternary and np.any(kernel):
         inp = np.stack([-inp, inp], axis=1)
 
     for _kernel in kernel.T:  # ch_in, 1
-        r = _compile_dense(_kernel[:, None], inp, minmal_latency=minmal_latency)
+        r = _compile_dense(_kernel[:, None], inp)
         r = balanced_reduction([x[0] for x in r])
         out.append(r)
     return np.array(out).T
@@ -324,4 +324,4 @@ def compile_conv(kernel: np.ndarray, inp: list | np.ndarray, minimal_latency=Non
     ch_in = int(np.prod(_ch_in))
     inp = np.reshape(inp, ch_in)
     kernel = np.reshape(kernel, (ch_in, ch_out))
-    return compile_dense(kernel, inp, minmal_latency=minimal_latency)
+    return compile_dense(kernel, inp)
diff --git a/hls4ml/optimization/fused_dotp/resoure_surrogate.py b/hls4ml/optimization/fused_dotp/resoure_surrogate.py
index 4149695bf..99d90cba2 100644
--- a/hls4ml/optimization/fused_dotp/resoure_surrogate.py
+++ b/hls4ml/optimization/fused_dotp/resoure_surrogate.py
@@ -125,7 +125,9 @@ def trace(self, r: list | np.ndarray, name: str, pf: int = 1):
             return
         if len(arr) > 0:
             depth = max(v.depth for v in arr if isinstance(v, Variable))
+            n_depth = sum(v.n_depth for v in arr if isinstance(v, Variable))
             params['depth'] = depth
+            params['n_depth'] = n_depth
         params['pf'] = pf
         self.layers[name] = params
 
diff --git a/hls4ml/optimization/fused_dotp/symbolic_variable.py b/hls4ml/optimization/fused_dotp/symbolic_variable.py
index b57f2f7e7..a7e0f0dad 100644
--- a/hls4ml/optimization/fused_dotp/symbolic_variable.py
+++ b/hls4ml/optimization/fused_dotp/symbolic_variable.py
@@ -35,6 +35,7 @@ def __init__(
         const: float | int = 0,
         id: str | None = None,
         depth=0,
+        n_depth=0,
     ):
         """
         precision: precision of the variable. If it is a number, the Variable will define a constant.
@@ -64,6 +65,7 @@ def __init__(
         self.const = const
         self.children: tuple[Variable, ...] = ()
         self.depth = depth
+        self.n_depth = n_depth
 
         self._proper_precision = False
 
@@ -123,7 +125,7 @@ def __add__(self, other) -> 'Variable':
         const = other
         precision = self.precision + other
 
-        return Variable(precision, ancestors, operation, const, depth=self.depth + 1)
+        return Variable(precision, ancestors, operation, const, n_depth=self.n_depth, depth=self.depth)
 
     @__add__.register(VariableBase)
     def _(self, other: 'Variable'):
@@ -151,8 +153,13 @@ def _(self, other: 'Variable'):
         ancestors = (self, other)
         const = 0
 
-        depth = max(self.depth, other.depth) + 1
-        return Variable(precision, ancestors, 'add', const, depth=depth)
+        p1, p2 = self.precision, other.precision
+        I1, I2 = p1.I, p2.I
+        f1, f2 = p1.f, p2.f
+        ddepth = max(I1, I2) + max(f1, f2)
+        n_depth = max(self.n_depth, other.n_depth) + 1
+        depth = max(self.depth, other.depth) + ddepth
+        return Variable(precision, ancestors, 'add', const, depth=depth, n_depth=n_depth)
 
     @singledispatchmethod
     def __mul__(self, other) -> 'Variable|float|int':
@@ -230,13 +237,13 @@ def __rmul__(self, other):
     def __neg__(self) -> 'Variable':
        if self.operation == 'neg':
            return self.ancestors[0]
-        return Variable(-self.precision, (self,), 'neg', depth=self.depth)
+        return Variable(-self.precision, (self,), 'neg', depth=self.depth + self.precision.b, n_depth=self.n_depth + 1)
 
     def __sub__(self, other) -> 'Variable':
-        if not isinstance(other, Variable):
-            return self + (-other)
-        depth = max(self.depth, other.depth) + 1
-        return Variable(self.precision - other.precision, (self, other), 'sub', depth=depth)
+        # if not isinstance(other, Variable):
+        return self + (-other)
+        # depth = max(self.depth, other.depth) + 1
+        # return Variable(self.precision - other.precision, (self, other), 'sub', depth=depth)
 
     def __rsub__(self, other) -> 'Variable':
         return -self + other
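
With this patch, `depth` tracks the bit-level (full-adder) length of the critical path rather than the number of operations, while the new `n_depth` keeps the old adder-stage count. A minimal standalone sketch of the addition rule from `__add__` above, assuming a toy fixed-point type with `I`/`f`/`b` attributes in place of the real precision class (the `FxP` and `add_depth` names are illustrative, not part of this patch):

# Illustrative sketch only; mirrors the depth/n_depth update rule for 'add'.
from dataclasses import dataclass


@dataclass
class FxP:
    I: int  # integer bits (incl. sign)
    f: int  # fractional bits

    @property
    def b(self) -> int:
        return self.I + self.f


def add_depth(p1: FxP, p2: FxP, d1: int, d2: int, n1: int, n2: int) -> tuple[int, int]:
    # depth grows by the ripple-carry length of the aligned operands;
    # n_depth counts adder stages on the critical path.
    ddepth = max(p1.I, p2.I) + max(p1.f, p2.f)
    return max(d1, d2) + ddepth, max(n1, n2) + 1


# Two chained additions of <4,4> fixed-point values:
d, n = add_depth(FxP(4, 4), FxP(4, 4), 0, 0, 0, 0)  # d = 8,  n = 1
d, n = add_depth(FxP(5, 4), FxP(4, 4), d, 0, n, 0)  # d = 17, n = 2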