
Commit ec233df
misc
calad0i committed Oct 7, 2024
1 parent df80842
Showing 3 changed files with 23 additions and 14 deletions.
hls4ml/optimization/fused_dotp/dotp_unroll.py (12 changes: 6 additions & 6 deletions)
@@ -234,7 +234,7 @@ def nadd_max(n, w_c):
     return int(arr[idx]), int(idx + 1)


-def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
+def _compile_dense(kernel: np.ndarray, inp: np.ndarray):
     "Compile a matmul operation with MAC Tree"
     ch_in, ch_out = kernel.shape
     assert ch_out == 1, 'Only single output channel is supported for each unrolled operation'
@@ -246,7 +246,7 @@ def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
     inp = inp[np.arange(len(inp)), signs]
     kernel = np.abs(kernel)
     # ==============================================================
-    if minmal_latency:
+    if _global_config.minimal_latency_compile:
         return [_min_latency_compile_dense(kernel, inp)]

     r: list[float | Variable | list[Variable]] = np.empty((ch_out, 0), dtype=object).tolist()
@@ -297,18 +297,18 @@ def _compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
         r[i] = x

     if kernel2 is not None:
-        return [r] + _compile_dense(kernel2, inp, minmal_latency)
+        return [r] + _compile_dense(kernel2, inp)
     else:
         return [r]


-def compile_dense(kernel: np.ndarray, inp: np.ndarray, minmal_latency=False):
+def compile_dense(kernel: np.ndarray, inp: np.ndarray):
     out = []

     if not _global_config.use_ternary and np.any(kernel):
         inp = np.stack([-inp, inp], axis=1)
         for _kernel in kernel.T:  # ch_in, 1
-            r = _compile_dense(_kernel[:, None], inp, minmal_latency=minmal_latency)
+            r = _compile_dense(_kernel[:, None], inp)
             r = balanced_reduction([x[0] for x in r])
             out.append(r)
         return np.array(out).T
@@ -324,4 +324,4 @@ def compile_conv(kernel: np.ndarray, inp: list | np.ndarray, minimal_latency=None):
     ch_in = int(np.prod(_ch_in))
     inp = np.reshape(inp, ch_in)
     kernel = np.reshape(kernel, (ch_in, ch_out))
-    return compile_dense(kernel, inp, minmal_latency=minimal_latency)
+    return compile_dense(kernel, inp)
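
Note: this file's change drops the (mis-spelled) minmal_latency argument and instead reads the mode from _global_config.minimal_latency_compile, so the compile strategy is set once globally rather than threaded through every recursive _compile_dense call. A minimal sketch of that pattern, with stand-in names for everything except the flag itself:

    from dataclasses import dataclass

    @dataclass
    class _GlobalConfig:
        use_ternary: bool = False
        minimal_latency_compile: bool = False  # replaces the old per-call flag

    _global_config = _GlobalConfig()

    def _compile_dense(kernel, inp):
        # The mode is read from shared state once per call instead of being
        # passed explicitly down the recursion.
        if _global_config.minimal_latency_compile:
            return 'min-latency schedule'     # stand-in for _min_latency_compile_dense
        return 'resource-optimized schedule'  # stand-in for the MAC-tree path

    _global_config.minimal_latency_compile = True
    assert _compile_dense(None, None) == 'min-latency schedule'

The trade-off is implicit state: callers can no longer request a different mode per operation, which is presumably acceptable because the flag applies model-wide.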
hls4ml/optimization/fused_dotp/resoure_surrogate.py (2 changes: 2 additions & 0 deletions)
@@ -125,7 +125,9 @@ def trace(self, r: list | np.ndarray, name: str, pf: int = 1):
             return
         if len(arr) > 0:
             depth = max(v.depth for v in arr if isinstance(v, Variable))
+            n_depth = sum(v.n_depth for v in arr if isinstance(v, Variable))
             params['depth'] = depth
+            params['n_depth'] = n_depth
         params['pf'] = pf
         self.layers[name] = params

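With variables now carrying an n_depth counter (see symbolic_variable.py below), the resource surrogate records both numbers per layer: depth as the worst case over the traced outputs, and n_depth summed across them. A rough sketch of the bookkeeping, assuming only that traced objects expose .depth and .n_depth:

    class Var:  # stand-in for the symbolic Variable
        def __init__(self, depth: int, n_depth: int):
            self.depth, self.n_depth = depth, n_depth

    def trace(arr: list, name: str, layers: dict, pf: int = 1) -> None:
        params: dict = {}
        if len(arr) > 0:
            # Worst-case combinational depth over all outputs of the layer,
            # plus the summed adder-stage count as a second latency proxy.
            params['depth'] = max(v.depth for v in arr if isinstance(v, Var))
            params['n_depth'] = sum(v.n_depth for v in arr if isinstance(v, Var))
        params['pf'] = pf
        layers[name] = params

    layers: dict = {}
    trace([Var(12, 3), Var(9, 2)], 'dense_0', layers)
    assert layers == {'dense_0': {'depth': 12, 'n_depth': 5, 'pf': 1}}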
hls4ml/optimization/fused_dotp/symbolic_variable.py (23 changes: 15 additions & 8 deletions)
@@ -35,6 +35,7 @@ def __init__(
         const: float | int = 0,
         id: str | None = None,
         depth=0,
+        n_depth=0,
     ):
         """
         precision: precision of the variable. If it is a number, the Variable will define a constant.
@@ -64,6 +65,7 @@ def __init__(
         self.const = const
         self.children: tuple[Variable, ...] = ()
         self.depth = depth
+        self.n_depth = n_depth

         self._proper_precision = False

@@ -123,7 +125,7 @@ def __add__(self, other) -> 'Variable':
             const = other

         precision = self.precision + other
-        return Variable(precision, ancestors, operation, const, depth=self.depth + 1)
+        return Variable(precision, ancestors, operation, const, n_depth=self.n_depth, depth=self.depth)

     @__add__.register(VariableBase)
     def _(self, other: 'Variable'):
@@ -151,8 +153,13 @@ def _(self, other: 'Variable'):

         ancestors = (self, other)
         const = 0
-        depth = max(self.depth, other.depth) + 1
-        return Variable(precision, ancestors, 'add', const, depth=depth)
+        p1, p2 = self.precision, other.precision
+        I1, I2 = p1.I, p2.I
+        f1, f2 = p1.f, p2.f
+        ddepth = max(I1, I2) + max(f1, f2)
+        n_depth = max(self.n_depth, other.n_depth) + 1
+        depth = max(self.depth, other.depth) + ddepth
+        return Variable(precision, ancestors, 'add', const, depth=depth, n_depth=n_depth)

     @singledispatchmethod
     def __mul__(self, other) -> 'Variable|float|int':
@@ -230,13 +237,13 @@ def __rmul__(self, other):
     def __neg__(self) -> 'Variable':
         if self.operation == 'neg':
             return self.ancestors[0]
-        return Variable(-self.precision, (self,), 'neg', depth=self.depth)
+        return Variable(-self.precision, (self,), 'neg', depth=self.depth + self.precision.b, n_depth=self.n_depth + 1)

     def __sub__(self, other) -> 'Variable':
-        if not isinstance(other, Variable):
-            return self + (-other)
-        depth = max(self.depth, other.depth) + 1
-        return Variable(self.precision - other.precision, (self, other), 'sub', depth=depth)
+        # if not isinstance(other, Variable):
+        return self + (-other)
+        # depth = max(self.depth, other.depth) + 1
+        # return Variable(self.precision - other.precision, (self, other), 'sub', depth=depth)

     def __rsub__(self, other) -> 'Variable':
         return -self + other
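These hunks change what depth means. Previously every add or subtract bumped depth by 1, i.e. it counted adder stages. Now that stage count lives in n_depth (max of the operands' counts plus 1 per two-operand add, plus 1 per negation), while depth accumulates an estimate of carry-propagation length in bits: max(I1, I2) + max(f1, f2) for an add of two fixed-point operands, and precision.b (the full bit width) for a negation; adding a plain constant is treated as free. __sub__ is also reduced to self + (-other), so subtraction reuses exactly this bookkeeping. A worked example, under the assumption that precisions expose integer bits I, fractional bits f, and total width b = I + f:

    from dataclasses import dataclass

    @dataclass
    class P:  # stand-in fixed-point precision
        I: int  # integer bits (including sign)
        f: int  # fractional bits

        @property
        def b(self) -> int:
            return self.I + self.f

    def add_depth(p1: P, p2: P) -> int:
        # After aligning binary points, the adder's carry chain spans the
        # union of both operands' bit ranges.
        return max(p1.I, p2.I) + max(p1.f, p2.f)

    a, b = P(I=4, f=3), P(I=6, f=1)
    print(add_depth(a, b))  # 6 + 3 = 9 bit positions of carry propagation
    print(b.b)              # 7: the per-negation depth increment

Counting bits rather than operations presumably gives the surrogate a delay estimate that distinguishes wide adders from narrow ones, which a pure stage count cannot.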
