User-defined data type and rank constraints (#72)
* feat: support rank and dtype control

* feat+fix: patch requires

* fix(try): use MANIFEST.in to robustify package data

* feat: impl include and exclude by op.name()

* feat: patch extra requires via cli

* refact: test mgen.patch_requires and allow it to be a list or a single value

* fix: check ListConfig
ganler authored Jan 13, 2023
1 parent 4fc796c commit 597669d
Showing 15 changed files with 215 additions and 92 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
@@ -29,6 +29,7 @@ jobs:
yes | python nnsmith/cli/model_gen.py debug.viz=true model.type=torch mgen.method=symbolic
yes | python nnsmith/cli/model_gen.py debug.viz=true model.type=torch mgen.method=symbolic-cinit
yes | python nnsmith/cli/model_gen.py debug.viz=true model.type=torch mgen.method=concolic
yes | python nnsmith/cli/model_gen.py model.type=torch mgen.method=symbolic-cinit mgen.rank_choices="[4]" mgen.dtype_choices="[f32]" mgen.include="[core.NCHWConv2d, core.ReLU]" mgen.patch_requires=./tests/mock/requires_patch.py
- name: Test ONNX + ONNXRuntime
run: |
yes | python nnsmith/cli/model_gen.py model.type=onnx mgen.method=symbolic
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,2 @@
# include yaml files under nnsmith/config/**/*.yaml
include nnsmith/config/**/*.yaml
37 changes: 37 additions & 0 deletions doc/cli.md
@@ -76,6 +76,43 @@ nnsmith.fuzz fuzz.time=30s model.type=onnx backend.type=tvm fuzz.root=fuzz_repor
# Bug reports are stored in `./fuzz_report`.
```

## Limit operator types, ranks and data types

To limit:
- tensor rank to 4 only (needed by Conv2d);
- data type to float32 only;
- operators to Conv2d and ReLU only:

```shell
yes | python nnsmith/cli/model_gen.py model.type=torch mgen.method=symbolic-cinit \
mgen.rank_choices="[4]" \
mgen.dtype_choices="[f32]" \
mgen.include="[core.NCHWConv2d, core.ReLU]" \
debug.viz=true
```

## Add extra constraints

```shell
# Create patch file as `patch.py`
echo 'from nnsmith.abstract.arith import nnsmith_lt
from nnsmith.abstract.extension import patch_requires
@patch_requires("global", "core.NCHWConv2d")
def limit_conv2d(self, _):
    # let the kernel sizes be > 3, i.e., 3 < kernel_{h,w}_size
    return [nnsmith_lt(3, self.kernel_h_size), nnsmith_lt(3, self.kernel_w_size)]
' > patch.py
# Apply the patch with `mgen.patch_requires=./patch.py` (can also be a list of paths)
yes | python nnsmith/cli/model_gen.py model.type=torch mgen.method=symbolic-cinit \
mgen.rank_choices="[4]" \
mgen.dtype_choices="[f32]" \
mgen.include="[core.NCHWConv2d, core.ReLU]" \
mgen.patch_requires=./patch.py \
debug.viz=true
```
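
Constraints registered via `@patch_requires` are appended to the operator's built-in `requires()` conditions at generation time, and patches tagged with a backend's `system_name` (instead of `"global"`) are activated only when that backend factory is in use. As a purely illustrative sketch (this file is not part of the repository), a second patch could bound the kernel sizes from above:

```python
# illustrative_patch.py -- hypothetical extra patch file, not shipped with nnsmith
from nnsmith.abstract.arith import nnsmith_lt
from nnsmith.abstract.extension import patch_requires


@patch_requires("global", "core.NCHWConv2d")
def bound_conv2d_kernels(self, _):
    # keep kernel sizes strictly below 8, i.e., kernel_{h,w}_size < 8
    return [nnsmith_lt(self.kernel_h_size, 8), nnsmith_lt(self.kernel_w_size, 8)]
```

Passing both files, e.g. `mgen.patch_requires="[./patch.py, ./illustrative_patch.py]"`, would then constrain every generated Conv2d kernel to `3 < k < 8`.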

## Misc

TensorFlow logging can be very noisy. Set the `TF_CPP_MIN_LOG_LEVEL=3` environment variable to suppress it.
1 change: 1 addition & 0 deletions nnsmith/abstract/dtype.py
@@ -175,6 +175,7 @@ def sizeof(self) -> int:
# "DTYPE_GEN_ALL" is surely a subset of all types but it is
# used conservatively to avoid unsupported data types while
# applying nnsmith to various frameworks.
DTYPE_ALL = [dt for dt in DType]
DTYPE_GEN_ALL = [
DType.float32,
DType.float64,
48 changes: 17 additions & 31 deletions nnsmith/abstract/extension.py
@@ -1,43 +1,29 @@
import functools
import types
from typing import List
from typing import List, Optional, Type

from nnsmith.abstract.op import AbsOpBase
from nnsmith.abstract.tensor import AbsTensor
REQUIRES_PATCH = {}
ACTIVATED_PATCH = {}

BACKEND_REQUIRES = {}


def copy_requires(f):
g = types.FunctionType(
f.__code__, f.__globals__, name=f.__name__, closure=f.__closure__
)
return functools.update_wrapper(g, f)


class rewrite_requires:
class patch_requires:
def __init__(self, tag: str, opname: str):
self.tag = tag
self.opname = opname

def __call__(self, f):
BACKEND_REQUIRES.setdefault(self.tag, {}).setdefault(self.opname, f)
REQUIRES_PATCH.setdefault(self.tag, {}).setdefault(self.opname, []).append(f)
return f


class patch_requires:
def __init__(self, tag: str, opname: str):
self.tag = tag
self.opname = opname
self.prev_fn = None

def __call__(self, f):
def patch_with_prev(op: AbsOpBase, itensors: List[AbsTensor]):
if self.prev_fn is None:
self.prev_fn = copy_requires(op.requires)
return f(op, itensors) + self.prev_fn(op, itensors)
def activate_ext(
opset: List[Type["AbsOpBase"]], factory: Optional["BackendFactory"] = None
):
for op in opset:
if "global" in REQUIRES_PATCH:
ACTIVATED_PATCH.setdefault(op.name(), []).extend(
REQUIRES_PATCH["global"].get(op.name(), [])
)

BACKEND_REQUIRES.setdefault(self.tag, {}).setdefault(
self.opname, patch_with_prev
)
return patch_with_prev
if factory is not None and factory.system_name in REQUIRES_PATCH:
ACTIVATED_PATCH.setdefault(op.name(), []).extend(
REQUIRES_PATCH[factory.system_name].get(op.name(), [])
)
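
In short, the rewritten extension module keeps two module-level registries: `patch_requires` records constraint functions per tag (`"global"` or a backend's `system_name`) and per op name in `REQUIRES_PATCH`, and `activate_ext` copies the matching entries into `ACTIVATED_PATCH`, which `checked_requires` in `op.py` (next file) consults. A simplified, self-contained sketch of this registry pattern, using standalone names rather than the repository code:

```python
from typing import Callable, Dict, List, Optional

# tag -> op name -> constraint functions (plays the role of REQUIRES_PATCH)
_REQUIRES: Dict[str, Dict[str, List[Callable]]] = {}
# op name -> activated constraint functions (plays the role of ACTIVATED_PATCH)
_ACTIVATED: Dict[str, List[Callable]] = {}


def patch_requires(tag: str, opname: str):
    """Decorator: register `fn` as an extra constraint rule for `opname` under `tag`."""
    def register(fn: Callable) -> Callable:
        _REQUIRES.setdefault(tag, {}).setdefault(opname, []).append(fn)
        return fn
    return register


def activate_ext(op_names: List[str], system_name: Optional[str] = None) -> None:
    """Enable "global" patches plus those matching the backend's system name."""
    for name in op_names:
        for tag in ("global", system_name):
            if tag is not None and tag in _REQUIRES:
                _ACTIVATED.setdefault(name, []).extend(_REQUIRES[tag].get(name, []))


@patch_requires("global", "core.NCHWConv2d")
def demo_rule(op, itensors):
    return []  # a real rule would return a list of z3 constraints


activate_ext(["core.NCHWConv2d", "core.ReLU"])
assert _ACTIVATED["core.NCHWConv2d"] == [demo_rule]
assert not _ACTIVATED.get("core.ReLU")  # no rules registered for ReLU
```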
6 changes: 5 additions & 1 deletion nnsmith/abstract/op.py
@@ -16,6 +16,7 @@
DTYPE_GEN_NON_BOOL,
DType,
)
from nnsmith.abstract.extension import ACTIVATED_PATCH
from nnsmith.abstract.tensor import AbsTensor
from nnsmith.error import ConstraintCheck, SanityCheck

@@ -347,7 +348,10 @@ def deduct_inp_ranks_and_dtype(

@check_require_fn # Public API.
def checked_requires(self, input_shapes):
return self.requires(input_shapes)
extra = []
for f in ACTIVATED_PATCH.get(self.name(), []):
extra.extend(f(self, input_shapes))
return self.requires(input_shapes) + extra

def n_floats(self, input_shapes: List[AbsTensor]) -> z3.ExprRef:
return reduce(nnsmith_add, [i.nelement() for i in self.output_like])
12 changes: 1 addition & 11 deletions nnsmith/backends/factory.py
@@ -2,13 +2,11 @@
import sys
import traceback
from abc import ABC, abstractmethod
from typing import Callable, Dict, List, Optional, Type, Union
from typing import Callable, Dict, List, Optional, Union

import numpy as np

from nnsmith.abstract.dtype import DType
from nnsmith.abstract.extension import BACKEND_REQUIRES
from nnsmith.abstract.op import AbsOpBase
from nnsmith.abstract.tensor import AbsTensor
from nnsmith.difftest import assert_allclose
from nnsmith.error import InternalError
@@ -370,11 +368,3 @@ def init(name, target="cpu", optmax=True, **kwargs):
)
else:
raise ValueError(f"unknown backend: {name}")

def add_constraints(self, op_types: List[Type[AbsOpBase]]) -> List[Type[AbsOpBase]]:

for optype in op_types:
if optype.name() in BACKEND_REQUIRES[self.system_name]:
optype.requires = BACKEND_REQUIRES[self.system_name][optype.name()]

return op_types
20 changes: 17 additions & 3 deletions nnsmith/cli/fuzz.py
@@ -10,6 +10,7 @@
import hydra
from omegaconf import DictConfig

from nnsmith.abstract.extension import activate_ext
from nnsmith.backends.factory import BackendFactory
from nnsmith.cli.model_exec import verify_testcase
from nnsmith.error import InternalError
@@ -19,7 +20,13 @@
from nnsmith.macro import NNSMITH_BUG_PATTERN_TOKEN
from nnsmith.materialize import Model, TestCase
from nnsmith.narrow_spec import auto_opset
from nnsmith.util import mkdir, parse_timestr, set_seed
from nnsmith.util import (
hijack_patch_requires,
mkdir,
op_filter,
parse_timestr,
set_seed,
)


class StatusCollect:
@@ -130,10 +137,15 @@ def __init__(
model_cfg["type"], backend_target=cfg["backend"]["target"]
)
self.ModelType.add_seed_setter()
self.opset = auto_opset(
self.ModelType, self.factory, vulops=cfg["mgen"]["vulops"]
self.opset = op_filter(
auto_opset(self.ModelType, self.factory, vulops=cfg["mgen"]["vulops"]),
cfg["mgen"]["include"],
cfg["mgen"]["exclude"],
)

hijack_patch_requires(cfg["mgen"]["patch_requires"])
activate_ext(opset=self.opset, factory=self.factory)

seed = cfg["fuzz"]["seed"] or random.getrandbits(32)
set_seed(seed)

@@ -163,6 +175,8 @@ def make_testcase(self, seed) -> TestCase:
max_elem_per_tensor=mgen_cfg["max_elem_per_tensor"],
max_nodes=mgen_cfg["max_nodes"],
timeout_ms=mgen_cfg["timeout_ms"],
rank_choices=mgen_cfg["rank_choices"],
dtype_choices=mgen_cfg["dtype_choices"],
)

ir = gen.make_concrete()
11 changes: 7 additions & 4 deletions nnsmith/cli/model_gen.py
@@ -6,12 +6,13 @@
import hydra
from omegaconf import DictConfig

from nnsmith.abstract.extension import activate_ext
from nnsmith.backends.factory import BackendFactory
from nnsmith.graph_gen import SymbolicGen, model_gen, viz
from nnsmith.logging import MGEN_LOG
from nnsmith.materialize import Model, TestCase
from nnsmith.narrow_spec import auto_opset
from nnsmith.util import mkdir
from nnsmith.util import hijack_patch_requires, mkdir, op_filter


@hydra.main(version_base=None, config_path="../config", config_name="main")
@@ -40,6 +41,9 @@ def main(cfg: DictConfig):

# GENERATION
opset = auto_opset(ModelType, factory, vulops=mgen_cfg["vulops"])
opset = op_filter(opset, mgen_cfg["include"], mgen_cfg["exclude"])
hijack_patch_requires(mgen_cfg["patch_requires"])
activate_ext(opset=opset, factory=factory)

tgen_begin = time.time()
gen = model_gen(
@@ -49,6 +53,8 @@
max_elem_per_tensor=mgen_cfg["max_elem_per_tensor"],
max_nodes=mgen_cfg["max_nodes"],
timeout_ms=mgen_cfg["timeout_ms"],
rank_choices=mgen_cfg["rank_choices"],
dtype_choices=mgen_cfg["dtype_choices"],
)
tgen = time.time() - tgen_begin

@@ -64,9 +70,6 @@
tmat_begin = time.time()
ir = gen.make_concrete()

if MGEN_LOG.getEffectiveLevel() <= logging.DEBUG:
ir.debug()

MGEN_LOG.info(
f"Generated DNN has {ir.n_var()} variables and {ir.n_compute_inst()} operators."
)
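Both CLI entry points now follow the same order: build the op set, narrow it with `mgen.include`/`mgen.exclude` via `op_filter`, load the patch files with `hijack_patch_requires`, activate the extensions against the (optional) backend factory, and hand `rank_choices`/`dtype_choices` to the generator. A condensed sketch of that wiring outside Hydra, with illustrative argument values (parameter names follow the diff where visible; the rest is assumed):

```python
# Sketch of the generation pipeline wiring introduced by this commit.
# Concrete values (model type, method, choices, paths) are illustrative.
from nnsmith.abstract.extension import activate_ext
from nnsmith.graph_gen import model_gen
from nnsmith.materialize import Model
from nnsmith.narrow_spec import auto_opset
from nnsmith.util import hijack_patch_requires, op_filter

ModelType = Model.init("torch")  # assumed factory helper, per the CLI
factory = None                   # no backend factory: only "global" patches activate

opset = auto_opset(ModelType, factory, vulops=False)
opset = op_filter(opset, ["core.NCHWConv2d", "core.ReLU"], None)  # include, exclude
hijack_patch_requires(["./tests/mock/requires_patch.py"])         # list or single path
activate_ext(opset=opset, factory=factory)

gen = model_gen(
    opset=opset,
    method="symbolic-cinit",
    rank_choices=[4],        # only rank-4 tensors
    dtype_choices=["f32"],   # only float32
    max_nodes=5,
)
ir = gen.make_concrete()
```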
28 changes: 5 additions & 23 deletions nnsmith/config/main.yaml
@@ -1,26 +1,3 @@
topset: null # All instances will be a subset of `topset`;
# >> Example:
# topset:
# core.MaxPool2d:
# in_dtypes: [["f32"], ["f64"]]
# out_dtypes: [["f32"], ["f64"]]
# core.Where:
# in_dtypes: [["bool", "f32", "f32"]]
# out_dtypes: [["f32"]]

exclude: null
# >> Example:
# exclude:
# - core.MaxPool2d:
# in_dtypes: [["f32"], ["f64"]]
# out_dtypes: [["f32"], ["f64"]]
# - core.Where:
# in_dtypes: [["bool", "f32", "f32"]]
# out_dtypes: [["f32"]]

topset_from_file: null # Path that contains a YAML file that contains a topset domain as above

# model gen config
model:
type: null
path: "???" # can be multiple files tho.
@@ -33,6 +10,11 @@ mgen: # model gen.
save: "nnsmith_output"
seed: null
max_elem_per_tensor: 65536 # 2^16
rank_choices: null # e.g., "[2, 4]"; null means any rank (0 ~ __MAX_RANK__)
dtype_choices: null # e.g., "[f32, i64]"; null means all supported dtypes
include: null # ops to include; example mgen.include="[core.NCHWConv2d, core.ReLU]"
exclude: null # ops to exclude;
patch_requires: [] # files containing @patch_requires patches (single path or list)

# backend config
backend:
43 changes: 33 additions & 10 deletions nnsmith/graph_gen.py
@@ -39,6 +39,8 @@ def __init__(
forward_prob=None,
concr_ph_dim_rng=(1, 64),
max_elem_per_tensor=2**16,
rank_choices=None,
dtype_choices=None,
):
assert len(opset) > 0, "opset must not be empty"
if seed is not None:
@@ -55,9 +57,20 @@
self.forward_prob = 0.5 if forward_prob is None else forward_prob
self.concr_ph_dim_rng = concr_ph_dim_rng
self.max_elem_per_tensor = max_elem_per_tensor
self.rank_choices = rank_choices if rank_choices else rank_all()
self.dtype_choices = (
[
dt if isinstance(dt, DType) else DType.from_str(dt)
for dt in dtype_choices
]
if dtype_choices
else DTYPE_ALL
)
assert len(self.dtype_choices) > 0, "dtype_choices must not be empty"
assert len(self.rank_choices) > 0, "rank_choices must not be empty"

def random_rank(self):
return random.choice(rank_all())
return random.choice(self.rank_choices)

def tensor_type_constraints(
self, atensor: AbsTensor
@@ -74,7 +87,8 @@ def make_symbolic_placeholder(self, rank, dtype=None) -> Placeholder:
)
ph = Placeholder(
AbsTensor(
shape=syms, dtype=dtype if dtype is not None else self.random_dtype()
shape=syms,
dtype=dtype if dtype is not None else self.random_dtype_gen(),
)
)
self.monotonic_placeholder_id += 1
@@ -97,19 +111,27 @@ def make_random_concrete_placeholder(self, rank, dtype=None):
ph = Placeholder(
AbsTensor(
shape=shape,
dtype=dtype if dtype is not None else self.random_dtype(),
dtype=dtype if dtype is not None else self.random_dtype_gen(),
)
)
return ph

def random_dtype(self):
def random_dtype_gen(self):
# more floats than ints.
wts = [1] * len(DTYPE_GEN_ALL)
for i in DTYPE_GEN_FLOATS:
wts[DTYPE_GEN_ALL.index(i)] = 4
for i in DTYPE_GEN_INTS:
wts[DTYPE_GEN_ALL.index(i)] = 1
return random.choices(DTYPE_GEN_ALL, weights=wts)[0]
# ~ in DTYPE_GEN_ALL and in self.dtype_choices
dtypes = [dt for dt in DTYPE_GEN_ALL if dt in self.dtype_choices]
assert (
len(dtypes) > 0
), "Empty INTERSECT(DTYPE_GEN_ALL, dtype_choices). Please relax dtype_choices."

wts = [1] * len(dtypes)
for dt in DTYPE_GEN_FLOATS:
if dt in dtypes:
wts[DTYPE_GEN_ALL.index(dt)] = 4
for dt in DTYPE_GEN_INTS:
if dt in dtypes:
wts[DTYPE_GEN_ALL.index(dt)] = 1
return random.choices(dtypes, weights=wts)[0]

def new_sym(self, name):
return z3.Int(name)
@@ -560,6 +582,7 @@ def try_occupy_placeholder(self, node: AbsOpBase, phvars: List[str]) -> bool:
return True

def make_concrete(self) -> GraphIR:
SanityCheck.gt(len(self.ir.insts), 0, "Empty graph!")
SanityCheck.not_none(self.last_solution, "Run check_sat first!")
self.ir.concretize(self.last_solution)
return self.ir
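The renamed `random_dtype_gen` now samples from the intersection of `DTYPE_GEN_ALL` and the user's `dtype_choices`, keeping the original bias of floats being four times as likely as other types. A standalone illustration of that weighted-intersection sampling (generic placeholder names, not the repository code):

```python
import random

# Stand-ins for DTYPE_GEN_ALL / DTYPE_GEN_FLOATS in the real module.
GEN_DTYPES = ["f32", "f64", "i32", "i64", "bool"]
FLOAT_DTYPES = {"f32", "f64"}


def random_dtype(choices):
    # Keep only dtypes that are both generatable and allowed by the user.
    dtypes = [dt for dt in GEN_DTYPES if dt in choices]
    assert dtypes, "empty INTERSECT(GEN_DTYPES, choices); relax dtype_choices"
    # Floats get weight 4; everything else keeps weight 1.
    weights = [4 if dt in FLOAT_DTYPES else 1 for dt in dtypes]
    return random.choices(dtypes, weights=weights)[0]


print(random_dtype({"f32", "i64"}))  # "f32" ~80% of the time, "i64" ~20%
```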