Update pyright (#952)

* exclude build from pyright config (instead of setting a root directory)
* update pyright version, and fix or ignore new warnings
* add type ascription to use of openml data in relational tests
IBM · Jan 18, 2022 · ce5c1e6 · ce5c1e6
1 parent 9578a4f
commit ce5c1e6
Show file tree

Hide file tree

Showing 6 changed files with 78 additions and 47 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -47,4 +47,4 @@ repos:
         language: node
         pass_filenames: false
         types: [python]
-        additional_dependencies: ['pyright@1.1.180']
+        additional_dependencies: ['pyright@1.1.210']
diff --git a/lale/operators.py b/lale/operators.py
@@ -3378,8 +3378,9 @@ def make_operator(
     else:
         hps: Dict[str, Any] = {}
         frozen: Optional[List[str]] = None
-        if hasattr(impl, "get_params"):
-            hps = impl.get_params(deep=False)
+        impl_get_params = getattr(impl, "get_params", None)
+        if impl_get_params is not None:
+            hps = impl_get_params(deep=False)
             frozen = list(hps.keys())
 
         if hasattr(impl, "fit"):
@@ -3588,7 +3589,9 @@ def __init__(
             self._steps = steps
             if _lale_preds:
                 # TODO: improve typing situation
-                if isinstance(list(_lale_preds.keys())[0], int):
+                keys: Iterable[Any] = _lale_preds.keys()
+                first_key = next(iter(keys))
+                if isinstance(first_key, int):
                     self._preds = self._indices_to_preds(steps, _lale_preds)  # type: ignore
                     self._cached_preds = _lale_preds  # type: ignore
                 else:
@@ -3631,7 +3634,7 @@ def __init__(
                     ]
                     sink_nodes = tstep._find_sink_nodes()
                     # Now replace the edges to and from the inner pipeline to to and from source and sink nodes respectively
-                    new_edges = tstep.edges()
+                    new_edges: List[Tuple[OpType, OpType]] = tstep.edges()
                     # list comprehension at the cost of iterating edges thrice
                     new_edges.extend(
                         [
@@ -3661,7 +3664,7 @@ def __init__(
                     self._steps.append(step)
             self._preds = {step: [] for step in self._steps}
             for (src, dst) in edges:
-                self._preds[dst].append(src)
+                self._preds[dst].append(src)  # type: ignore
             if not ordered:
                 self.__sort_topologically()
             assert self.__is_in_topological_order()

diff --git a/lale/schema_simplifier.py b/lale/schema_simplifier.py
@@ -75,7 +75,10 @@ class set_with_str_for_keys(Generic[VV]):
 
     def __init__(self, elems: Union[Dict[str, VV], Iterable[VV]]):
         if isinstance(elems, dict):
-            self._elems = elems
+            # The "type: ignore" is needed because a Dict[str, something_else] is
+            # technically an Iterable[str]: it could match the latter Union member
+            # (with VV = str) and still pass this isinstance guard
+            self._elems = elems  # type: ignore
         else:
             self._elems = {str(v): v for v in elems}
 

diff --git a/pyrightconfig.json b/pyrightconfig.json
@@ -1,5 +1,3 @@
 {
-    "executionEnvironments": [
-        {"root": "lale"}
-    ]
-}
+  "exclude": ["build"]
+}
diff --git a/test/test_core_pipeline.py b/test/test_core_pipeline.py
@@ -1031,6 +1031,11 @@ def test_multiple_calls_with_classes(self):
         )
         _ = new_trained_pipeline.predict(self.X_test)
 
+    def _last_impl_has(self, op, attr):  # helper: does the _impl of op's last step have `attr`?
+        last = op.get_last()
+        assert last is not None  # fail here rather than on the `._impl` access below
+        return hasattr(last._impl, attr)
+
     def test_second_call_without_classes(self):
         trainable_pipeline = StandardScaler()
         trained_pipeline = trainable_pipeline.fit(self.X_train, self.y_train)
@@ -1039,7 +1044,7 @@ def test_second_call_without_classes(self):
             self.X_train, self.y_train, classes=[0, 1, 2]
         )
         # Once SGDClassifier is trained, it has a classes_ attribute.
-        self.assertTrue(hasattr(new_trained_pipeline.get_last()._impl, "classes_"))
+        self.assertTrue(self._last_impl_has(new_trained_pipeline, "classes_"))
         new_trained_pipeline = new_trained_pipeline.partial_fit(
             self.X_test, self.y_test
         )
@@ -1053,7 +1058,7 @@ def test_second_call_with_different_classes(self):
             self.X_train, self.y_train, classes=[0, 1, 2]
         )
         # Once SGDClassifier is trained, it has a classes_ attribute.
-        self.assertTrue(hasattr(new_trained_pipeline.get_last()._impl, "classes_"))
+        self.assertTrue(self._last_impl_has(new_trained_pipeline, "classes_"))
         subset_labels = self.y_test[np.where(self.y_test != 0)]
         subset_X = self.X_test[0 : len(subset_labels)]
         new_trained_pipeline = new_trained_pipeline.partial_fit(subset_X, subset_labels)
@@ -1065,7 +1070,7 @@ def test_second_call_with_different_classes_trainable(self):
         new_pipeline = trained_pipeline.freeze_trained() >> SGDClassifier()
         new_pipeline.partial_fit(self.X_train, self.y_train, classes=[0, 1, 2])
         # Once SGDClassifier is trained, it has a classes_ attribute.
-        self.assertTrue(hasattr(new_pipeline._trained.get_last()._impl, "classes_"))
+        self.assertTrue(self._last_impl_has(new_pipeline._trained, "classes_"))
         subset_labels = self.y_test[np.where(self.y_test != 0)]
         subset_X = self.X_test[0 : len(subset_labels)]
         new_trained_pipeline = new_pipeline.partial_fit(subset_X, subset_labels)

diff --git a/test/test_relational_sklearn.py b/test/test_relational_sklearn.py
@@ -290,6 +290,15 @@ def _check_trained(self, op1, op2, msg):
         for i in range(len(op1.categories_)):
             self.assertEqual(list(op1.categories_[i]), list(op2.categories_[i]), msg)
 
+    def _check_last_trained(self, op1, op2, msg):
+        """Run _check_trained on the impls of the last steps of op1 and op2."""
+        last1 = op1.get_last()
+        last2 = op2.get_last()
+        # Check the get_last() results themselves, before reading .impl off them.
+        assert last1 is not None
+        assert last2 is not None
+        self._check_trained(last1.impl, last2.impl, msg)
+
     def test_fit(self):
         prefix = Scan(table=it.go_daily_sales) >> Map(
             columns={"retailer": it["Retailer code"], "method": it["Order method code"]}
@@ -300,9 +309,7 @@ def test_fit(self):
         sk_trained = sk_trainable.fit(self.tgt2gosales["pandas"])
         for tgt, datasets in self.tgt2gosales.items():
             rasl_trained = rasl_trainable.fit(datasets)
-            self._check_trained(
-                sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
-            )
+            self._check_last_trained(sk_trained, rasl_trained, tgt)
 
     def test_partial_fit(self):
         prefix = Scan(table=it.go_daily_sales) >> Map(
@@ -334,9 +341,7 @@ def test_transform(self):
         sk_transformed = sk_trained.transform(self.tgt2gosales["pandas"])
         for tgt, datasets in self.tgt2gosales.items():
             rasl_trained = rasl_trainable.fit(datasets)
-            self._check_trained(
-                sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
-            )
+            self._check_last_trained(sk_trained, rasl_trained, tgt)
             rasl_transformed = rasl_trained.transform(datasets)
             if tgt == "spark":
                 rasl_transformed = rasl_transformed.toPandas()
@@ -373,23 +378,38 @@ def test_predict(self):
 class TestOneHotEncoder(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        import typing
+        from typing import Any, Dict
+
         targets = ["pandas", "spark"]
-        cls.tgt2creditg = {
-            tgt: lale.datasets.openml.fetch(
-                "credit-g",
-                "classification",
-                preprocess=False,
-                astype=tgt,
-            )
-            for tgt in targets
-        }
+        cls.tgt2creditg = typing.cast(
+            Dict[str, Any],
+            {
+                tgt: lale.datasets.openml.fetch(
+                    "credit-g",
+                    "classification",
+                    preprocess=False,
+                    astype=tgt,
+                )
+                for tgt in targets
+            },
+        )
 
     def _check_trained(self, op1, op2, msg):
         self.assertEqual(list(op1.feature_names_in_), list(op2.feature_names_in_), msg)
         self.assertEqual(len(op1.categories_), len(op2.categories_), msg)
         for i in range(len(op1.categories_)):
             self.assertEqual(list(op1.categories_[i]), list(op2.categories_[i]), msg)
 
+    def _check_last_trained(self, op1, op2, msg):
+        """Run _check_trained on the impls of the last steps of op1 and op2."""
+        last1 = op1.get_last()
+        last2 = op2.get_last()
+        # Check the get_last() results themselves, before reading .impl off them.
+        assert last1 is not None
+        assert last2 is not None
+        self._check_trained(last1.impl, last2.impl, msg)
+
     def test_fit(self):
         (train_X_pd, _), (_, _) = self.tgt2creditg["pandas"]
         cat_columns = categorical()(train_X_pd)
@@ -400,9 +420,7 @@ def test_fit(self):
         for tgt, dataset in self.tgt2creditg.items():
             (train_X, train_y), (test_X, test_y) = dataset
             rasl_trained = rasl_trainable.fit(train_X)
-            self._check_trained(
-                sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
-            )
+            self._check_last_trained(sk_trained, rasl_trained, tgt)
 
     def test_partial_fit(self):
         (train_X_pd, _), (_, _) = self.tgt2creditg["pandas"]
@@ -418,9 +436,9 @@ def test_partial_fit(self):
                 if tgt == "spark":
                     data_delta = lale.datasets.pandas2spark(data_delta)
                 rasl_pipe = rasl_pipe.partial_fit(data_delta)
-                self._check_trained(
-                    sk_pipe.get_last().impl,
-                    rasl_pipe.get_last().impl,
+                self._check_last_trained(
+                    sk_pipe,
+                    rasl_pipe,
                     (tgt, lower, upper),
                 )
 
@@ -435,9 +453,7 @@ def test_transform(self):
         for tgt, dataset in self.tgt2creditg.items():
             (train_X, train_y), (test_X, test_y) = dataset
             rasl_trained = rasl_trainable.fit(train_X)
-            self._check_trained(
-                sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
-            )
+            self._check_last_trained(sk_trained, rasl_trained, tgt)
             rasl_transformed = rasl_trained.transform(test_X)
             if tgt == "spark":
                 rasl_transformed = rasl_transformed.toPandas()
@@ -473,16 +489,22 @@ def test_predict(self):
 class TestStandardScaler(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
+        import typing
+        from typing import Any, Dict
+
         targets = ["pandas", "spark"]
-        cls.tgt2creditg = {
-            tgt: lale.datasets.openml.fetch(
-                "credit-g",
-                "classification",
-                preprocess=True,
-                astype=tgt,
-            )
-            for tgt in targets
-        }
+        cls.tgt2creditg = typing.cast(
+            Dict[str, Any],
+            {
+                tgt: lale.datasets.openml.fetch(
+                    "credit-g",
+                    "classification",
+                    preprocess=True,
+                    astype=tgt,
+                )
+                for tgt in targets
+            },
+        )
 
     def _check_trained(self, op1, op2, msg):
         self.assertEqual(list(op1.feature_names_in_), list(op2.feature_names_in_), msg)