Skip to content

Commit

Permalink
Update pyright (#952)
Browse files Browse the repository at this point in the history
* exclude build from pyright config (instead of setting a root directory)
* update pyright version, and fix or ignore new warnings
* add type ascription to use of openml data in relational tests
  • Loading branch information
shinnar authored Jan 18, 2022
1 parent 9578a4f commit ce5c1e6
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ repos:
language: node
pass_filenames: false
types: [python]
additional_dependencies: ['pyright@1.1.180']
additional_dependencies: ['pyright@1.1.210']
13 changes: 8 additions & 5 deletions lale/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -3378,8 +3378,9 @@ def make_operator(
else:
hps: Dict[str, Any] = {}
frozen: Optional[List[str]] = None
if hasattr(impl, "get_params"):
hps = impl.get_params(deep=False)
impl_get_params = getattr(impl, "get_params", None)
if impl_get_params is not None:
hps = impl_get_params(deep=False)
frozen = list(hps.keys())

if hasattr(impl, "fit"):
Expand Down Expand Up @@ -3588,7 +3589,9 @@ def __init__(
self._steps = steps
if _lale_preds:
# TODO: improve typing situation
if isinstance(list(_lale_preds.keys())[0], int):
keys: Iterable[Any] = _lale_preds.keys()
first_key = next(iter(keys))
if isinstance(first_key, int):
self._preds = self._indices_to_preds(steps, _lale_preds) # type: ignore
self._cached_preds = _lale_preds # type: ignore
else:
Expand Down Expand Up @@ -3631,7 +3634,7 @@ def __init__(
]
sink_nodes = tstep._find_sink_nodes()
# Now replace the edges to and from the inner pipeline to to and from source and sink nodes respectively
new_edges = tstep.edges()
new_edges: List[Tuple[OpType, OpType]] = tstep.edges()
# list comprehension at the cost of iterating edges thrice
new_edges.extend(
[
Expand Down Expand Up @@ -3661,7 +3664,7 @@ def __init__(
self._steps.append(step)
self._preds = {step: [] for step in self._steps}
for (src, dst) in edges:
self._preds[dst].append(src)
self._preds[dst].append(src) # type: ignore
if not ordered:
self.__sort_topologically()
assert self.__is_in_topological_order()
Expand Down
5 changes: 4 additions & 1 deletion lale/schema_simplifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ class set_with_str_for_keys(Generic[VV]):

def __init__(self, elems: Union[Dict[str, VV], Iterable[VV]]):
if isinstance(elems, dict):
self._elems = elems
# The type hint is needed since technically a Dict[str, something_else]
# is an Iterable[str], which could match the latter type,
# but pass this type guard
self._elems = elems # type: ignore
else:
self._elems = {str(v): v for v in elems}

Expand Down
6 changes: 2 additions & 4 deletions pyrightconfig.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{
"executionEnvironments": [
{"root": "lale"}
]
}
"exclude": ["build"]
}
11 changes: 8 additions & 3 deletions test/test_core_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,11 @@ def test_multiple_calls_with_classes(self):
)
_ = new_trained_pipeline.predict(self.X_test)

def _last_impl_has(self, op, attr):
    """Report whether the impl of the last step of ``op`` has ``attr``.

    ``op.get_last()`` is asserted to be non-None before its ``_impl``
    is inspected with ``hasattr``.
    """
    final_step = op.get_last()
    assert final_step is not None
    underlying = final_step._impl
    return hasattr(underlying, attr)

def test_second_call_without_classes(self):
trainable_pipeline = StandardScaler()
trained_pipeline = trainable_pipeline.fit(self.X_train, self.y_train)
Expand All @@ -1039,7 +1044,7 @@ def test_second_call_without_classes(self):
self.X_train, self.y_train, classes=[0, 1, 2]
)
# Once SGDClassifier is trained, it has a classes_ attribute.
self.assertTrue(hasattr(new_trained_pipeline.get_last()._impl, "classes_"))
self.assertTrue(self._last_impl_has(new_trained_pipeline, "classes_"))
new_trained_pipeline = new_trained_pipeline.partial_fit(
self.X_test, self.y_test
)
Expand All @@ -1053,7 +1058,7 @@ def test_second_call_with_different_classes(self):
self.X_train, self.y_train, classes=[0, 1, 2]
)
# Once SGDClassifier is trained, it has a classes_ attribute.
self.assertTrue(hasattr(new_trained_pipeline.get_last()._impl, "classes_"))
self.assertTrue(self._last_impl_has(new_trained_pipeline, "classes_"))
subset_labels = self.y_test[np.where(self.y_test != 0)]
subset_X = self.X_test[0 : len(subset_labels)]
new_trained_pipeline = new_trained_pipeline.partial_fit(subset_X, subset_labels)
Expand All @@ -1065,7 +1070,7 @@ def test_second_call_with_different_classes_trainable(self):
new_pipeline = trained_pipeline.freeze_trained() >> SGDClassifier()
new_pipeline.partial_fit(self.X_train, self.y_train, classes=[0, 1, 2])
# Once SGDClassifier is trained, it has a classes_ attribute.
self.assertTrue(hasattr(new_pipeline._trained.get_last()._impl, "classes_"))
self.assertTrue(self._last_impl_has(new_pipeline._trained, "classes_"))
subset_labels = self.y_test[np.where(self.y_test != 0)]
subset_X = self.X_test[0 : len(subset_labels)]
new_trained_pipeline = new_pipeline.partial_fit(subset_X, subset_labels)
Expand Down
88 changes: 55 additions & 33 deletions test/test_relational_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,15 @@ def _check_trained(self, op1, op2, msg):
for i in range(len(op1.categories_)):
self.assertEqual(list(op1.categories_[i]), list(op2.categories_[i]), msg)

def _check_last_trained(self, op1, op2, msg):
    """Compare the trained impls of the last steps of two pipelines.

    Fetches the last step of ``op1`` and ``op2`` via ``get_last()``,
    asserts that both exist, and forwards each step's ``impl`` to
    ``self._check_trained`` together with ``msg``.
    """
    last1 = op1.get_last()
    last2 = op2.get_last()

    # Check the steps themselves, before dereferencing them, so a missing
    # last step is reported by the assert rather than by an AttributeError.
    assert last1 is not None
    assert last2 is not None

    # Take ``.impl`` exactly once, as the inline calls this helper replaces did.
    self._check_trained(last1.impl, last2.impl, msg)

def test_fit(self):
prefix = Scan(table=it.go_daily_sales) >> Map(
columns={"retailer": it["Retailer code"], "method": it["Order method code"]}
Expand All @@ -300,9 +309,7 @@ def test_fit(self):
sk_trained = sk_trainable.fit(self.tgt2gosales["pandas"])
for tgt, datasets in self.tgt2gosales.items():
rasl_trained = rasl_trainable.fit(datasets)
self._check_trained(
sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
)
self._check_last_trained(sk_trained, rasl_trained, tgt)

def test_partial_fit(self):
prefix = Scan(table=it.go_daily_sales) >> Map(
Expand Down Expand Up @@ -334,9 +341,7 @@ def test_transform(self):
sk_transformed = sk_trained.transform(self.tgt2gosales["pandas"])
for tgt, datasets in self.tgt2gosales.items():
rasl_trained = rasl_trainable.fit(datasets)
self._check_trained(
sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
)
self._check_last_trained(sk_trained, rasl_trained, tgt)
rasl_transformed = rasl_trained.transform(datasets)
if tgt == "spark":
rasl_transformed = rasl_transformed.toPandas()
Expand Down Expand Up @@ -373,23 +378,38 @@ def test_predict(self):
class TestOneHotEncoder(unittest.TestCase):
@classmethod
def setUpClass(cls):
import typing
from typing import Any, Dict

targets = ["pandas", "spark"]
cls.tgt2creditg = {
tgt: lale.datasets.openml.fetch(
"credit-g",
"classification",
preprocess=False,
astype=tgt,
)
for tgt in targets
}
cls.tgt2creditg = typing.cast(
Dict[str, Any],
{
tgt: lale.datasets.openml.fetch(
"credit-g",
"classification",
preprocess=False,
astype=tgt,
)
for tgt in targets
},
)

def _check_trained(self, op1, op2, msg):
    """Assert that two trained encoders agree on their fitted state.

    Compares ``feature_names_in_`` as lists, then the number of entries
    in ``categories_``, then each pair of category entries as lists.
    ``msg`` is passed to every assertion as the failure message.
    """
    self.assertEqual(list(op1.feature_names_in_), list(op2.feature_names_in_), msg)
    self.assertEqual(len(op1.categories_), len(op2.categories_), msg)
    # Lengths were just asserted equal, so zip pairs every entry.
    for cats1, cats2 in zip(op1.categories_, op2.categories_):
        self.assertEqual(list(cats1), list(cats2), msg)

def _check_last_trained(self, op1, op2, msg):
    """Compare the trained impls of the last steps of two pipelines.

    Fetches the last step of ``op1`` and ``op2`` via ``get_last()``,
    asserts that both exist, and forwards each step's ``impl`` to
    ``self._check_trained`` together with ``msg``.
    """
    last1 = op1.get_last()
    last2 = op2.get_last()

    # Check the steps themselves, before dereferencing them, so a missing
    # last step is reported by the assert rather than by an AttributeError.
    assert last1 is not None
    assert last2 is not None

    # Take ``.impl`` exactly once, as the inline calls this helper replaces did.
    self._check_trained(last1.impl, last2.impl, msg)

def test_fit(self):
(train_X_pd, _), (_, _) = self.tgt2creditg["pandas"]
cat_columns = categorical()(train_X_pd)
Expand All @@ -400,9 +420,7 @@ def test_fit(self):
for tgt, dataset in self.tgt2creditg.items():
(train_X, train_y), (test_X, test_y) = dataset
rasl_trained = rasl_trainable.fit(train_X)
self._check_trained(
sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
)
self._check_last_trained(sk_trained, rasl_trained, tgt)

def test_partial_fit(self):
(train_X_pd, _), (_, _) = self.tgt2creditg["pandas"]
Expand All @@ -418,9 +436,9 @@ def test_partial_fit(self):
if tgt == "spark":
data_delta = lale.datasets.pandas2spark(data_delta)
rasl_pipe = rasl_pipe.partial_fit(data_delta)
self._check_trained(
sk_pipe.get_last().impl,
rasl_pipe.get_last().impl,
self._check_last_trained(
sk_pipe,
rasl_pipe,
(tgt, lower, upper),
)

Expand All @@ -435,9 +453,7 @@ def test_transform(self):
for tgt, dataset in self.tgt2creditg.items():
(train_X, train_y), (test_X, test_y) = dataset
rasl_trained = rasl_trainable.fit(train_X)
self._check_trained(
sk_trained.get_last().impl, rasl_trained.get_last().impl, tgt
)
self._check_last_trained(sk_trained, rasl_trained, tgt)
rasl_transformed = rasl_trained.transform(test_X)
if tgt == "spark":
rasl_transformed = rasl_transformed.toPandas()
Expand Down Expand Up @@ -473,16 +489,22 @@ def test_predict(self):
class TestStandardScaler(unittest.TestCase):
@classmethod
def setUpClass(cls):
import typing
from typing import Any, Dict

targets = ["pandas", "spark"]
cls.tgt2creditg = {
tgt: lale.datasets.openml.fetch(
"credit-g",
"classification",
preprocess=True,
astype=tgt,
)
for tgt in targets
}
cls.tgt2creditg = typing.cast(
Dict[str, Any],
{
tgt: lale.datasets.openml.fetch(
"credit-g",
"classification",
preprocess=True,
astype=tgt,
)
for tgt in targets
},
)

def _check_trained(self, op1, op2, msg):
self.assertEqual(list(op1.feature_names_in_), list(op2.feature_names_in_), msg)
Expand Down

0 comments on commit ce5c1e6

Please sign in to comment.