-
Notifications
You must be signed in to change notification settings - Fork 914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Change how kedro imports datasets and raise an explicit error when dependencies are missing #3272
Changes from 18 commits
20ef723
8b3d36f
d4bff90
922d353
0df7120
0e475c0
645a2ee
b92c5db
d9ca8d1
6e2a638
96050ac
5d33a29
5116411
5d78809
2450178
10d0d0f
2a4e605
2121fe7
dff2973
2dd537f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -376,25 +376,22 @@ def parse_dataset_definition( | |
if "type" not in config: | ||
raise DatasetError("'type' is missing from dataset catalog configuration") | ||
|
||
class_obj = config.pop("type") | ||
if isinstance(class_obj, str): | ||
if len(class_obj.strip(".")) != len(class_obj): | ||
dataset_type = config.pop("type") | ||
if isinstance(dataset_type, str): | ||
if len(dataset_type.strip(".")) != len(dataset_type): | ||
raise DatasetError( | ||
"'type' class path does not support relative " | ||
"paths or paths ending with a dot." | ||
) | ||
class_paths = (prefix + class_obj for prefix in _DEFAULT_PACKAGES) | ||
class_paths = (prefix + dataset_type for prefix in _DEFAULT_PACKAGES) | ||
|
||
for class_path in class_paths: | ||
tmp = _load_obj(class_path) | ||
if tmp is not None: | ||
class_obj = tmp | ||
break | ||
else: | ||
raise DatasetError( | ||
f"Class '{class_obj}' not found or one of its dependencies " | ||
f"has not been installed." | ||
) | ||
raise DatasetError(f"Class '{dataset_type}' not found, is this a typo?") | ||
|
||
if not issubclass(class_obj, AbstractDataset): | ||
raise DatasetError( | ||
|
@@ -422,8 +419,9 @@ def parse_dataset_definition( | |
return class_obj, config | ||
|
||
|
||
def _load_obj(class_path: str) -> object | None: | ||
def _load_obj(class_path: str) -> Any | None: | ||
mod_path, _, class_name = class_path.rpartition(".") | ||
# Check if the module exists | ||
try: | ||
available_classes = load_obj(f"{mod_path}.__all__") | ||
# ModuleNotFoundError: When `load_obj` can't find `mod_path` (e.g `kedro.io.pandas`) | ||
|
@@ -432,18 +430,16 @@ def _load_obj(class_path: str) -> object | None: | |
# `__all__` attribute -- either because it's a custom or a kedro.io dataset | ||
except (ModuleNotFoundError, AttributeError, ValueError): | ||
available_classes = None | ||
|
||
try: | ||
class_obj = load_obj(class_path) | ||
except (ModuleNotFoundError, ValueError): | ||
return None | ||
except AttributeError as exc: | ||
except (ModuleNotFoundError, ValueError, AttributeError) as exc: | ||
# If it's available, module exist but dependencies are missing | ||
if available_classes and class_name in available_classes: | ||
raise DatasetError( | ||
f"{exc} Please see the documentation on how to " | ||
f"{exc}. Please see the documentation on how to " | ||
f"install relevant dependencies for {class_path}:\n" | ||
f"https://kedro.readthedocs.io/en/stable/" | ||
f"kedro_project_setup/dependencies.html" | ||
f"https://docs.kedro.org/en/stable/kedro_project_setup/" | ||
f"dependencies.html#install-dependencies-related-to-the-data-catalog" | ||
Comment on lines
+439
to
+442
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated the link according to the review suggestion. |
||
) from exc | ||
return None | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,4 @@ def load_obj(obj_path: str, default_obj_path: str = "") -> Any: | |
obj_path = obj_path_list.pop(0) if len(obj_path_list) > 1 else default_obj_path | ||
obj_name = obj_path_list[0] | ||
module_obj = importlib.import_module(obj_path) | ||
if not hasattr(module_obj, obj_name): | ||
raise AttributeError(f"Object '{obj_name}' cannot be loaded from '{obj_path}'.") | ||
Comment on lines
-26
to
-27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can now create a |
||
return getattr(module_obj, obj_name) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
generate_timestamp, | ||
get_filepath_str, | ||
get_protocol_and_path, | ||
parse_dataset_definition, | ||
validate_on_forbidden_chars, | ||
) | ||
|
||
|
@@ -265,6 +266,32 @@ def test_validate_forbidden_chars(self, input): | |
with pytest.raises(DatasetError, match=expected_error_message): | ||
validate_on_forbidden_chars(**input) | ||
|
||
def test_dataset_name_typo(self, mocker):
    """A dataset type that cannot be resolved raises the "typo" error.

    ``kedro.io.core.load_obj`` is patched to return ``None`` for every
    lookup, so no candidate class path yields a class and
    ``parse_dataset_definition`` is expected to raise ``DatasetError``.
    """
    # Local import keeps this test self-contained.
    import re

    # If the module doesn't exist, the patched loader returns None instead
    # of raising ModuleNotFoundError.
    mocker.patch("kedro.io.core.load_obj", return_value=None)
    dataset_name = "lAmbDaDaTAsET"

    # `match` is treated as a regular expression; without escaping, the
    # trailing "?" would make the final "o" optional rather than requiring
    # a literal question mark in the error message.
    expected_message = re.escape(
        f"Class '{dataset_name}' not found, is this a typo?"
    )
    with pytest.raises(DatasetError, match=expected_message):
        parse_dataset_definition({"type": dataset_name})
|
||
def test_dataset_missing_dependencies(self, mocker):
    """A listed dataset that fails to import raises the dependency hint.

    The patched loader answers ``__all__`` lookups with a list containing
    the dataset name and raises ``ModuleNotFoundError`` for anything else,
    so ``parse_dataset_definition`` is expected to raise ``DatasetError``
    pointing at the dependency documentation.
    """
    dataset_name = "LambdaDataset"

    def fake_load_obj(obj_path):
        # Every lookup other than `__all__` behaves like a failed import.
        if "__all__" not in obj_path:
            raise ModuleNotFoundError
        return [dataset_name]

    mocker.patch("kedro.io.core.load_obj", side_effect=fake_load_obj)

    catalog_entry = {"type": dataset_name}
    pattern = "Please see the documentation on how to install relevant dependencies"
    with pytest.raises(DatasetError, match=pattern):
        parse_dataset_definition(catalog_entry)
|
||
Comment on lines
+269
to
+294
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added tests covering the two common scenarios. |
||
|
||
class TestAbstractVersionedDataset: | ||
def test_version_str_repr(self, load_version, save_version): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Refactored the name because
class_obj
is overloaded and confusing. Originally it is a str
but it is loaded as a class later, so it's better to make it explicit.