kedro-org · noklam · Nov 9, 2023 · Nov 6, 2023 · Nov 6, 2023 · Nov 6, 2023
@@ -6,6 +6,7 @@
 * The new spaceflights starters, `spaceflights-pandas`, `spaceflights-pandas-viz`, `spaceflights-pyspark`, and `spaceflights-pyspark-viz` can be used with the `kedro new` command with the `--starter` flag.
 * Added the `--conf-source` option to `%reload_kedro`, allowing users to specify a source for project configuration.
 * Added the functionality to choose a merging strategy for config files loaded with `OmegaConfigLoader`.
+* Modified the mechanism of importing datasets to raise a more explicit error when dependencies are missing.
 
 
 ## Bug fixes and other changes

@@ -376,25 +376,22 @@ def parse_dataset_definition(
     if "type" not in config:
         raise DatasetError("'type' is missing from dataset catalog configuration")
 
-    class_obj = config.pop("type")
-    if isinstance(class_obj, str):
-        if len(class_obj.strip(".")) != len(class_obj):
+    dataset_type = config.pop("type")
+    if isinstance(dataset_type, str):
+        if len(dataset_type.strip(".")) != len(dataset_type):
             raise DatasetError(
                 "'type' class path does not support relative "
                 "paths or paths ending with a dot."
             )
-        class_paths = (prefix + class_obj for prefix in _DEFAULT_PACKAGES)
+        class_paths = (prefix + dataset_type for prefix in _DEFAULT_PACKAGES)
 
         for class_path in class_paths:
             tmp = _load_obj(class_path)
             if tmp is not None:
                 class_obj = tmp
                 break
         else:
-            raise DatasetError(
-                f"Class '{class_obj}' not found or one of its dependencies "
-                f"has not been installed."
-            )
+            raise DatasetError(f"Class '{dataset_type}' not found, is this a typo?")
 
     if not issubclass(class_obj, AbstractDataset):
         raise DatasetError(
@@ -422,8 +419,9 @@ def parse_dataset_definition(
     return class_obj, config
 
 
-def _load_obj(class_path: str) -> object | None:
+def _load_obj(class_path: str) -> Any | None:
     mod_path, _, class_name = class_path.rpartition(".")
+    # Check if the module exists
     try:
         available_classes = load_obj(f"{mod_path}.__all__")
     # ModuleNotFoundError: When `load_obj` can't find `mod_path` (e.g `kedro.io.pandas`)
@@ -432,18 +430,16 @@ def _load_obj(class_path: str) -> object | None:
     #                 `__all__` attribute -- either because it's a custom or a kedro.io dataset
     except (ModuleNotFoundError, AttributeError, ValueError):
         available_classes = None
-
     try:
         class_obj = load_obj(class_path)
-    except (ModuleNotFoundError, ValueError):
-        return None
-    except AttributeError as exc:
+    except (ModuleNotFoundError, ValueError, AttributeError) as exc:
+        # If the class is listed in the module's `__all__`, the module exists but its dependencies are missing
         if available_classes and class_name in available_classes:
             raise DatasetError(
-                f"{exc} Please see the documentation on how to "
+                f"{exc}. Please see the documentation on how to "
                 f"install relevant dependencies for {class_path}:\n"
-                f"https://kedro.readthedocs.io/en/stable/"
-                f"kedro_project_setup/dependencies.html"
+                f"https://docs.kedro.org/en/stable/kedro_project_setup/"
+                f"dependencies.html#install-dependencies-related-to-the-data-catalog"
             ) from exc
         return None
 

@@ -23,6 +23,4 @@ def load_obj(obj_path: str, default_obj_path: str = "") -> Any:
     obj_path = obj_path_list.pop(0) if len(obj_path_list) > 1 else default_obj_path
     obj_name = obj_path_list[0]
     module_obj = importlib.import_module(obj_path)
-    if not hasattr(module_obj, obj_name):
-        raise AttributeError(f"Object '{obj_name}' cannot be loaded from '{obj_path}'.")
     return getattr(module_obj, obj_name)
@@ -19,6 +19,7 @@
     generate_timestamp,
     get_filepath_str,
     get_protocol_and_path,
+    parse_dataset_definition,
     validate_on_forbidden_chars,
 )
 
@@ -265,6 +266,32 @@ def test_validate_forbidden_chars(self, input):
         with pytest.raises(DatasetError, match=expected_error_message):
             validate_on_forbidden_chars(**input)
 
+    def test_dataset_name_typo(self, mocker):
+        # If the module doesn't exist, it returns None instead of raising ModuleNotFoundError
+        mocker.patch("kedro.io.core.load_obj", return_value=None)
+        dataset_name = "lAmbDaDaTAsET"
+
+        with pytest.raises(
+            DatasetError, match=f"Class '{dataset_name}' not found, is this a typo?"
+        ):
+            parse_dataset_definition({"type": dataset_name})
+
+    def test_dataset_missing_dependencies(self, mocker):
+        # If the module is found but importing the dataset triggers ModuleNotFoundError
+        dataset_name = "LambdaDataset"
+
+        def side_effect_function(value):
+            if "__all__" in value:
+                return [dataset_name]
+            else:
+                raise ModuleNotFoundError
+
+        mocker.patch("kedro.io.core.load_obj", side_effect=side_effect_function)
+
+        pattern = "Please see the documentation on how to install relevant dependencies"
+        with pytest.raises(DatasetError, match=pattern):
+            parse_dataset_definition({"type": dataset_name})
+
 
 class TestAbstractVersionedDataset:
     def test_version_str_repr(self, load_version, save_version):

@@ -18,12 +18,6 @@ def test_load_obj_default_path(self):
         extracted_obj = load_obj("DummyClass", "tests.test_utils")
         assert extracted_obj is DummyClass
 
-    def test_load_obj_invalid_attribute(self):
-        with pytest.raises(
-            AttributeError, match=r"Object 'InvalidClass' cannot be loaded"
-        ):
-            load_obj("InvalidClass", "tests.test_utils")
-
     def test_load_obj_invalid_module(self):
         with pytest.raises(ImportError, match=r"No module named 'missing_path'"):
             load_obj("InvalidClass", "missing_path")