Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow optional arguments in load() #24

Merged
merged 1 commit into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Anything MAY change at any time. The public API SHOULD NOT be considered stable.").
While in this phase, we will denote breaking changes with a minor increase.

## Unreleased patch
## 0.4.0

### Changed

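* The `load` function in `load.py` may now declare optional arguments, i.e. parameters that have default values (for example `def load(sample_frac: float = 1.0)`). Previously, no arguments were allowed; required arguments are still rejected.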
* `load.py` and `schema.py` are publicly accessible under `dac_pkg_name.load` and `dac_pkg_name.schema` respectively. Previously they were marked as private modules, under `dac_pkg_name._load` and `dac_pkg_name._schema`.
* `Schema` does not have to be a `pandera.DataFrameModel` anymore, but any class that implements a `validate` method (see the `_input.interface.Validator` protocol).
* `dac` no longer relies on [`pydantic`](https://pypi.org/project/pydantic/); it uses the standard-library [`dataclass`](https://docs.python.org/3/library/dataclasses.html#) instead.
Changes affect `PackConfig` and `PyProjectConfig`.
* `Schema` does not have to be a `pandera.DataFrameModel` anymore, but any class that implements a `validate` method (see the `_input.interface.Validator` protocol).
* `load.py` and `schema.py` are publicly accessible under `dac_pkg_name.load` and `dac_pkg_name.schema` respectively. Previously they were marked as private modules, under `dac_pkg_name._load` and `dac_pkg_name._schema`.

## 0.3.3

Expand Down
2 changes: 1 addition & 1 deletion src/dac/_input/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _check_load_contains_expected_function(self) -> None:

try:
signature = inspect.getfullargspec(pkg.load)
assert signature.args == []
assert len(signature.args) == (len(signature.defaults) if signature.defaults is not None else 0)
except Exception as e:
raise ValueError((f"{self.load_path.as_posix()} does not contain the required `def load()`")) from e

Expand Down
8 changes: 8 additions & 0 deletions test/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ def get_path_to_sample_load_parquet_as_pandas() -> Path:
return Path(__file__).parent / "load/parquet_as_pandas.py"


def get_path_to_sample_load_parquet_as_pandas_with_sample_frac() -> Path:
    """Return the path to the sample ``load`` module that takes an optional ``sample_frac``.

    The path points at ``load/parquet_as_pandas_with_sample_frac.py`` inside
    the directory that contains this module.
    """
    return Path(__file__).parent / "load" / "parquet_as_pandas_with_sample_frac.py"


def get_path_to_sample_load_parquet_as_pandas_with_sample_n() -> Path:
    """Return the path to the sample ``load`` module that takes a required ``sample_n``.

    The path points at ``load/parquet_as_pandas_with_sample_n.py`` inside the
    directory that contains this module.
    """
    return Path(__file__).parent / "load" / "parquet_as_pandas_with_sample_n.py"


def get_path_to_self_contained_load_as_pandas() -> Path:
    """Return the path to ``load/self_contained_as_pandas.py`` next to this module."""
    return Path(__file__).parent / "load" / "self_contained_as_pandas.py"

Expand Down
7 changes: 7 additions & 0 deletions test/data/load/parquet_as_pandas_with_sample_frac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path

import pandas as pd


def load(sample_frac: float = 1.0) -> pd.DataFrame:
    """Read ``sample.parquet`` from this module's directory and sample its rows.

    Args:
        sample_frac: Fraction of rows to return, forwarded to
            ``DataFrame.sample(frac=...)``. Defaults to ``1.0``.

    Returns:
        The randomly sampled rows as a ``pandas.DataFrame``.
    """
    parquet_path = Path(__file__).parent / "sample.parquet"
    return pd.read_parquet(parquet_path).sample(frac=sample_frac)
7 changes: 7 additions & 0 deletions test/data/load/parquet_as_pandas_with_sample_n.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path

import pandas as pd


def load(sample_n: int) -> pd.DataFrame:
    """Read ``sample.parquet`` from this module's directory and sample ``sample_n`` rows.

    Args:
        sample_n: Number of rows to return, forwarded to
            ``DataFrame.sample(n=...)``.

    Returns:
        The randomly sampled rows as a ``pandas.DataFrame``.
    """
    # NOTE(review): `sample_n` intentionally has no default -- the unit tests
    # appear to use this module as an example of a `load` with a required
    # argument, so do not add one.
    parquet_path = Path(__file__).parent / "sample.parquet"
    return pd.read_parquet(parquet_path).sample(n=sample_n)
25 changes: 25 additions & 0 deletions test/unit_test/_input/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
get_path_to_invalid_load,
get_path_to_invalid_schema,
get_path_to_sample_load_parquet_as_pandas,
get_path_to_sample_load_parquet_as_pandas_with_sample_frac,
get_path_to_sample_load_parquet_as_pandas_with_sample_n,
get_path_to_sample_parquet,
get_path_to_sample_schema,
get_path_to_schema_incompatible_with_sample_df,
Expand Down Expand Up @@ -75,6 +77,29 @@ def test_if_load_does_not_contain_expected_function_then_raise_exception(pyproje
)


def test_if_load_contain_optional_arguments_then_do_not_raise_exception(pyproject: PyProjectConfig):
    """Building a ``PackConfig`` succeeds when ``load`` only has defaulted arguments.

    The test passes as long as constructing the config does not raise.
    """
    with TemporaryDirectory() as tmp_dir:
        PackConfig(
            data_path=get_path_to_sample_parquet(),
            load_path=get_path_to_sample_load_parquet_as_pandas_with_sample_frac(),
            schema_path=get_path_to_sample_schema(),
            wheel_dir=Path(tmp_dir),
            pyproject=pyproject,
        )


def test_if_load_contain_non_optional_arguments_then_raise_exception(pyproject: PyProjectConfig):
    """Building a ``PackConfig`` raises ``ValueError`` when ``load`` has a required argument."""
    # A single `with` enters the temporary directory first and `pytest.raises`
    # second, exactly as the nested form would.
    with TemporaryDirectory() as tmp_dir, pytest.raises(ValueError):
        PackConfig(
            data_path=get_path_to_sample_parquet(),
            load_path=get_path_to_sample_load_parquet_as_pandas_with_sample_n(),
            schema_path=get_path_to_sample_schema(),
            wheel_dir=Path(tmp_dir),
            pyproject=pyproject,
        )


def test_if_invalid_schema_path_then_raise_exception(pyproject: PyProjectConfig):
with TemporaryDirectory() as tmp_dir:
with pytest.raises(ValueError):
Expand Down