diff --git a/docs/design/interface/python-functions.qmd b/docs/design/interface/python-functions.qmd index dcfe9fa42..43629dfe3 100644 --- a/docs/design/interface/python-functions.qmd +++ b/docs/design/interface/python-functions.qmd @@ -417,6 +417,38 @@ print(path_resource_data(4, 2)) ``` ::: +## Properties dataclasses + +These dataclasses contain an explicit, structured set of official +properties defined within a data package. The main purpose of these is +to allow us to pass structured properties objects between functions. +They also enable users to create valid properties objects more easily +and get an overview of optional and required class fields. + +::: {.callout-note collapse="true"} +### `PackageProperties` + +See the help documentation with `help(PackageProperties())` for more +details on the properties. +::: + +## Properties functions + +::: {.callout-warning collapse="true"} +### `read_properties(path)` + +Reads the `datapackage.json` file, checks that it is correct, and then +outputs a `PackageProperties` object. +::: + +::: {.callout-warning collapse="true"} +### `view_properties(path)` + +Reads the `datapackage.json` file, checks that it is correct, and then +outputs a human-readable version of the properties as a string. +::: + + ::: {.callout-note collapse="true"} ### `path_properties(package_id)` @@ -497,130 +529,6 @@ print(path_sprout_global()) ``` [PosixPath('~/.sprout')] ``` -::: - -## Properties dataclasses - -These dataclasses contain an explicit, structured set of official -properties defined within a data package. The main purpose of these is -to allow us to pass structured properties objects between functions. -They also enable users to create valid properties objects more easily -and get an overview of optional and required class fields. - -::: {.callout-note collapse="true"} -### `PackageProperties(...)` - -Creates a dataclass object with all the necessary properties for the top -level metadata of a data package. 
- -#### Example usage - -``` python -print(PackageProperties()) -``` - - - -``` -PackageProperties(title=None, description=None, licenses=None, contributors=None, resources=None) -``` - -``` python -print(PackageProperties(title="Diabetes Cohort")) -``` - -``` -PackageProperties(title="Diabetes Cohort", description=None, licenses=None, contributors=None, resources=None) -``` - -``` python -print(PackageProperties(licenses=[LicenseProperties(name="ODC-BY-1.0")])) -``` - -``` -PackageProperties(title=None, description=None, licenses=[LicenseProperties(name="ODC-BY-1.0")], contributors=None, resources=None) -``` -::: - -::: {.callout-note collapse="true"} -### `ResourceProperties(...)` - -Creates a dataclass object with all the necessary properties for a -resource, which would be given in the `resources` field of a -`PackageProperties` object. - -#### Example usage - -``` python -print(ResourceProperties()) -``` - -``` -ResourceProperties(name=None, description=None, path=None, schema=None) -``` - -``` python -print(ResourceProperties(name="Blood Samples")) -``` - -``` -ResourceProperties(name="Blood Samples", description=None, path=None, schema=None) -``` -::: - -::: {.callout-note collapse="true"} -### `ContributorProperties(...)` - -Creates a dataclass object with all the necessary properties for a -contributor. This would be given in the `contributors` field of a -`PackageProperties` object. - -#### Example usage - -``` python -print(ContributorProperties()) -``` - -``` -ContributorProperties(title=None, email=None, roles=None) -``` -::: - -::: {.callout-note collapse="true"} -### `LicenseProperties(...)` - -Creates a dataclass object with all the necessary properties for a -license, so that it can be added to the `licenses` field of a -`PackageProperties` object. 
- -#### Example usage - -``` python -print(LicenseProperties()) -``` - -``` -LicenseProperties(name=None, path=None, title=None) -``` -::: - -::: {.callout-note collapse="true"} -### `TableSchemaProperties(...)` - -Creates a dataclass object with all the necessary properties for a table -schema, so that it can be added to the `schema` field of a -`ResourceProperties` object. - -#### Example usage - -``` python -print(TableSchemaProperties()) -``` - -``` -TableSchemaProperties(fields=[], missingValues=[], primaryKey=[], foreignKeys=[]) -``` -::: ## Helper functions diff --git a/seedcase_sprout/core/properties.py b/seedcase_sprout/core/properties.py index 6be054638..794a0a762 100644 --- a/seedcase_sprout/core/properties.py +++ b/seedcase_sprout/core/properties.py @@ -19,22 +19,24 @@ class Properties(ABC): - """An abstract base class for all *Properties classes holding common logic.""" + """An abstract base class for all `*Properties` classes holding common logic.""" @classmethod @abstractmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values.""" + """Creates a dataclass `*Properties` object with default values.""" pass @property def compact_dict(self) -> dict: - """Converts the object to a dictionary, removing any keys with None values. + """Converts the dataclass `*Properties` object to a dictionary. - Applies recursively to nested `*Properties` objects. + Applies recursively to nested `*Properties` objects. Also removes any keys with + None values. Returns: - A dictionary representation of the object with only non-None values. + A dictionary representation of the `*Properties` object with only non-None + values. """ return asdict( obj=self, @@ -45,14 +47,14 @@ def compact_dict(self) -> dict: @classmethod def from_dict(cls: type[Self], data: dict) -> Self: - """Creates an instance populated with data from a dictionary. + """Creates a dataclass `*Properties` object filled with data from a dictionary. 
Args: - cls: The class to create an instance of. - data: The data to populate the instance with. + cls: The class to create the `*Properties` object from. + data: The data to fill the `*Properties` object with. Returns: - An instance of the class with the properties from the dictionary. + A `*Properties` object with the properties from the dictionary. """ return from_dict(data_class=cls, data=data) @@ -61,6 +63,10 @@ def from_dict(cls: type[Self], data: dict) -> Self: class ContributorProperties(Properties): """The people or organizations who contributed to this data package. + Creates a dataclass object with all the necessary properties for a + contributor. This would be given in the `contributors` field of a + `PackageProperties` object. + Attributes: title (str | None): The name of the contributor. path (str | None): A fully qualified URL pointing to a relevant @@ -74,6 +80,13 @@ class ContributorProperties(Properties): contributor. roles (list[str] | None): An array of strings describing the roles of the contributor. + + Examples: + ```{python} + import seedcase_sprout.core as sp + print(sp.ContributorProperties()) + print(sp.ContributorProperties(title="Amir Smith")) + ``` """ title: str | None = None @@ -86,10 +99,10 @@ class ContributorProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `ContributorProperties` object with default values. Returns: - A ContributorProperties object with default values + A `ContributorProperties` object with default values. """ return cls( title="", @@ -106,11 +119,21 @@ def default(cls: type[Self]) -> Self: class LicenseProperties(Properties): """The license(s) under which the package or resource is provided. + Creates a dataclass object with all the necessary properties for a + license, so that it can be added to the `licenses` field of a + `PackageProperties` object. 
+ Attributes: name (str | None): Must be an Open Definition license identifier, see http://licenses.opendefinition.org/ path (str | None): A fully qualified URL, or a POSIX file path. title (str | None): A human-readable title. + + Examples: + ```{python} + import seedcase_sprout.core as sp + print(sp.LicenseProperties()) + ``` """ name: str | None = None @@ -119,10 +142,10 @@ class LicenseProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `LicenseProperties` object with default values. Returns: - A LicenseProperties object with default values + A `LicenseProperties` object with default values. """ return cls(name="", path="", title="") @@ -145,10 +168,10 @@ class SourceProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `SourceProperties` dataclass with default values. Returns: - A SourceProperties object with default values + A `SourceProperties` object with default values. """ return cls(title="", path="", email="", version="") @@ -221,10 +244,10 @@ class TableDialectProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `TableDialectProperties` dataclass with default values. Returns: - A TableDialectProperties object with default values + A `TableDialectProperties` dataclass with default values. """ return cls( header=True, @@ -265,10 +288,10 @@ class ReferenceProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `ReferenceProperties` dataclass with default values. Returns: - A ReferenceProperties object with default values + A `ReferenceProperties` dataclass with default values. 
""" return cls(resource="", fields=[]) @@ -293,10 +316,10 @@ class TableSchemaForeignKeyProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `TableSchemaForeignKeyProperties` dataclass with default values. Returns: - A TableSchemaForeignKeyProperties object with default values + A `TableSchemaForeignKeyProperties` dataclass with default values. """ return cls(fields=[]) @@ -315,10 +338,10 @@ class MissingValueProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `MissingValueProperties` dataclass with default values. Returns: - A MissingValueProperties object with default values + A `MissingValueProperties` dataclass with default values. """ return cls(value="", label="") @@ -346,7 +369,11 @@ def default(cls: type[Self]) -> Self: @dataclass class ConstraintsProperties(Properties): - """A class that expresses constraints for validating field values. + """A dataclass that expresses constraints for validating field values. + + A constraint is a rule that dictates the given values, or range of values, + that a variable or column can have in a dataset. For instance, a constraint + for an "age" column could be that it must be greater than 0 but less than 120. Attributes: required (bool | None): Indicates whether a property must have a @@ -387,10 +414,10 @@ class ConstraintsProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `ConstraintsProperties` dataclass with default values. Returns: - A ConstraintsProperties object with default values + A `ConstraintsProperties` dataclass with default values. """ return cls( required=False, @@ -442,10 +469,10 @@ class FieldProperties(Properties): @classmethod def default(cls: "type[Self]") -> Self: - """Creates an instance with default values. 
+ """Creates a `FieldProperties` dataclass with default values. Returns: - A FieldProperties object with default values + A `FieldProperties` dataclass with default values. """ return cls( name="", @@ -489,6 +516,12 @@ class TableSchemaProperties(Properties): table (resource). missing_values (list[str] | list[MissingValueProperties] | None): Values that, when encountered in the source, should be considered as not present. + + Examples: + ```{python} + import seedcase_sprout.core as sp + print(sp.TableSchemaProperties(primary_key="id")) + ``` """ fields: list[FieldProperties] | None = None @@ -500,10 +533,10 @@ class TableSchemaProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `TableSchemaProperties` dataclass with default values. Returns: - A TableSchemaProperties object with default values + A `TableSchemaProperties` dataclass with default values. """ return cls( fields=[], @@ -524,6 +557,10 @@ class ResourceProperties(Properties): the data it describes. A range of other properties can be declared to provide a richer set of metadata. + Creates a dataclass object with all the necessary properties for a resource, + which would be given in the `resources` field of a `PackageProperties` + dataclass. + Attributes: name (str | None): A simple name or identifier to be used for this resource. Should consist only of lowercase English alphanumeric characters plus @@ -548,6 +585,13 @@ class ResourceProperties(Properties): data. schema (TableSchemaProperties | None): A table schema for the resource data, compliant with the table schema specification. 
+ + Examples: + ```{python} + import seedcase_sprout.core as sp + print(sp.ResourceProperties()) + print(sp.ResourceProperties(name="Blood samples", path="data.csv")) + ``` """ name: str | None = None @@ -567,10 +611,10 @@ class ResourceProperties(Properties): @classmethod def default(cls: "type[Self]") -> Self: - """Creates an instance with default values. + """Creates a `ResourceProperties` dataclass with default values. Returns: - A ResourceProperties object with default values + A `ResourceProperties` dataclass with default values. """ return cls( name="", @@ -590,12 +634,14 @@ def default(cls: "type[Self]") -> Self: @dataclass class PackageProperties(Properties): - """A data package. + """Properties for a data package. A simple container format for describing a coherent collection of data in a single "package". It provides the basis for convenient delivery, installation and management of datasets. + Creates a dataclass object with all the necessary properties for the package. + Attributes: name (str | None): A simple name or identifier to be used for this package. Should consist only of lowercase English alphanumeric characters plus @@ -616,6 +662,26 @@ class PackageProperties(Properties): in this data package, each compliant with the data resource specification. sources (list[SourceProperties] | None): The raw sources for this data package. + + Examples: + ```{python} + import seedcase_sprout.core as sp + print(sp.PackageProperties()) + print(sp.PackageProperties(name="diabetes-cohort", title="Diabetes Cohort")) + print(sp.PackageProperties(licenses=[sp.LicenseProperties(name="ODC-BY-1.0")])) + + # To allow multiline strings, use dedent. + from textwrap import dedent + print(sp.PackageProperties( + title="Birds of North America", + description=dedent(''' + # Markdown header + + A dataset of bird sightings. With some **bolding**. 
+ ''' + ) + )) + ``` """ name: str | None = None @@ -634,10 +700,10 @@ class PackageProperties(Properties): @classmethod def default(cls: type[Self]) -> Self: - """Creates an instance with default values. + """Creates a `PackageProperties` dataclass with default values. Returns: - A PackageProperties object with default values + A `PackageProperties` dataclass with default values. """ return cls( name="",