diff --git a/README.md b/README.md index a7eaa81..c420ce3 100644 --- a/README.md +++ b/README.md @@ -51,40 +51,44 @@ fn pig_latinnify(inputs: &[Series], kwargs: PigLatinKwargs) -> PolarsResult pl.Expr: - return self._expr._register_plugin( - lib=lib, - symbol="pig_latinnify", - is_elementwise=True, - kwargs={"capitalize": capatilize} - ) +def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + symbol="pig_latinnify", + is_elementwise=True, + kwargs={"capitalize": capitalize}, + ) ``` - Compile/ship and then it is ready to use: ```python import polars as pl -import expression_lib +from expression_lib import language df = pl.DataFrame({ "names": ["Richard", "Alice", "Bob"], }) +out = df.with_columns( + pig_latin = language.pig_latinnify("names") +) +``` +Alternatively, you can [register a custom namespace](https://docs.pola.rs/py-polars/html/reference/api/polars.api.register_expr_namespace.html#polars.api.register_expr_namespace), which enables you to write: +```python out = df.with_columns( pig_latin = pl.col("names").language.pig_latinnify() ) diff --git a/example/derive_expression/expression_lib/expression_lib/__init__.py b/example/derive_expression/expression_lib/expression_lib/__init__.py index 87982af..e69de29 100644 --- a/example/derive_expression/expression_lib/expression_lib/__init__.py +++ b/example/derive_expression/expression_lib/expression_lib/__init__.py @@ -1,111 +0,0 @@ -import polars as pl -from polars.type_aliases import IntoExpr -from polars.utils.udfs import _get_shared_lib_location - -lib = _get_shared_lib_location(__file__) - - -@pl.api.register_expr_namespace("language") -class Language: - def __init__(self, expr: pl.Expr): - self._expr = expr - - def pig_latinnify(self, capitalize: bool = False) -> pl.Expr: - return self._expr.register_plugin( - lib=lib, - symbol="pig_latinnify", - is_elementwise=True, - kwargs={"capitalize": capitalize}, - ) - - def 
append_args( - self, - float_arg: float, - integer_arg: int, - string_arg: str, - boolean_arg: bool, - ) -> pl.Expr: - """ - This example shows how arguments other than `Series` can be used. - """ - return self._expr.register_plugin( - lib=lib, - args=[], - kwargs={ - "float_arg": float_arg, - "integer_arg": integer_arg, - "string_arg": string_arg, - "boolean_arg": boolean_arg, - }, - symbol="append_kwargs", - is_elementwise=True, - ) - - -@pl.api.register_expr_namespace("dist") -class Distance: - def __init__(self, expr: pl.Expr): - self._expr = expr - - def hamming_distance(self, other: IntoExpr) -> pl.Expr: - return self._expr.register_plugin( - lib=lib, - args=[other], - symbol="hamming_distance", - is_elementwise=True, - ) - - def jaccard_similarity(self, other: IntoExpr) -> pl.Expr: - return self._expr.register_plugin( - lib=lib, - args=[other], - symbol="jaccard_similarity", - is_elementwise=True, - ) - - def haversine( - self, - start_lat: IntoExpr, - start_long: IntoExpr, - end_lat: IntoExpr, - end_long: IntoExpr, - ) -> pl.Expr: - return self._expr.register_plugin( - lib=lib, - args=[start_lat, start_long, end_lat, end_long], - symbol="haversine", - is_elementwise=True, - cast_to_supertypes=True, - ) - - -@pl.api.register_expr_namespace("date_util") -class DateUtil: - def __init__(self, expr: pl.Expr): - self._expr = expr - - def is_leap_year(self) -> pl.Expr: - return self._expr.register_plugin( - lib=lib, - symbol="is_leap_year", - is_elementwise=True, - ) - - # Note that this already exists in Polars. It is just for explanatory - # purposes. 
- def change_time_zone(self, tz: str = "Europe/Amsterdam") -> pl.Expr: - return self._expr.register_plugin( - lib=lib, symbol="change_time_zone", is_elementwise=True, kwargs={"tz": tz} - ) - - -@pl.api.register_expr_namespace("panic") -class Panic: - def __init__(self, expr: pl.Expr): - self._expr = expr - - def panic(self) -> pl.Expr: - return self._expr.register_plugin( - lib=lib, - symbol="panic", - ) diff --git a/example/derive_expression/expression_lib/expression_lib/date_util.py b/example/derive_expression/expression_lib/expression_lib/date_util.py new file mode 100644 index 0000000..b461902 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/date_util.py @@ -0,0 +1,25 @@ +import polars as pl +from polars.type_aliases import IntoExpr +from polars.utils.udfs import _get_shared_lib_location + +from expression_lib.utils import parse_into_expr + +lib = _get_shared_lib_location(__file__) + + +def is_leap_year(expr: IntoExpr) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + symbol="is_leap_year", + is_elementwise=True, + ) + + +# Note that this already exists in Polars. It is just for explanatory +# purposes. 
+def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, symbol="change_time_zone", is_elementwise=True, kwargs={"tz": tz} + ) diff --git a/example/derive_expression/expression_lib/expression_lib/dist.py b/example/derive_expression/expression_lib/expression_lib/dist.py new file mode 100644 index 0000000..86e1e18 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/dist.py @@ -0,0 +1,44 @@ +import polars as pl +from polars.type_aliases import IntoExpr +from polars.utils.udfs import _get_shared_lib_location + +from expression_lib.utils import parse_into_expr + +lib = _get_shared_lib_location(__file__) + + +def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + args=[other], + symbol="hamming_distance", + is_elementwise=True, + ) + + +def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + args=[other], + symbol="jaccard_similarity", + is_elementwise=True, + ) + + +def haversine( + expr: IntoExpr, + start_lat: IntoExpr, + start_long: IntoExpr, + end_lat: IntoExpr, + end_long: IntoExpr, +) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + args=[start_lat, start_long, end_lat, end_long], + symbol="haversine", + is_elementwise=True, + cast_to_supertypes=True, + ) diff --git a/example/derive_expression/expression_lib/expression_lib/extension.py b/example/derive_expression/expression_lib/expression_lib/extension.py new file mode 100644 index 0000000..c286424 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/extension.py @@ -0,0 +1,83 @@ +""" +Register Expressions extension with extra functionality. 
+ +Enables you to write + + pl.col("dist_a").dist.jaccard_similarity("dist_b") + +instead of + + dist.jaccard_similarity("dist_a", "dist_b") + +However, note that: + +- you will need to add `import expression_lib.extension` to your code. + Add `# noqa: F401` to avoid linting errors due to unused imports. +- static typing will not recognise your custom namespace. Expect errors such + as `"Expr" has no attribute "dist" [attr-defined]`. +""" +from __future__ import annotations + +import polars as pl +from typing import Any, Callable +from expression_lib import date_util, dist, language, utils, panic + + +@pl.api.register_expr_namespace("language") +class Language: + def __init__(self, expr: pl.Expr): + self._expr = expr + + def __getattr__(self, attr: str) -> Callable[..., pl.Expr]: + if attr in ("pig_latinnify", "append_args"): + + def func(*args: Any, **kwargs: Any) -> pl.Expr: + return getattr(language, attr)(self._expr, *args, **kwargs) + + return func + raise AttributeError(f"{self.__class__} has no attribute {attr}") + + +@pl.api.register_expr_namespace("dist") +class Distance: + def __init__(self, expr: pl.Expr): + self._expr = expr + + def __getattr__(self, attr: str) -> Callable[..., pl.Expr]: + if attr in ("hamming_distance", "jaccard_similarity", "haversine"): + + def func(*args: Any, **kwargs: Any) -> pl.Expr: + return getattr(dist, attr)(self._expr, *args, **kwargs) + + return func + raise AttributeError(f"{self.__class__} has no attribute {attr}") + + +@pl.api.register_expr_namespace("date_util") +class DateUtil: + def __init__(self, expr: pl.Expr): + self._expr = expr + + def __getattr__(self, attr: str) -> Callable[..., pl.Expr]: + if attr in ("change_time_zone", "is_leap_year"): + + def func(*args: Any, **kwargs: Any) -> pl.Expr: + return getattr(date_util, attr)(self._expr, *args, **kwargs) + + return func + raise AttributeError(f"{self.__class__} has no attribute {attr}") + + +@pl.api.register_expr_namespace("panic") +class Panic: + def __init__(self, 
expr: pl.Expr): + self._expr = expr + + def __getattr__(self, attr: str) -> Callable[..., pl.Expr]: + if attr in ("panic",): + + def func(*args: Any, **kwargs: Any) -> pl.Expr: + return getattr(panic, attr)(self._expr, *args, **kwargs) + + return func + raise AttributeError(f"{self.__class__} has no attribute {attr}") diff --git a/example/derive_expression/expression_lib/expression_lib/language.py b/example/derive_expression/expression_lib/expression_lib/language.py new file mode 100644 index 0000000..20dcb66 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/language.py @@ -0,0 +1,42 @@ +import polars as pl +from polars.type_aliases import IntoExpr +from polars.utils.udfs import _get_shared_lib_location + +from expression_lib.utils import parse_into_expr + +lib = _get_shared_lib_location(__file__) + + +def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + symbol="pig_latinnify", + is_elementwise=True, + kwargs={"capitalize": capitalize}, + ) + + +def append_args( + expr: IntoExpr, + float_arg: float, + integer_arg: int, + string_arg: str, + boolean_arg: bool, +) -> pl.Expr: + """ + This example shows how arguments other than `Series` can be used. 
+ """ + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + args=[], + kwargs={ + "float_arg": float_arg, + "integer_arg": integer_arg, + "string_arg": string_arg, + "boolean_arg": boolean_arg, + }, + symbol="append_kwargs", + is_elementwise=True, + ) diff --git a/example/derive_expression/expression_lib/expression_lib/panic.py b/example/derive_expression/expression_lib/expression_lib/panic.py new file mode 100644 index 0000000..eff27e0 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/panic.py @@ -0,0 +1,15 @@ +import polars as pl +from polars.type_aliases import IntoExpr +from polars.utils.udfs import _get_shared_lib_location + +from expression_lib.utils import parse_into_expr + +lib = _get_shared_lib_location(__file__) + + +def panic(expr: IntoExpr) -> pl.Expr: + expr = parse_into_expr(expr) + return expr.register_plugin( + lib=lib, + symbol="panic", + ) diff --git a/example/derive_expression/expression_lib/expression_lib/utils.py b/example/derive_expression/expression_lib/expression_lib/utils.py new file mode 100644 index 0000000..73f7a19 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/utils.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +if TYPE_CHECKING: + from polars.type_aliases import IntoExpr, PolarsDataType + + +def parse_into_expr( + expr: IntoExpr, + *, + str_as_lit: bool = False, + list_as_lit: bool = True, + dtype: PolarsDataType | None = None, +) -> pl.Expr: + """ + Parse a single input into an expression. + + Parameters + ---------- + expr + The input to be parsed as an expression. + str_as_lit + Interpret string input as a string literal. If set to `False` (default), + strings are parsed as column names. + list_as_lit + Interpret list input as a list literal. If set to `False`, + lists are parsed as `Series` literals. 
+ dtype + If the input is expected to resolve to a literal with a known dtype, pass + this to the `lit` constructor. + + Returns + ------- + polars.Expr + """ + if isinstance(expr, pl.Expr): + pass + elif isinstance(expr, str) and not str_as_lit: + expr = pl.col(expr) + elif isinstance(expr, list) and not list_as_lit: + expr = pl.lit(pl.Series(expr), dtype=dtype) + else: + expr = pl.lit(expr, dtype=dtype) + + return expr diff --git a/example/derive_expression/run.py b/example/derive_expression/run.py index 07644b3..49d8502 100644 --- a/example/derive_expression/run.py +++ b/example/derive_expression/run.py @@ -1,6 +1,6 @@ import polars as pl -from expression_lib import * from datetime import date, datetime, timezone +from expression_lib import language, dist, date_util, panic df = pl.DataFrame( { @@ -14,6 +14,29 @@ } ) +out = df.with_columns( + pig_latin=language.pig_latinnify("names"), + pig_latin_cap=language.pig_latinnify("names", capitalize=True), +).with_columns( + hamming_dist=dist.hamming_distance("names", "pig_latin"), + jaccard_sim=dist.jaccard_similarity("dist_a", "dist_b"), + haversine=dist.haversine("floats", "floats", "floats", "floats", "floats"), + leap_year=date_util.is_leap_year("dates"), + new_tz=date_util.change_time_zone("datetime"), + appended_args=language.append_args( + "names", + float_arg=11.234, + integer_arg=93, + boolean_arg=False, + string_arg="example", + ), +) + +print(out) + +# Test we can extend the expressions by importing the extension module. + +import expression_lib.extension # noqa: F401 out = df.with_columns( pig_latin=pl.col("names").language.pig_latinnify(),