Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't autoregister extensions #65

Merged
merged 3 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,40 +51,44 @@ fn pig_latinnify(inputs: &[Series], kwargs: PigLatinKwargs) -> PolarsResult<Seri
}
```

On the python side this expression can then be registered under a namespace:
This can then be exposed on the Python side:

```python
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

lib = _get_shared_lib_location(__file__)
from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)

@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace registered under the name ``language``."""

    def __init__(self, expr: pl.Expr):
        # Keep the wrapped expression; the plugin call is issued on it.
        self._expr = expr

    def pig_latinnify(self, capatilize: bool = False) -> pl.Expr:
        """Register the ``pig_latinnify`` plugin symbol on the wrapped expression.

        ``capatilize`` is forwarded to the plugin under the ``"capitalize"``
        keyword.
        """
        # NOTE(review): the parameter name is misspelled; it is kept as-is
        # because callers may pass it by keyword.
        plugin_kwargs = {"capitalize": capatilize}
        return self._expr._register_plugin(
            symbol="pig_latinnify",
            lib=lib,
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )
def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
    """Register the ``pig_latinnify`` plugin symbol on ``expr``.

    ``expr`` is first passed through ``parse_into_expr``; ``capitalize`` is
    forwarded to the plugin as a keyword argument of the same name.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {"capitalize": capitalize}
    return parsed.register_plugin(
        symbol="pig_latinnify",
        lib=lib,
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
```

Compile/ship and then it is ready to use:

```python
import polars as pl
import expression_lib
from expression_lib import language

df = pl.DataFrame({
"names": ["Richard", "Alice", "Bob"],
})


out = df.with_columns(
pig_latin = language.pig_latinnify("names")
)
```
Alternatively, you can [register a custom namespace](https://docs.pola.rs/py-polars/html/reference/api/polars.api.register_expr_namespace.html#polars.api.register_expr_namespace), which enables you to write:
```python
out = df.with_columns(
pig_latin = pl.col("names").language.pig_latinnify()
)
Expand Down
111 changes: 0 additions & 111 deletions example/derive_expression/expression_lib/expression_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,111 +0,0 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

lib = _get_shared_lib_location(__file__)


@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace registered under the name ``language``."""

    def __init__(self, expr: pl.Expr):
        # The expression every method below registers its plugin on.
        self._expr = expr

    def pig_latinnify(self, capitalize: bool = False) -> pl.Expr:
        """Register the ``pig_latinnify`` plugin symbol on the wrapped expression.

        ``capitalize`` is forwarded to the plugin as a keyword argument.
        """
        plugin_kwargs = {"capitalize": capitalize}
        return self._expr.register_plugin(
            symbol="pig_latinnify",
            lib=lib,
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )

    def append_args(
        self,
        float_arg: float,
        integer_arg: int,
        string_arg: str,
        boolean_arg: bool,
    ) -> pl.Expr:
        """
        Demonstrate passing arguments other than `Series` to a plugin.

        All four values are forwarded as keyword arguments to the
        ``append_kwargs`` plugin symbol; no extra expression inputs are given.
        """
        plugin_kwargs = {
            "float_arg": float_arg,
            "integer_arg": integer_arg,
            "string_arg": string_arg,
            "boolean_arg": boolean_arg,
        }
        return self._expr.register_plugin(
            symbol="append_kwargs",
            lib=lib,
            args=[],
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )


@pl.api.register_expr_namespace("dist")
class Distance:
    """Expression namespace registered under the name ``dist``."""

    def __init__(self, expr: pl.Expr):
        # The expression every method below registers its plugin on.
        self._expr = expr

    def hamming_distance(self, other: IntoExpr) -> pl.Expr:
        """Register the ``hamming_distance`` plugin symbol, passing ``other`` via ``args``."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            symbol="hamming_distance",
            lib=lib,
            args=extra_inputs,
            is_elementwise=True,
        )

    def jaccard_similarity(self, other: IntoExpr) -> pl.Expr:
        """Register the ``jaccard_similarity`` plugin symbol, passing ``other`` via ``args``."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            symbol="jaccard_similarity",
            lib=lib,
            args=extra_inputs,
            is_elementwise=True,
        )

    def haversine(
        self,
        start_lat: IntoExpr,
        start_long: IntoExpr,
        end_lat: IntoExpr,
        end_long: IntoExpr,
    ) -> pl.Expr:
        """Register the ``haversine`` plugin symbol.

        The four coordinate arguments are passed via ``args`` in the order
        given, and the plugin is registered with ``cast_to_supertypes=True``.
        """
        coordinates = [start_lat, start_long, end_lat, end_long]
        return self._expr.register_plugin(
            symbol="haversine",
            lib=lib,
            args=coordinates,
            is_elementwise=True,
            cast_to_supertypes=True,
        )


@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Expression namespace registered under the name ``date_util``."""

    def __init__(self, expr: pl.Expr):
        # The expression every method below registers its plugin on.
        self._expr = expr

    def is_leap_year(self) -> pl.Expr:
        """Register the ``is_leap_year`` plugin symbol on the wrapped expression."""
        wrapped = self._expr
        return wrapped.register_plugin(
            symbol="is_leap_year",
            lib=lib,
            is_elementwise=True,
        )

    # Polars already ships this functionality; the method exists purely as an
    # illustration.
    def change_time_zone(self, tz: str = "Europe/Amsterdam") -> pl.Expr:
        """Register the ``change_time_zone`` plugin symbol, forwarding ``tz`` as a keyword argument."""
        plugin_kwargs = {"tz": tz}
        return self._expr.register_plugin(
            symbol="change_time_zone",
            lib=lib,
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )


@pl.api.register_expr_namespace("panic")
class Panic:
    """Expression namespace registered under the name ``panic``."""

    def __init__(self, expr: pl.Expr):
        # The expression the plugin is registered on.
        self._expr = expr

    def panic(self) -> pl.Expr:
        """Register the ``panic`` plugin symbol on the wrapped expression."""
        wrapped = self._expr
        return wrapped.register_plugin(symbol="panic", lib=lib)
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def is_leap_year(expr: IntoExpr) -> pl.Expr:
    """Register the ``is_leap_year`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr`` before the plugin is
    registered on it.
    """
    parsed = parse_into_expr(expr)
    return parsed.register_plugin(
        symbol="is_leap_year",
        lib=lib,
        is_elementwise=True,
    )


# Polars already ships this functionality; the function exists purely as an
# illustration.
def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr:
    """Register the ``change_time_zone`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr``; ``tz`` is forwarded to the
    plugin as a keyword argument.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {"tz": tz}
    return parsed.register_plugin(
        symbol="change_time_zone",
        lib=lib,
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
44 changes: 44 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/dist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """Register the ``hamming_distance`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr``; ``other`` is handed to the
    plugin via ``args``.
    """
    parsed = parse_into_expr(expr)
    extra_inputs = [other]
    return parsed.register_plugin(
        symbol="hamming_distance",
        lib=lib,
        args=extra_inputs,
        is_elementwise=True,
    )


def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """Register the ``jaccard_similarity`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr``; ``other`` is handed to the
    plugin via ``args``.
    """
    parsed = parse_into_expr(expr)
    extra_inputs = [other]
    return parsed.register_plugin(
        symbol="jaccard_similarity",
        lib=lib,
        args=extra_inputs,
        is_elementwise=True,
    )


def haversine(
    expr: IntoExpr,
    start_lat: IntoExpr,
    start_long: IntoExpr,
    end_lat: IntoExpr,
    end_long: IntoExpr,
) -> pl.Expr:
    """Register the ``haversine`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr``. The four coordinate
    arguments are handed to the plugin via ``args`` in the order given, and
    the plugin is registered with ``cast_to_supertypes=True``.
    """
    parsed = parse_into_expr(expr)
    coordinates = [start_lat, start_long, end_lat, end_long]
    return parsed.register_plugin(
        symbol="haversine",
        lib=lib,
        args=coordinates,
        is_elementwise=True,
        cast_to_supertypes=True,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Register Expressions extension with extra functionality.

Enables you to write

pl.col("dist_a").dist.jaccard_similarity("dist_b")

instead of

dist.jaccard_similarity("dist_a", "dist_b")

However, note that:

- you will need to add `import expression_lib.extension` to your code.
Add `# noqa: F401` to avoid linting errors due to unused imports.
- static type checkers will not recognise your custom namespace, so expect
errors such as `"Expr" has no attribute "dist" [attr-defined]`.
"""
from __future__ import annotations

import polars as pl
from typing import Any, Callable
from expression_lib import date_util, dist, language, utils, panic


@pl.api.register_expr_namespace("language")
class Language:
    """Namespace ``language`` that forwards calls to the ``language`` module."""

    def __init__(self, expr: pl.Expr):
        # Expression that is prepended to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a callable that invokes ``language.<attr>`` on the wrapped expression.

        Only the names listed in ``forwarded`` are supported; any other
        lookup raises ``AttributeError``.
        """
        forwarded = ("pig_latinnify", "append_args")
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the call happens, and the
            # wrapped expression becomes its first positional argument.
            target = getattr(language, attr)
            return target(self._expr, *args, **kwargs)

        return func


@pl.api.register_expr_namespace("dist")
class Distance:
    """Namespace ``dist`` that forwards calls to the ``dist`` module."""

    def __init__(self, expr: pl.Expr):
        # Expression that is prepended to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a callable that invokes ``dist.<attr>`` on the wrapped expression.

        Only the names listed in ``forwarded`` are supported; any other
        lookup raises ``AttributeError``.
        """
        forwarded = ("hamming_distance", "jaccard_similarity", "haversine")
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the call happens, and the
            # wrapped expression becomes its first positional argument.
            target = getattr(dist, attr)
            return target(self._expr, *args, **kwargs)

        return func


@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Namespace ``date_util`` that forwards calls to the ``date_util`` module."""

    def __init__(self, expr: pl.Expr):
        # Expression that is prepended to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a callable that invokes ``date_util.<attr>`` on the wrapped expression.

        Only the names listed in ``forwarded`` are supported; any other
        lookup raises ``AttributeError``.
        """
        forwarded = ("change_time_zone", "is_leap_year")
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the call happens, and the
            # wrapped expression becomes its first positional argument.
            target = getattr(date_util, attr)
            return target(self._expr, *args, **kwargs)

        return func


@pl.api.register_expr_namespace("panic")
class Panic:
    """Namespace ``panic`` that forwards calls to the ``panic`` module."""

    def __init__(self, expr: pl.Expr):
        # Expression that is prepended to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """Return a callable that invokes ``panic.<attr>`` on the wrapped expression.

        Only the names listed in ``forwarded`` are supported; any other
        lookup raises ``AttributeError``.
        """
        forwarded = ("panic",)
        if attr not in forwarded:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the call happens, and the
            # wrapped expression becomes its first positional argument.
            target = getattr(panic, attr)
            return target(self._expr, *args, **kwargs)

        return func
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
    """Register the ``pig_latinnify`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr``; ``capitalize`` is
    forwarded to the plugin as a keyword argument of the same name.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {"capitalize": capitalize}
    return parsed.register_plugin(
        symbol="pig_latinnify",
        lib=lib,
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )


def append_args(
    expr: IntoExpr,
    float_arg: float,
    integer_arg: int,
    string_arg: str,
    boolean_arg: bool,
) -> pl.Expr:
    """
    Demonstrate passing arguments other than `Series` to a plugin.

    ``expr`` is passed through ``parse_into_expr``. The four remaining values
    are forwarded as keyword arguments to the ``append_kwargs`` plugin symbol;
    no extra expression inputs are given.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {
        "float_arg": float_arg,
        "integer_arg": integer_arg,
        "string_arg": string_arg,
        "boolean_arg": boolean_arg,
    }
    return parsed.register_plugin(
        symbol="append_kwargs",
        lib=lib,
        args=[],
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
15 changes: 15 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/panic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def panic(expr: IntoExpr) -> pl.Expr:
    """Register the ``panic`` plugin symbol on ``expr``.

    ``expr`` is passed through ``parse_into_expr`` before the plugin is
    registered on it.
    """
    parsed = parse_into_expr(expr)
    return parsed.register_plugin(symbol="panic", lib=lib)
Loading
Loading