Skip to content

Commit

Permalink
dont autoregister extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Jan 29, 2024
1 parent 239daf3 commit 4c0f283
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 112 deletions.
111 changes: 0 additions & 111 deletions example/derive_expression/expression_lib/expression_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,111 +0,0 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

lib = _get_shared_lib_location(__file__)


@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace, registered as ``language``, that wraps plugin calls."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; each method registers a plugin on it.
        self._expr = expr

    def pig_latinnify(self, capitalize: bool = False) -> pl.Expr:
        """
        Register the ``pig_latinnify`` plugin symbol on the wrapped expression.

        `capitalize` is forwarded to the plugin as a keyword argument.
        """
        plugin_kwargs = {"capitalize": capitalize}
        return self._expr.register_plugin(
            lib=lib,
            symbol="pig_latinnify",
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )

    def append_args(
        self,
        float_arg: float,
        integer_arg: int,
        string_arg: str,
        boolean_arg: bool,
    ) -> pl.Expr:
        """
        Demonstrate passing non-`Series` arguments to a plugin.

        All four values are forwarded as keyword arguments; no extra
        expression inputs are passed.
        """
        plugin_kwargs = {
            "float_arg": float_arg,
            "integer_arg": integer_arg,
            "string_arg": string_arg,
            "boolean_arg": boolean_arg,
        }
        # Note: the plugin symbol is named `append_kwargs`, not `append_args`.
        return self._expr.register_plugin(
            lib=lib,
            symbol="append_kwargs",
            args=[],
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )


@pl.api.register_expr_namespace("dist")
class Distance:
    """Expression namespace, registered as ``dist``, that wraps plugin calls."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; each method registers a plugin on it.
        self._expr = expr

    def hamming_distance(self, other: IntoExpr) -> pl.Expr:
        """Register the ``hamming_distance`` plugin with `other` as extra input."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            lib=lib,
            symbol="hamming_distance",
            args=extra_inputs,
            is_elementwise=True,
        )

    def jaccard_similarity(self, other: IntoExpr) -> pl.Expr:
        """Register the ``jaccard_similarity`` plugin with `other` as extra input."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            lib=lib,
            symbol="jaccard_similarity",
            args=extra_inputs,
            is_elementwise=True,
        )

    def haversine(
        self,
        start_lat: IntoExpr,
        start_long: IntoExpr,
        end_lat: IntoExpr,
        end_long: IntoExpr,
    ) -> pl.Expr:
        """
        Register the ``haversine`` plugin with the four coordinates as inputs.

        The inputs are passed in the order start_lat, start_long, end_lat,
        end_long, and the plugin is registered with `cast_to_supertypes=True`.
        """
        coordinates = [start_lat, start_long, end_lat, end_long]
        return self._expr.register_plugin(
            lib=lib,
            symbol="haversine",
            args=coordinates,
            cast_to_supertypes=True,
            is_elementwise=True,
        )


@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Expression namespace, registered as ``date_util``, that wraps plugin calls."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; each method registers a plugin on it.
        self._expr = expr

    def is_leap_year(self) -> pl.Expr:
        """Register the ``is_leap_year`` plugin symbol on the wrapped expression."""
        return self._expr.register_plugin(
            symbol="is_leap_year",
            lib=lib,
            is_elementwise=True,
        )

    # Polars already provides this functionality; it is repeated here
    # only for explanatory purposes.
    def change_time_zone(self, tz: str = "Europe/Amsterdam") -> pl.Expr:
        """
        Register the ``change_time_zone`` plugin symbol on the wrapped expression.

        `tz` is forwarded to the plugin as a keyword argument.
        """
        plugin_kwargs = {"tz": tz}
        return self._expr.register_plugin(
            lib=lib,
            symbol="change_time_zone",
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )


@pl.api.register_expr_namespace("panic")
class Panic:
    """Expression namespace, registered as ``panic``, that wraps the panic plugin."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression the plugin is registered on.
        self._expr = expr

    def panic(self) -> pl.Expr:
        """Register the ``panic`` plugin symbol on the wrapped expression."""
        wrapped = self._expr
        return wrapped.register_plugin(symbol="panic", lib=lib)
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def is_leap_year(expr: IntoExpr) -> pl.Expr:
    """
    Register the ``is_leap_year`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`.
    """
    parsed = parse_into_expr(expr)
    return parsed.register_plugin(
        symbol="is_leap_year",
        lib=lib,
        is_elementwise=True,
    )


# Polars already provides this functionality; it is repeated here
# only for explanatory purposes.
def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr:
    """
    Register the ``change_time_zone`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`; `tz` is forwarded to
    the plugin as a keyword argument.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {"tz": tz}
    return parsed.register_plugin(
        lib=lib,
        symbol="change_time_zone",
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
44 changes: 44 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/dist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """
    Register the ``hamming_distance`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`; `other` is passed to
    the plugin as its single extra input.
    """
    parsed = parse_into_expr(expr)
    extra_inputs = [other]
    return parsed.register_plugin(
        lib=lib,
        symbol="hamming_distance",
        args=extra_inputs,
        is_elementwise=True,
    )


def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """
    Register the ``jaccard_similarity`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`; `other` is passed to
    the plugin as its single extra input.
    """
    parsed = parse_into_expr(expr)
    extra_inputs = [other]
    return parsed.register_plugin(
        lib=lib,
        symbol="jaccard_similarity",
        args=extra_inputs,
        is_elementwise=True,
    )


def haversine(
    expr: IntoExpr,
    start_lat: IntoExpr,
    start_long: IntoExpr,
    end_lat: IntoExpr,
    end_long: IntoExpr,
) -> pl.Expr:
    """
    Register the ``haversine`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`. The four coordinates
    are passed as extra inputs in the order start_lat, start_long, end_lat,
    end_long, and the plugin is registered with `cast_to_supertypes=True`.
    """
    parsed = parse_into_expr(expr)
    coordinates = [start_lat, start_long, end_lat, end_long]
    return parsed.register_plugin(
        lib=lib,
        symbol="haversine",
        args=coordinates,
        cast_to_supertypes=True,
        is_elementwise=True,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Register Expressions extension with extra functionality.
Enables you to write
pl.col("dist_a").dist.jaccard_similarity("dist_b")
instead of
dist.jaccard_similarity("dist_a", "dist_b")
However, note that:
- you will need to add `import expression_lib.extension` to your code.
Add `# noqa: F401` to avoid linting errors due to unused imports.
- static typing will not recognise your custom namespace, so type checkers
  will report errors such as `"Expr" has no attribute "dist" [attr-defined]`.
"""
from __future__ import annotations

import polars as pl
from typing import Any, Callable
from expression_lib import date_util, dist, language, utils, panic


@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace ``language`` that forwards to the `language` module."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; passed as the first argument on dispatch.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that forwards to `language.<attr>`.

        Only the names listed below are forwarded; any other attribute
        raises `AttributeError`.
        """
        forwarded_names = ("pig_latinnify", "append_args")
        if attr not in forwarded_names:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def forward(*args: Any, **kwargs: Any) -> pl.Expr:
            # Look up the module function at call time, then prepend the
            # wrapped expression to the caller's arguments.
            target = getattr(language, attr)
            return target(self._expr, *args, **kwargs)

        return forward


@pl.api.register_expr_namespace("dist")
class Distance:
    """Expression namespace ``dist`` that forwards to the `dist` module."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; passed as the first argument on dispatch.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that forwards to `dist.<attr>`.

        Only the names listed below are forwarded; any other attribute
        raises `AttributeError`.
        """
        forwarded_names = ("hamming_distance", "jaccard_similarity", "haversine")
        if attr not in forwarded_names:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def forward(*args: Any, **kwargs: Any) -> pl.Expr:
            # Look up the module function at call time, then prepend the
            # wrapped expression to the caller's arguments.
            target = getattr(dist, attr)
            return target(self._expr, *args, **kwargs)

        return forward


@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Expression namespace ``date_util`` that forwards to the `date_util` module."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; passed as the first argument on dispatch.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that forwards to `date_util.<attr>`.

        Only the names listed below are forwarded; any other attribute
        raises `AttributeError`.
        """
        forwarded_names = ("change_time_zone", "is_leap_year")
        if attr not in forwarded_names:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def forward(*args: Any, **kwargs: Any) -> pl.Expr:
            # Look up the module function at call time, then prepend the
            # wrapped expression to the caller's arguments.
            target = getattr(date_util, attr)
            return target(self._expr, *args, **kwargs)

        return forward


@pl.api.register_expr_namespace("panic")
class Panic:
    """Expression namespace ``panic`` that forwards to the `panic` module."""

    def __init__(self, expr: pl.Expr):
        # The wrapped expression; passed as the first argument on dispatch.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that forwards to `panic.<attr>`.

        Only the names listed below are forwarded; any other attribute
        raises `AttributeError`.
        """
        forwarded_names = ("panic",)
        if attr not in forwarded_names:
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def forward(*args: Any, **kwargs: Any) -> pl.Expr:
            # `panic` here is the imported module, not this class; look up
            # the function at call time and prepend the wrapped expression.
            target = getattr(panic, attr)
            return target(self._expr, *args, **kwargs)

        return forward


# NOTE(review): this runs at import time and its result is discarded; it
# looks like a leftover smoke test — confirm whether it can be removed.
Distance(pl.col("a")).hamming_distance(pl.col("b"))
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
    """
    Register the ``pig_latinnify`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`; `capitalize` is
    forwarded to the plugin as a keyword argument.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {"capitalize": capitalize}
    return parsed.register_plugin(
        lib=lib,
        symbol="pig_latinnify",
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )


def append_args(
    expr: IntoExpr,
    float_arg: float,
    integer_arg: int,
    string_arg: str,
    boolean_arg: bool,
) -> pl.Expr:
    """
    Demonstrate passing non-`Series` arguments to a plugin.

    `expr` is first converted with `parse_into_expr`. The four values are
    forwarded as keyword arguments; no extra expression inputs are passed.
    """
    parsed = parse_into_expr(expr)
    plugin_kwargs = {
        "float_arg": float_arg,
        "integer_arg": integer_arg,
        "string_arg": string_arg,
        "boolean_arg": boolean_arg,
    }
    # Note: the plugin symbol is named `append_kwargs`, not `append_args`.
    return parsed.register_plugin(
        lib=lib,
        symbol="append_kwargs",
        args=[],
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
15 changes: 15 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/panic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import polars as pl
from polars.type_aliases import IntoExpr
from polars.utils.udfs import _get_shared_lib_location

from expression_lib.utils import parse_into_expr

lib = _get_shared_lib_location(__file__)


def panic(expr: IntoExpr) -> pl.Expr:
    """
    Register the ``panic`` plugin symbol on `expr`.

    `expr` is first converted with `parse_into_expr`.
    """
    parsed = parse_into_expr(expr)
    return parsed.register_plugin(symbol="panic", lib=lib)
48 changes: 48 additions & 0 deletions example/derive_expression/expression_lib/expression_lib/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import polars as pl

if TYPE_CHECKING:
from polars.type_aliases import IntoExpr, PolarsDataType


def parse_into_expr(
    expr: IntoExpr,
    *,
    str_as_lit: bool = False,
    list_as_lit: bool = True,
    dtype: PolarsDataType | None = None,
) -> pl.Expr:
    """
    Convert a single input into a Polars expression.

    Parameters
    ----------
    expr
        The value to convert. An existing expression is returned unchanged.
    str_as_lit
        If `True`, a string input becomes a string literal. If `False`
        (default), a string input is treated as a column name.
    list_as_lit
        If `True` (default), a list input is passed to `lit` as-is. If
        `False`, the list is wrapped in a `Series` before being passed
        to `lit`.
    dtype
        Data type forwarded to `lit` for inputs that become literals.

    Returns
    -------
    polars.Expr
    """
    # Already an expression: nothing to do.
    if isinstance(expr, pl.Expr):
        return expr

    # Strings are column names unless the caller asked for literals.
    if isinstance(expr, str) and not str_as_lit:
        return pl.col(expr)

    # Everything else becomes a literal; lists may be wrapped in a Series first.
    wrap_in_series = isinstance(expr, list) and not list_as_lit
    literal_source = pl.Series(expr) if wrap_in_series else expr
    return pl.lit(literal_source, dtype=dtype)
Loading

0 comments on commit 4c0f283

Please sign in to comment.