-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
239daf3
commit 4c0f283
Showing
8 changed files
with
284 additions
and
112 deletions.
There are no files selected for viewing
111 changes: 0 additions & 111 deletions
111
example/derive_expression/expression_lib/expression_lib/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,111 +0,0 @@ | ||
import polars as pl | ||
from polars.type_aliases import IntoExpr | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
|
||
@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace ``language`` whose methods register plugin symbols from ``lib``."""

    def __init__(self, expr: pl.Expr):
        # The expression every method of this namespace operates on.
        self._expr = expr

    def pig_latinnify(self, capitalize: bool = False) -> pl.Expr:
        """
        Register the ``pig_latinnify`` plugin symbol on the wrapped expression.

        ``capitalize`` is forwarded to the plugin as a keyword argument; the
        plugin is flagged as element-wise.
        """
        plugin_kwargs = {"capitalize": capitalize}
        return self._expr.register_plugin(
            lib=lib,
            symbol="pig_latinnify",
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )

    def append_args(
        self,
        float_arg: float,
        integer_arg: int,
        string_arg: str,
        boolean_arg: bool,
    ) -> pl.Expr:
        """
        This example shows how arguments other than `Series` can be used.

        All four values are forwarded as keyword arguments to the
        ``append_kwargs`` plugin symbol; no extra expression inputs are passed.
        """
        plugin_kwargs = {
            "float_arg": float_arg,
            "integer_arg": integer_arg,
            "string_arg": string_arg,
            "boolean_arg": boolean_arg,
        }
        return self._expr.register_plugin(
            lib=lib,
            symbol="append_kwargs",
            args=[],
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )
|
||
|
||
@pl.api.register_expr_namespace("dist")
class Distance:
    """Expression namespace ``dist`` whose methods register plugin symbols from ``lib``."""

    def __init__(self, expr: pl.Expr):
        # The expression every method of this namespace operates on.
        self._expr = expr

    def hamming_distance(self, other: IntoExpr) -> pl.Expr:
        """Register the ``hamming_distance`` plugin symbol with ``other`` as extra input."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            lib=lib,
            symbol="hamming_distance",
            args=extra_inputs,
            is_elementwise=True,
        )

    def jaccard_similarity(self, other: IntoExpr) -> pl.Expr:
        """Register the ``jaccard_similarity`` plugin symbol with ``other`` as extra input."""
        extra_inputs = [other]
        return self._expr.register_plugin(
            lib=lib,
            symbol="jaccard_similarity",
            args=extra_inputs,
            is_elementwise=True,
        )

    def haversine(
        self,
        start_lat: IntoExpr,
        start_long: IntoExpr,
        end_lat: IntoExpr,
        end_long: IntoExpr,
    ) -> pl.Expr:
        """
        Register the ``haversine`` plugin symbol on the wrapped expression.

        The four coordinate inputs are passed, in the order given, as extra
        plugin inputs, and ``cast_to_supertypes`` is enabled.
        """
        coordinates = [start_lat, start_long, end_lat, end_long]
        return self._expr.register_plugin(
            lib=lib,
            symbol="haversine",
            args=coordinates,
            is_elementwise=True,
            cast_to_supertypes=True,
        )
|
||
|
||
@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Expression namespace ``date_util`` whose methods register plugin symbols from ``lib``."""

    def __init__(self, expr: pl.Expr):
        # The expression every method of this namespace operates on.
        self._expr = expr

    def is_leap_year(self) -> pl.Expr:
        """Register the element-wise ``is_leap_year`` plugin symbol on the wrapped expression."""
        return self._expr.register_plugin(
            symbol="is_leap_year",
            lib=lib,
            is_elementwise=True,
        )

    # Note that this already exists in Polars. It is just for explanatory
    # purposes.
    def change_time_zone(self, tz: str = "Europe/Amsterdam") -> pl.Expr:
        """Register the ``change_time_zone`` plugin symbol, forwarding ``tz`` as a keyword argument."""
        plugin_kwargs = {"tz": tz}
        return self._expr.register_plugin(
            lib=lib,
            symbol="change_time_zone",
            kwargs=plugin_kwargs,
            is_elementwise=True,
        )
|
||
|
||
@pl.api.register_expr_namespace("panic")
class Panic:
    """Expression namespace ``panic`` exposing the ``panic`` plugin symbol from ``lib``."""

    def __init__(self, expr: pl.Expr):
        # The expression the namespace operates on.
        self._expr = expr

    def panic(self) -> pl.Expr:
        """Register the ``panic`` plugin symbol on the wrapped expression."""
        wrapped = self._expr
        return wrapped.register_plugin(symbol="panic", lib=lib)
25 changes: 25 additions & 0 deletions
25
example/derive_expression/expression_lib/expression_lib/date_util.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import polars as pl | ||
from polars.type_aliases import IntoExpr | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
from expression_lib.utils import parse_into_expr | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
|
||
def is_leap_year(expr: IntoExpr) -> pl.Expr:
    """
    Register the element-wise ``is_leap_year`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``.
    """
    return parse_into_expr(expr).register_plugin(
        symbol="is_leap_year",
        lib=lib,
        is_elementwise=True,
    )
|
||
|
||
# Note that this already exists in Polars. It is just for explanatory | ||
# purposes. | ||
def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr:
    """
    Register the element-wise ``change_time_zone`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``; ``tz`` is forwarded
    to the plugin as a keyword argument.
    """
    plugin_kwargs = {"tz": tz}
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="change_time_zone",
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
44 changes: 44 additions & 0 deletions
44
example/derive_expression/expression_lib/expression_lib/dist.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import polars as pl | ||
from polars.type_aliases import IntoExpr | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
from expression_lib.utils import parse_into_expr | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
|
||
def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """
    Register the element-wise ``hamming_distance`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``; ``other`` is passed
    as the single extra plugin input.
    """
    extra_inputs = [other]
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="hamming_distance",
        args=extra_inputs,
        is_elementwise=True,
    )
|
||
|
||
def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr:
    """
    Register the element-wise ``jaccard_similarity`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``; ``other`` is passed
    as the single extra plugin input.
    """
    extra_inputs = [other]
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="jaccard_similarity",
        args=extra_inputs,
        is_elementwise=True,
    )
|
||
|
||
def haversine(
    expr: IntoExpr,
    start_lat: IntoExpr,
    start_long: IntoExpr,
    end_lat: IntoExpr,
    end_long: IntoExpr,
) -> pl.Expr:
    """
    Register the element-wise ``haversine`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``. The four coordinate
    inputs are passed, in the order given, as extra plugin inputs, and
    ``cast_to_supertypes`` is enabled.
    """
    coordinates = [start_lat, start_long, end_lat, end_long]
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="haversine",
        args=coordinates,
        is_elementwise=True,
        cast_to_supertypes=True,
    )
86 changes: 86 additions & 0 deletions
86
example/derive_expression/expression_lib/expression_lib/extension.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
""" | ||
Register Expressions extension with extra functionality. | ||
Enables you to write | ||
pl.col("dist_a").dist.jaccard_similarity("dist_b") | ||
instead of | ||
dist.jaccard_similarity("dist_a", "dist_b") | ||
However, note that: | ||
- you will need to add `import expression_lib.extension` to your code. | ||
Add `# noqa: F401` to avoid linting errors due to unused imports. | ||
- static typing will not recognise your custom namespace; expect errors such | ||
as `"Expr" has no attribute "dist" [attr-defined]`. | ||
""" | ||
from __future__ import annotations | ||
|
||
import polars as pl | ||
from typing import Any, Callable | ||
from expression_lib import date_util, dist, language, utils, panic | ||
|
||
|
||
@pl.api.register_expr_namespace("language")
class Language:
    """Expression namespace ``language`` that forwards to functions in the ``language`` module."""

    def __init__(self, expr: pl.Expr):
        # The expression passed as first argument to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that invokes ``language.<attr>`` with the wrapped expression first.

        Raises
        ------
        AttributeError
            If ``attr`` is not one of the forwarded function names.
        """
        # Guard clause: only the listed names are forwarded.
        if attr not in ("pig_latinnify", "append_args"):
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the wrapper is called.
            target = getattr(language, attr)
            return target(self._expr, *args, **kwargs)

        return func
|
||
|
||
@pl.api.register_expr_namespace("dist")
class Distance:
    """Expression namespace ``dist`` that forwards to functions in the ``dist`` module."""

    def __init__(self, expr: pl.Expr):
        # The expression passed as first argument to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that invokes ``dist.<attr>`` with the wrapped expression first.

        Raises
        ------
        AttributeError
            If ``attr`` is not one of the forwarded function names.
        """
        # Guard clause: only the listed names are forwarded.
        if attr not in ("hamming_distance", "jaccard_similarity", "haversine"):
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the wrapper is called.
            target = getattr(dist, attr)
            return target(self._expr, *args, **kwargs)

        return func
|
||
|
||
@pl.api.register_expr_namespace("date_util")
class DateUtil:
    """Expression namespace ``date_util`` that forwards to functions in the ``date_util`` module."""

    def __init__(self, expr: pl.Expr):
        # The expression passed as first argument to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that invokes ``date_util.<attr>`` with the wrapped expression first.

        Raises
        ------
        AttributeError
            If ``attr`` is not one of the forwarded function names.
        """
        # Guard clause: only the listed names are forwarded.
        if attr not in ("change_time_zone", "is_leap_year"):
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the wrapper is called.
            target = getattr(date_util, attr)
            return target(self._expr, *args, **kwargs)

        return func
|
||
|
||
@pl.api.register_expr_namespace("panic")
class Panic:
    """Expression namespace ``panic`` that forwards to functions in the ``panic`` module."""

    def __init__(self, expr: pl.Expr):
        # The expression passed as first argument to every forwarded call.
        self._expr = expr

    def __getattr__(self, attr: str) -> Callable[..., pl.Expr]:
        """
        Return a callable that invokes ``panic.<attr>`` with the wrapped expression first.

        Raises
        ------
        AttributeError
            If ``attr`` is not one of the forwarded function names.
        """
        # Guard clause: only the listed names are forwarded.
        if attr not in ("panic",):
            raise AttributeError(f"{self.__class__} has no attribute {attr}")

        def func(*args: Any, **kwargs: Any) -> pl.Expr:
            # The module function is looked up when the wrapper is called.
            target = getattr(panic, attr)
            return target(self._expr, *args, **kwargs)

        return func
|
||
|
||
if __name__ == "__main__":
    # Smoke check of the namespace forwarding. Kept behind the main guard so
    # that importing this module only registers the namespaces and does not
    # build a throwaway expression as an import-time side effect.
    Distance(pl.col("a")).hamming_distance(pl.col("b"))
42 changes: 42 additions & 0 deletions
42
example/derive_expression/expression_lib/expression_lib/language.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import polars as pl | ||
from polars.type_aliases import IntoExpr | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
from expression_lib.utils import parse_into_expr | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
|
||
def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr:
    """
    Register the element-wise ``pig_latinnify`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``; ``capitalize`` is
    forwarded to the plugin as a keyword argument.
    """
    plugin_kwargs = {"capitalize": capitalize}
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="pig_latinnify",
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
|
||
|
||
def append_args(
    expr: IntoExpr,
    float_arg: float,
    integer_arg: int,
    string_arg: str,
    boolean_arg: bool,
) -> pl.Expr:
    """
    This example shows how arguments other than `Series` can be used.

    ``expr`` is first converted with ``parse_into_expr``. The four values are
    forwarded as keyword arguments to the ``append_kwargs`` plugin symbol; no
    extra expression inputs are passed.
    """
    plugin_kwargs = {
        "float_arg": float_arg,
        "integer_arg": integer_arg,
        "string_arg": string_arg,
        "boolean_arg": boolean_arg,
    }
    return parse_into_expr(expr).register_plugin(
        lib=lib,
        symbol="append_kwargs",
        args=[],
        kwargs=plugin_kwargs,
        is_elementwise=True,
    )
15 changes: 15 additions & 0 deletions
15
example/derive_expression/expression_lib/expression_lib/panic.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import polars as pl | ||
from polars.type_aliases import IntoExpr | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
from expression_lib.utils import parse_into_expr | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
|
||
def panic(expr: IntoExpr) -> pl.Expr:
    """
    Register the ``panic`` plugin symbol on ``expr``.

    ``expr`` is first converted with ``parse_into_expr``.
    """
    parsed = parse_into_expr(expr)
    return parsed.register_plugin(symbol="panic", lib=lib)
48 changes: 48 additions & 0 deletions
48
example/derive_expression/expression_lib/expression_lib/utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
import polars as pl | ||
|
||
if TYPE_CHECKING: | ||
from polars.type_aliases import IntoExpr, PolarsDataType | ||
|
||
|
||
def parse_into_expr(
    expr: IntoExpr,
    *,
    str_as_lit: bool = False,
    list_as_lit: bool = True,
    dtype: PolarsDataType | None = None,
) -> pl.Expr:
    """
    Convert a single input into a Polars expression.

    Parameters
    ----------
    expr
        The input to convert. An existing `pl.Expr` is returned unchanged.
    str_as_lit
        If `True`, a string input becomes a literal. If `False` (default),
        a string input is treated as a column name.
    list_as_lit
        If `True` (default), a list input is passed to `pl.lit` directly.
        If `False`, the list is wrapped in a `pl.Series` before being passed
        to `pl.lit`.
    dtype
        Forwarded to `pl.lit` whenever a literal is built.

    Returns
    -------
    polars.Expr
    """
    # Already an expression: nothing to convert.
    if isinstance(expr, pl.Expr):
        return expr

    # Strings name a column unless the caller asked for a literal.
    if isinstance(expr, str) and not str_as_lit:
        return pl.col(expr)

    # Lists become a Series literal only when list_as_lit is switched off.
    if isinstance(expr, list) and not list_as_lit:
        return pl.lit(pl.Series(expr), dtype=dtype)

    # Everything else (including literal strings and lists) goes to pl.lit.
    return pl.lit(expr, dtype=dtype)
Oops, something went wrong.