diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py
index ba00e34e0..173983006 100644
--- a/narwhals/_arrow/expr.py
+++ b/narwhals/_arrow/expr.py
@@ -257,6 +257,40 @@ class ArrowExprStringNamespace:
     def __init__(self, expr: ArrowExpr) -> None:
         self._expr = expr
 
+    def starts_with(self, prefix: str) -> ArrowExpr:
+        return reuse_series_namespace_implementation(
+            self._expr,
+            "str",
+            "starts_with",
+            prefix,
+        )
+
+    def ends_with(self, suffix: str) -> ArrowExpr:
+        return reuse_series_namespace_implementation(
+            self._expr,
+            "str",
+            "ends_with",
+            suffix,
+        )
+
+    def contains(self, pattern: str, *, literal: bool = False) -> ArrowExpr:
+        return reuse_series_namespace_implementation(
+            self._expr, "str", "contains", pattern, literal=literal
+        )
+
+    def slice(self, offset: int, length: int | None = None) -> ArrowExpr:
+        return reuse_series_namespace_implementation(
+            self._expr, "str", "slice", offset, length
+        )
+
+    def to_datetime(self, format: str | None = None) -> ArrowExpr:  # noqa: A002
+        return reuse_series_namespace_implementation(
+            self._expr,
+            "str",
+            "to_datetime",
+            format,
+        )
+
     def to_uppercase(self) -> ArrowExpr:
         return reuse_series_namespace_implementation(
             self._expr,
diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py
index f4080e489..331d9a4a0 100644
--- a/narwhals/_arrow/series.py
+++ b/narwhals/_arrow/series.py
@@ -375,16 +375,63 @@ def get_categories(self) -> ArrowSeries:
 
 
 class ArrowSeriesStringNamespace:
-    def __init__(self, series: ArrowSeries) -> None:
+    def __init__(self: Self, series: ArrowSeries) -> None:
         self._arrow_series = series
 
-    def to_uppercase(self) -> ArrowSeries:
+    def starts_with(self: Self, prefix: str) -> ArrowSeries:
+        """Check whether each string starts with `prefix`."""
+        pc = get_pyarrow_compute()
+        # Native kernel instead of slice-and-compare.
+        return self._arrow_series._from_native_series(
+            pc.starts_with(self._arrow_series._native_series, prefix)
+        )
+
+    def ends_with(self: Self, suffix: str) -> ArrowSeries:
+        """Check whether each string ends with `suffix`."""
+        pc = get_pyarrow_compute()
+        # Native kernel: slicing with `-len(suffix)` breaks for an empty
+        # suffix, because `-0` selects the whole string rather than nothing.
+        return self._arrow_series._from_native_series(
+            pc.ends_with(self._arrow_series._native_series, suffix)
+        )
+
+    def contains(self: Self, pattern: str, *, literal: bool = False) -> ArrowSeries:
+        """Match `pattern` as a plain substring if `literal`, else as a regex."""
+        pc = get_pyarrow_compute()
+        check_func = pc.match_substring if literal else pc.match_substring_regex
+        return self._arrow_series._from_native_series(
+            check_func(self._arrow_series._native_series, pattern)
+        )
+
+    def slice(self: Self, offset: int, length: int | None = None) -> ArrowSeries:
+        """Take `length` characters from `offset`; to the end if `length` is None."""
+        pc = get_pyarrow_compute()
+        # `is not None`, not truthiness: `length=0` must yield empty strings.
+        stop = offset + length if length is not None else None
+        if offset < 0 and stop is not None and stop >= 0:
+            # The window starts from the end and reaches the end of the string;
+            # a non-negative stop would be read as an index from the start.
+            stop = None
+        return self._arrow_series._from_native_series(
+            pc.utf8_slice_codeunits(
+                self._arrow_series._native_series, start=offset, stop=stop
+            ),
+        )
+
+    def to_datetime(self: Self, format: str | None = None) -> ArrowSeries:  # noqa: A002
+        """Parse each string with `pc.strptime` into microsecond timestamps."""
+        pc = get_pyarrow_compute()
+        return self._arrow_series._from_native_series(
+            pc.strptime(self._arrow_series._native_series, format=format, unit="us")
+        )
+
+    def to_uppercase(self: Self) -> ArrowSeries:
         pc = get_pyarrow_compute()
         return self._arrow_series._from_native_series(
             pc.utf8_upper(self._arrow_series._native_series),
         )
 
-    def to_lowercase(self) -> ArrowSeries:
+    def to_lowercase(self: Self) -> ArrowSeries:
         pc = get_pyarrow_compute()
         return self._arrow_series._from_native_series(
             pc.utf8_lower(self._arrow_series._native_series),
diff --git a/tests/expr_and_series/str/contains_test.py b/tests/expr_and_series/str/contains_test.py
index 32e45de03..831aa82d3 100644
--- a/tests/expr_and_series/str/contains_test.py
+++ b/tests/expr_and_series/str/contains_test.py
@@ -2,7 +2,6 @@
 
 import pandas as pd
 import polars as pl
-import pytest
 
 import narwhals.stable.v1 as nw
 from tests.utils import compare_dicts
@@ -13,9 +12,8 @@
 df_polars = pl.DataFrame(data)
 
 
-@pytest.mark.parametrize("df_any", [df_pandas, df_polars])
-def test_contains(df_any: Any) -> None:
-    df = nw.from_native(df_any, eager_only=True)
+def test_contains(constructor: Any) -> None:
+    df = nw.from_native(constructor(data), eager_only=True)
     result = df.with_columns(
         case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove")
     )
diff --git a/tests/expr_and_series/str/head_test.py b/tests/expr_and_series/str/head_test.py
index 9fe2638ff..9deb28623 100644
--- a/tests/expr_and_series/str/head_test.py
+++ b/tests/expr_and_series/str/head_test.py
@@ -1,16 +1,12 @@
 from typing import Any
 
-import pytest
-
 import narwhals.stable.v1 as nw
 from tests.utils import compare_dicts
 
 data = {"a": ["foo", "bars"]}
 
 
-def test_str_head(request: Any, constructor: Any) -> None:
-    if "pyarrow_table" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
+def test_str_head(constructor: Any) -> None:
     df = nw.from_native(constructor(data), eager_only=True)
     result = df.select(nw.col("a").str.head(3))
     expected = {
diff --git a/tests/expr_and_series/str/slice_test.py b/tests/expr_and_series/str/slice_test.py
index 946f4e35e..e38319521 100644
--- a/tests/expr_and_series/str/slice_test.py
+++ b/tests/expr_and_series/str/slice_test.py
@@ -15,11 +15,8 @@
     [(1, 2, {"a": ["da", "df"]}), (-2, None, {"a": ["as", "as"]})],
 )
 def test_str_slice(
-    request: Any, constructor: Any, offset: int, length: int | None, expected: Any
+    constructor: Any, offset: int, length: int | None, expected: Any
 ) -> None:
-    if "pyarrow_table" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
-
     df = nw.from_native(constructor(data), eager_only=True)
     result_frame = df.select(nw.col("a").str.slice(offset, length))
     compare_dicts(result_frame, expected)
diff --git a/tests/expr_and_series/str/starts_with_ends_with_test.py b/tests/expr_and_series/str/starts_with_ends_with_test.py
index ee477461c..9216f2e62 100644
--- a/tests/expr_and_series/str/starts_with_ends_with_test.py
+++ b/tests/expr_and_series/str/starts_with_ends_with_test.py
@@ -2,8 +2,6 @@
 
 from typing import Any
 
-import pytest
-
 import narwhals.stable.v1 as nw
 
 # Don't move this into typechecking block, for coverage
@@ -13,10 +11,7 @@
 data = {"a": ["fdas", "edfas"]}
 
 
-def test_ends_with(request: Any, constructor_with_lazy: Any) -> None:
-    if "pyarrow_table" in str(constructor_with_lazy):
-        request.applymarker(pytest.mark.xfail)
-
+def test_ends_with(constructor_with_lazy: Any) -> None:
     df = nw.from_native(constructor_with_lazy(data)).lazy()
     result = df.select(nw.col("a").str.ends_with("das"))
     expected = {
@@ -31,10 +26,7 @@ def test_ends_with(request: Any, constructor_with_lazy: Any) -> None:
     compare_dicts(result, expected)
 
 
-def test_starts_with(request: Any, constructor_with_lazy: Any) -> None:
-    if "pyarrow_table" in str(constructor_with_lazy):
-        request.applymarker(pytest.mark.xfail)
-
+def test_starts_with(constructor_with_lazy: Any) -> None:
     df = nw.from_native(constructor_with_lazy(data)).lazy()
     result = df.select(nw.col("a").str.starts_with("fda"))
     expected = {
diff --git a/tests/expr_and_series/str/tail_test.py b/tests/expr_and_series/str/tail_test.py
index b7c187e19..f116e4450 100644
--- a/tests/expr_and_series/str/tail_test.py
+++ b/tests/expr_and_series/str/tail_test.py
@@ -1,17 +1,12 @@
 from typing import Any
 
-import pytest
-
 import narwhals.stable.v1 as nw
 from tests.utils import compare_dicts
 
 data = {"a": ["foo", "bars"]}
 
 
-def test_str_tail(request: Any, constructor: Any) -> None:
-    if "pyarrow_table" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
-
+def test_str_tail(constructor: Any) -> None:
     df = nw.from_native(constructor(data), eager_only=True)
     expected = {"a": ["foo", "ars"]}
 
diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py
index 62c438f88..ea248bdef 100644
--- a/tests/expr_and_series/str/to_datetime_test.py
+++ b/tests/expr_and_series/str/to_datetime_test.py
@@ -1,19 +1,15 @@
 from datetime import datetime
 from typing import Any
 
-import pandas as pd
-import polars as pl
-import pytest
-
 import narwhals.stable.v1 as nw
 
-df_pandas = pd.DataFrame({"a": ["2020-01-01T12:34:56"]})
-df_polars = pl.DataFrame({"a": ["2020-01-01T12:34:56"]})
+data = {"a": ["2020-01-01T12:34:56"]}
 
 
-@pytest.mark.parametrize("df_any", [df_pandas, df_polars])
-def test_to_datetime(df_any: Any) -> None:
-    result = nw.from_native(df_any, eager_only=True).select(
-        b=nw.col("a").str.to_datetime(format="%Y-%m-%dT%H:%M:%S")
-    )["b"][0]
+def test_to_datetime(constructor: Any) -> None:
+    result = (
+        nw.from_native(constructor(data), eager_only=True)
+        .select(b=nw.col("a").str.to_datetime(format="%Y-%m-%dT%H:%M:%S"))
+        .item(row=0, column="b")
+    )
     assert result == datetime(2020, 1, 1, 12, 34, 56)