Merge branch 'main' into perf/pyarrow-with-columns
FBruzzesi committed Nov 10, 2024
2 parents f8a591f + 3759660 commit bf5d0fd
Showing 8 changed files with 68 additions and 39 deletions.
4 changes: 2 additions & 2 deletions narwhals/_arrow/namespace.py
@@ -152,7 +152,7 @@ def lit(self, value: Any, dtype: DType | None) -> ArrowExpr:
def _lit_arrow_series(_: ArrowDataFrame) -> ArrowSeries:
arrow_series = ArrowSeries._from_iterable(
data=[value],
name="lit",
name="literal",
backend_version=self._backend_version,
dtypes=self._dtypes,
)
@@ -165,7 +165,7 @@ def _lit_arrow_series(_: ArrowDataFrame) -> ArrowSeries:
depth=0,
function_name="lit",
root_names=None,
output_names=["lit"],
output_names=["literal"],
backend_version=self._backend_version,
dtypes=self._dtypes,
)
16 changes: 9 additions & 7 deletions narwhals/_arrow/utils.py
@@ -354,28 +354,28 @@ def convert_str_slice_to_int_slice(


# Regex for date, time, separator and timezone components
DATE_RE = r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})"
DATE_RE = r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4}|\d{8})"
SEP_RE = r"(?P<sep>\s|T)"
TIME_RE = r"(?P<time>\d{2}:\d{2}(?::\d{2})?)" # \s*(?P<period>[AP]M)?)?
TIME_RE = r"(?P<time>\d{2}:\d{2}(?::\d{2})?|\d{6}?)" # \s*(?P<period>[AP]M)?)?
HMS_RE = r"^(?P<hms>\d{2}:\d{2}:\d{2})$"
HM_RE = r"^(?P<hm>\d{2}:\d{2})$"
HMS_RE_NO_SEP = r"^(?P<hms_no_sep>\d{6})$"
TZ_RE = r"(?P<tz>Z|[+-]\d{2}:?\d{2})" # Matches 'Z', '+02:00', '+0200', '+02', etc.
FULL_RE = rf"{DATE_RE}{SEP_RE}?{TIME_RE}?{TZ_RE}?$"

# Separate regexes for different date formats
YMD_RE = r"^(?P<year>(?:[12][0-9])?[0-9]{2})(?P<sep1>[-/.])(?P<month>0[1-9]|1[0-2])(?P<sep2>[-/.])(?P<day>0[1-9]|[12][0-9]|3[01])$"
DMY_RE = r"^(?P<day>0[1-9]|[12][0-9]|3[01])(?P<sep1>[-/.])(?P<month>0[1-9]|1[0-2])(?P<sep2>[-/.])(?P<year>(?:[12][0-9])?[0-9]{2})$"
MDY_RE = r"^(?P<month>0[1-9]|1[0-2])(?P<sep1>[-/.])(?P<day>0[1-9]|[12][0-9]|3[01])(?P<sep2>[-/.])(?P<year>(?:[12][0-9])?[0-9]{2})$"
YMD_RE_NO_SEP = r"^(?P<year>(?:[12][0-9])?[0-9]{2})(?P<month>0[1-9]|1[0-2])(?P<day>0[1-9]|[12][0-9]|3[01])$"

DATE_FORMATS = (
(YMD_RE_NO_SEP, "%Y%m%d"),
(YMD_RE, "%Y-%m-%d"),
(DMY_RE, "%d-%m-%Y"),
(MDY_RE, "%m-%d-%Y"),
)
TIME_FORMATS = (
(HMS_RE, "%H:%M:%S"),
(HM_RE, "%H:%M"),
)
TIME_FORMATS = ((HMS_RE, "%H:%M:%S"), (HM_RE, "%H:%M"), (HMS_RE_NO_SEP, "%H%M%S"))


def parse_datetime_format(arr: pa.StringArray) -> str:
@@ -422,7 +422,9 @@ def _parse_date_format(arr: pa.Array) -> str:

for date_rgx, date_fmt in DATE_FORMATS:
matches = pc.extract_regex(arr, pattern=date_rgx)
if (
if date_fmt == "%Y%m%d" and pc.all(matches.is_valid()).as_py():
return date_fmt
elif (
pc.all(matches.is_valid()).as_py()
and pc.count(pc.unique(sep1 := matches.field("sep1"))).as_py() == 1
and pc.count(pc.unique(sep2 := matches.field("sep2"))).as_py() == 1
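A quick way to sanity-check the two new no-separator patterns is to run them against the string the new `to_datetime` tests below feed in ("20240101123456"). The following is a standalone sketch using Python's `re` module with the patterns copied from the hunk above; splitting the string into an 8-character date and a 6-character time is done by hand here purely for illustration, whereas narwhals performs that split via the `\d{8}` and `\d{6}` alternatives inside `FULL_RE`.

    import re

    # Patterns copied from narwhals/_arrow/utils.py in the hunk above.
    YMD_RE_NO_SEP = r"^(?P<year>(?:[12][0-9])?[0-9]{2})(?P<month>0[1-9]|1[0-2])(?P<day>0[1-9]|[12][0-9]|3[01])$"
    HMS_RE_NO_SEP = r"^(?P<hms_no_sep>\d{6})$"

    value = "20240101123456"  # input added to the to_datetime tests in this commit
    date_part, time_part = value[:8], value[8:]

    assert re.match(YMD_RE_NO_SEP, date_part), "date part should map to %Y%m%d"
    assert re.match(HMS_RE_NO_SEP, time_part), "time part should map to %H%M%S"
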
6 changes: 3 additions & 3 deletions narwhals/_dask/namespace.py
@@ -76,14 +76,14 @@ def convert_if_dtype(

return DaskExpr(
lambda df: [
df._native_frame.assign(lit=value)
.loc[:, "lit"]
df._native_frame.assign(literal=value)
.loc[:, "literal"]
.pipe(convert_if_dtype, dtype)
],
depth=0,
function_name="lit",
root_names=None,
output_names=["lit"],
output_names=["literal"],
returns_scalar=False,
backend_version=self._backend_version,
dtypes=self._dtypes,
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/namespace.py
@@ -139,7 +139,7 @@ def lit(self, value: Any, dtype: DType | None) -> PandasLikeExpr:
def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
pandas_series = PandasLikeSeries._from_iterable(
data=[value],
name="lit",
name="literal",
index=df._native_frame.index[0:1],
implementation=self._implementation,
backend_version=self._backend_version,
@@ -154,7 +154,7 @@ def _lit_pandas_series(df: PandasLikeDataFrame) -> PandasLikeSeries:
depth=0,
function_name="lit",
root_names=None,
output_names=["lit"],
output_names=["literal"],
implementation=self._implementation,
backend_version=self._backend_version,
dtypes=self._dtypes,
24 changes: 12 additions & 12 deletions narwhals/expr.py
@@ -5197,31 +5197,31 @@ def lit(value: Any, dtype: DType | None = None) -> Expr:
>>> @nw.narwhalify
... def func(df):
... return df.with_columns(nw.lit(3).alias("b"))
... return df.with_columns(nw.lit(3))
We can pass any supported library such as Pandas, Polars, or PyArrow to `func`:
>>> func(df_pd)
a b
a literal
0 1 3
1 2 3
>>> func(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i32 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 3   │
└─────┴─────┘
┌─────┬─────────┐
│ a   ┆ literal │
│ --- ┆ ---     │
│ i64 ┆ i32     │
╞═════╪═════════╡
│ 1   ┆ 3       │
│ 2   ┆ 3       │
└─────┴─────────┘
>>> func(df_pa)
pyarrow.Table
a: int64
b: int64
literal: int64
----
a: [[1,2]]
b: [[3,3]]
literal: [[3,3]]
"""
if is_numpy_array(value):
msg = (
24 changes: 12 additions & 12 deletions narwhals/stable/v1/__init__.py
@@ -1447,31 +1447,31 @@ def lit(value: Any, dtype: DType | None = None) -> Expr:
>>> @nw.narwhalify
... def func(df):
... return df.with_columns(nw.lit(3).alias("b"))
... return df.with_columns(nw.lit(3))
We can pass any supported library such as Pandas, Polars, or PyArrow to `func`:
>>> func(df_pd)
a b
a literal
0 1 3
1 2 3
>>> func(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i32 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 3   │
└─────┴─────┘
┌─────┬─────────┐
│ a   ┆ literal │
│ --- ┆ ---     │
│ i64 ┆ i32     │
╞═════╪═════════╡
│ 1   ┆ 3       │
│ 2   ┆ 3       │
└─────┴─────────┘
>>> func(df_pa)
pyarrow.Table
a: int64
b: int64
literal: int64
----
a: [[1,2]]
b: [[3,3]]
literal: [[3,3]]
"""
return _stableify(nw.lit(value, dtype))

17 changes: 16 additions & 1 deletion tests/expr_and_series/str/to_datetime_test.py
@@ -60,17 +60,24 @@ def test_to_datetime_series(constructor_eager: ConstructorEager) -> None:
"2020-01-01 12:34:00",
"2020-01-01T12:34:00.000000000",
),
(
{"a": ["20240101123456"]},
"2024-01-01 12:34:56",
"2024-01-01T12:34:56.000000000",
),
],
)
def test_to_datetime_infer_fmt(
request: pytest.FixtureRequest,
constructor: Constructor,
data: dict[str, list[str]],
expected: str,
expected_cudf: str,
) -> None:
if "polars" in str(constructor) and str(data["a"][0]).isdigit():
request.applymarker(pytest.mark.xfail)
if "cudf" in str(constructor): # pragma: no cover
expected = expected_cudf

result = (
nw.from_native(constructor(data))
.lazy()
@@ -94,14 +101,22 @@ def test_to_datetime_infer_fmt(
"2020-01-01 12:34:00",
"2020-01-01T12:34:00.000000000",
),
(
{"a": ["20240101123456"]},
"2024-01-01 12:34:56",
"2024-01-01T12:34:56.000000000",
),
],
)
def test_to_datetime_series_infer_fmt(
request: pytest.FixtureRequest,
constructor_eager: ConstructorEager,
data: dict[str, list[str]],
expected: str,
expected_cudf: str,
) -> None:
if "polars" in str(constructor_eager) and str(data["a"][0]).isdigit():
request.applymarker(pytest.mark.xfail)
if "cudf" in str(constructor_eager): # pragma: no cover
expected = expected_cudf

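For context, the behaviour these new parametrize cases target looks roughly like the sketch below. It assumes `pyarrow` is installed and that `format` may be omitted from `str.to_datetime` so that the format is inferred, which is what the `infer_fmt` tests exercise; the exact inferred format string is an assumption here.

    import pyarrow as pa
    import narwhals as nw

    tbl = pa.table({"a": ["20240101123456"]})
    # With no explicit format, the format is inferred from the data
    # (for this input, presumably "%Y%m%d%H%M%S").
    result = nw.from_native(tbl).select(nw.col("a").str.to_datetime())
    print(result.to_native())  # expected per the test: 2024-01-01 12:34:56
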
12 changes: 12 additions & 0 deletions tests/frame/lit_test.py
@@ -50,3 +50,15 @@ def test_lit_error(constructor: Constructor) -> None:
NotImplementedError, match="Nested datatypes are not supported yet."
):
_ = df.with_columns(nw.lit([1, 2]).alias("lit"))


def test_lit_out_name(constructor: Constructor) -> None:
data = {"a": [1, 3, 2]}
df_raw = constructor(data)
df = nw.from_native(df_raw).lazy()
result = df.with_columns(nw.lit(2))
expected = {
"a": [1, 3, 2],
"literal": [2, 2, 2],
}
assert_equal_data(result, expected)
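
The net user-facing effect of the rename, shown as a minimal sketch (assuming narwhals and pandas are installed): an unaliased `nw.lit` now lands in a column called `literal`, matching Polars' default, while an explicit `.alias` still overrides it.

    import narwhals as nw
    import pandas as pd

    df = nw.from_native(pd.DataFrame({"a": [1, 2]}))

    # Default output name after this change.
    print(df.with_columns(nw.lit(3)).to_native().columns.tolist())  # ['a', 'literal']

    # An explicit alias overrides the default, exactly as before.
    print(df.with_columns(nw.lit(3).alias("b")).to_native().columns.tolist())  # ['a', 'b']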
