Merge remote-tracking branch 'upstream/main' into ewm

narwhals-dev · Nov 1, 2024 · 1bf1571 · 1bf1571
2 parents 212b78a + 5c3db5b
commit 1bf1571
Show file tree

Hide file tree

Showing 5 changed files with 133 additions and 11 deletions.
diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md
@@ -15,6 +15,7 @@
         - is_cudf_series
         - is_dask_dataframe
         - is_ibis_table
+        - is_into_dataframe
         - is_into_series
         - is_modin_dataframe
         - is_modin_index

diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py
@@ -340,7 +340,9 @@ def convert_str_slice_to_int_slice(
 # Regex for date, time, separator and timezone components
 DATE_RE = r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})"
 SEP_RE = r"(?P<sep>\s|T)"
-TIME_RE = r"(?P<time>\d{2}:\d{2}:\d{2})"  # \s*(?P<period>[AP]M)?)?
+TIME_RE = r"(?P<time>\d{2}:\d{2}(?::\d{2})?)"  # \s*(?P<period>[AP]M)?)?
+HMS_RE = r"^(?P<hms>\d{2}:\d{2}:\d{2})$"  # Anchored: three two-digit fields (HH:MM:SS)
+HM_RE = r"^(?P<hm>\d{2}:\d{2})$"  # Anchored: two two-digit fields (HH:MM)
 TZ_RE = r"(?P<tz>Z|[+-]\d{2}:?\d{2})"  # Matches 'Z', '+02:00', '+0200'; an hour-only offset such as '+02' does not match
 FULL_RE = rf"{DATE_RE}{SEP_RE}?{TIME_RE}?{TZ_RE}?$"
 
@@ -354,6 +356,10 @@ def convert_str_slice_to_int_slice(
     (DMY_RE, "%d-%m-%Y"),
     (MDY_RE, "%m-%d-%Y"),
 )
+# Candidate (regex, format string) pairs for time inference; `_parse_time_format`
+# tries them in this order and returns the format of the first full match.
+TIME_FORMATS = (
+    (HMS_RE, "%H:%M:%S"),
+    (HM_RE, "%H:%M"),
+)
 
 
 def parse_datetime_format(arr: pa.StringArray) -> str:
@@ -418,5 +424,8 @@ def _parse_date_format(arr: pa.Array) -> str:
 def _parse_time_format(arr: pa.Array) -> str:
+    """Infer the time format shared by every entry of `arr`.
+
+    Each `(regex, format)` pair in `TIME_FORMATS` is tried in order; the format
+    of the first regex that yields a valid match for all entries is returned.
+    Returns an empty string when no candidate matches every entry.
+    """
     import pyarrow.compute as pc  # ignore-banned-import
 
-    matches = pc.extract_regex(arr, pattern=TIME_RE)
-    return "%H:%M:%S" if pc.all(matches.is_valid()).as_py() else ""
+    for time_rgx, time_fmt in TIME_FORMATS:
+        matches = pc.extract_regex(arr, pattern=time_rgx)
+        if pc.all(matches.is_valid()).as_py():
+            return time_fmt
+    return ""
diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
@@ -251,6 +251,44 @@ def is_into_series(native_series: IntoSeries) -> bool:
     )
 
 
+def is_into_dataframe(native_dataframe: Any) -> bool:
+    """
+    Check whether `native_dataframe` can be converted to a Narwhals DataFrame.
+
+    An object qualifies if it is a Narwhals `DataFrame`, exposes a
+    `__narwhals_dataframe__` attribute, or is recognised by
+    `is_polars_dataframe`, `is_pyarrow_table` or `is_pandas_like_dataframe`.
+
+    Arguments:
+        native_dataframe: The object to check.
+
+    Returns:
+        `True` if `native_dataframe` can be converted to a Narwhals DataFrame, `False` otherwise.
+
+    Examples:
+        >>> import pandas as pd
+        >>> import polars as pl
+        >>> import numpy as np
+        >>> from narwhals.dependencies import is_into_dataframe
+
+        >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
+
+        >>> is_into_dataframe(df_pd)
+        True
+        >>> is_into_dataframe(df_pl)
+        True
+        >>> is_into_dataframe(np_arr)
+        False
+    """
+    # NOTE(review): function-local import, presumably to avoid a circular
+    # import between narwhals.dependencies and narwhals.dataframe - confirm.
+    from narwhals.dataframe import DataFrame
+
+    return (
+        isinstance(native_dataframe, DataFrame)
+        or hasattr(native_dataframe, "__narwhals_dataframe__")
+        or is_polars_dataframe(native_dataframe)
+        or is_pyarrow_table(native_dataframe)
+        or is_pandas_like_dataframe(native_dataframe)
+    )
+
+
 __all__ = [
     "get_polars",
     "get_pandas",
@@ -275,5 +313,6 @@ def is_into_series(native_series: IntoSeries) -> bool:
     "is_dask_dataframe",
     "is_pandas_like_dataframe",
     "is_pandas_like_series",
+    "is_into_dataframe",
     "is_into_series",
 ]
diff --git a/tests/dependencies/is_into_dataframe_test.py b/tests/dependencies/is_into_dataframe_test.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from typing import Any
+
+import numpy as np
+import pandas as pd
+import polars as pl
+import pyarrow as pa
+
+import narwhals as nw
+from narwhals.dependencies import is_into_dataframe
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+
+class DictDataFrame:
+    """Minimal dict-backed stand-in that exposes `__narwhals_dataframe__`.
+
+    Used by `test_is_into_dataframe` to check that an object carrying that
+    attribute is accepted by `is_into_dataframe`.
+    """
+
+    def __init__(self, data: dict[str, list[Any]]) -> None:
+        self._data = data
+
+    def __len__(self) -> int:  # pragma: no cover
+        # Length of the first column's list of values.
+        return len(next(iter(self._data.values())))
+
+    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
+        return self
+
+
+def test_is_into_dataframe() -> None:
+    """Check `is_into_dataframe` on accepted and rejected inputs."""
+    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+    # Accepted: PyArrow table, Polars and pandas frames, a Narwhals wrapper,
+    # and a custom object exposing `__narwhals_dataframe__`.
+    assert is_into_dataframe(pa.table(data))
+    assert is_into_dataframe(pl.DataFrame(data))
+    assert is_into_dataframe(pd.DataFrame(data))
+    assert is_into_dataframe(nw.from_native(pd.DataFrame(data)))
+    assert is_into_dataframe(DictDataFrame(data))
+    # Rejected: a NumPy array and a plain dict.
+    assert not is_into_dataframe(np.array([[1, 4], [2, 5], [3, 6]]))
+    assert not is_into_dataframe(data)
diff --git a/tests/expr_and_series/str/to_datetime_test.py b/tests/expr_and_series/str/to_datetime_test.py
@@ -47,11 +47,29 @@ def test_to_datetime_series(constructor_eager: ConstructorEager) -> None:
     assert str(result) == expected
 
 
-def test_to_datetime_infer_fmt(constructor: Constructor) -> None:
+@pytest.mark.parametrize(
+    ("data", "expected", "expected_cudf"),
+    [
+        (
+            {"a": ["2020-01-01T12:34:56"]},
+            "2020-01-01 12:34:56",
+            "2020-01-01T12:34:56.000000000",
+        ),
+        (
+            {"a": ["2020-01-01T12:34"]},
+            "2020-01-01 12:34:00",
+            "2020-01-01T12:34:00.000000000",
+        ),
+    ],
+)
+def test_to_datetime_infer_fmt(
+    constructor: Constructor,
+    data: dict[str, list[str]],
+    expected: str,
+    expected_cudf: str,
+) -> None:
     if "cudf" in str(constructor):  # pragma: no cover
-        expected = "2020-01-01T12:34:56.000000000"
-    else:
-        expected = "2020-01-01 12:34:56"
+        expected = expected_cudf
 
     result = (
         nw.from_native(constructor(data))
@@ -63,11 +81,29 @@ def test_to_datetime_infer_fmt(constructor: Constructor) -> None:
     assert str(result) == expected
 
 
-def test_to_datetime_series_infer_fmt(constructor_eager: ConstructorEager) -> None:
+@pytest.mark.parametrize(
+    ("data", "expected", "expected_cudf"),
+    [
+        (
+            {"a": ["2020-01-01T12:34:56"]},
+            "2020-01-01 12:34:56",
+            "2020-01-01T12:34:56.000000000",
+        ),
+        (
+            {"a": ["2020-01-01T12:34"]},
+            "2020-01-01 12:34:00",
+            "2020-01-01T12:34:00.000000000",
+        ),
+    ],
+)
+def test_to_datetime_series_infer_fmt(
+    constructor_eager: ConstructorEager,
+    data: dict[str, list[str]],
+    expected: str,
+    expected_cudf: str,
+) -> None:
     if "cudf" in str(constructor_eager):  # pragma: no cover
-        expected = "2020-01-01T12:34:56.000000000"
-    else:
-        expected = "2020-01-01 12:34:56"
+        expected = expected_cudf
 
     result = (
         nw.from_native(constructor_eager(data), eager_only=True)["a"].str.to_datetime()