diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4a23b4091..b7a84a592 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,8 +5,8 @@ updates: - package-ecosystem: "github-actions" directory: "/" schedule: - # Check for updates to GitHub Actions every week - interval: "weekly" + # Check for updates to GitHub Actions every month + interval: "monthly" commit-message: prefix: "skip changelog" # So this PR will not be added to release-drafter include: "scope" # List of the updated dependencies in the commit will be added \ No newline at end of file diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml index 4f9cbe06d..bb69ccd94 100644 --- a/.github/workflows/downstream_tests.yml +++ b/.github/workflows/downstream_tests.yml @@ -55,7 +55,7 @@ jobs: matrix: python-version: ["3.12"] os: [ubuntu-latest] - dependencies: ["core", "core,optional"] + dependencies: ["core,optional"] runs-on: ${{ matrix.os }} steps: @@ -73,19 +73,27 @@ jobs: run: | git clone https://github.com/marimo-team/marimo.git --depth=1 cd marimo + uv venv -p 3.12 git log - name: install-basics run: uv pip install --upgrade tox virtualenv setuptools hatch --system - name: install-marimo-dev run: | cd marimo - uv pip install -e ".[dev]" --system + . .venv/bin/activate + uv pip install -e ".[dev]" + which python - name: install-narwhals-dev run: | - uv pip uninstall narwhals --system - uv pip install -e . --system + cd marimo + . .venv/bin/activate + uv pip uninstall narwhals + uv pip install -e ./.. - name: show-deps - run: uv pip freeze + run: | + cd marimo + . .venv/bin/activate + uv pip freeze - name: Create assets directory, copy over index.html continue-on-error: true run: | @@ -96,12 +104,13 @@ jobs: if: ${{ matrix.dependencies == 'core,optional' }} run: | cd marimo - hatch run +py=${{ matrix.python-version }} test-optional:test-narwhals + . 
.venv/bin/activate + # make sure that we use the .venv when running tests, so that + # the local narwhals install is picked up + sed -i '/^\[tool.hatch.envs.default\]/a path = ".venv"' pyproject.toml + hatch run python -c "import narwhals; print(narwhals.__file__)" + hatch run test-optional:test-narwhals timeout-minutes: 15 - - name: Run typechecks - run: | - cd marimo - hatch run typecheck:check scikit-lego: strategy: @@ -181,3 +190,43 @@ jobs: run: | cd py-shiny make narwhals-test-integration + + tubular: + strategy: + matrix: + python-version: ["3.12"] + os: [ubuntu-latest] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: "true" + cache-suffix: ${{ matrix.python-version }} + cache-dependency-glob: "**requirements*.txt" + - name: clone-tubular + run: | + git clone https://github.com/lvgig/tubular --depth=1 + cd tubular + git log + - name: install-basics + run: uv pip install --upgrade tox virtualenv setuptools pytest-env --system + - name: install-tubular-dev + run: | + cd tubular + uv pip install -e .[dev] --system + - name: install-narwhals-dev + run: | + uv pip uninstall narwhals --system + uv pip install -e . 
--system + - name: show-deps + run: uv pip freeze + - name: Run pytest + run: | + cd tubular + pytest tests --config-file=pyproject.toml diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index 3f02f965f..fd6a7cfb2 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -90,7 +90,7 @@ jobs: nightlies: strategy: matrix: - python-version: ["3.12"] + python-version: ["3.13"] os: [ubuntu-latest] if: github.event.pull_request.head.repo.full_name == github.repository runs-on: ${{ matrix.os }} diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index ee88911ea..7847939b9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -34,7 +34,7 @@ jobs: pytest-windows: strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.12"] os: [windows-latest] runs-on: ${{ matrix.os }} @@ -61,7 +61,7 @@ jobs: pytest-coverage: strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.11", "3.13"] os: [ubuntu-latest] runs-on: ${{ matrix.os }} diff --git a/.gitignore b/.gitignore index 8b9adeb8f..774f09637 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ coverage.xml # Documentation site/ todo.md +docs/this.md docs/api-completeness/*.md !docs/api-completeness/index.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4d416e237..141e9d3c4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,9 @@ +ci: + autoupdate_schedule: monthly repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.6.9' + rev: 'v0.7.1' hooks: # Run the formatter. 
- id: ruff-format @@ -9,7 +11,7 @@ repos: - id: ruff args: [--fix] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.11.2' + rev: 'v1.13.0' hooks: - id: mypy additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2'] @@ -40,7 +42,7 @@ repos: hooks: - id: nbstripout - repo: https://github.com/adamchainz/blacken-docs - rev: "1.19.0" # replace with latest tag on GitHub + rev: "1.19.1" # replace with latest tag on GitHub hooks: - id: blacken-docs args: [--skip-errors] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c7d7c44a0..b8f333f1e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -109,6 +109,10 @@ nox Notice that nox will also require to have all the python versions that are defined in the `noxfile.py` installed in your system. +#### Testing cuDF + +We can't currently test in CI against cuDF, but you can test it manually in Kaggle using GPUs. Please follow this [Kaggle notebook](https://www.kaggle.com/code/marcogorelli/testing-cudf-in-narwhals) to run the tests. + ### 7. Building docs To build the docs, run `mkdocs serve`, and then open the link provided in a browser. diff --git a/README.md b/README.md index 44fc31e56..b3acb17ba 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,13 @@ Join the party! - [Altair](https://github.com/vega/altair/) - [Hamilton](https://github.com/DAGWorks-Inc/hamilton/tree/main/examples/narwhals) +- [marimo](https://github.com/marimo-team/marimo) +- [pymarginaleffects](https://github.com/vincentarelbundock/pymarginaleffects) - [scikit-lego](https://github.com/koaning/scikit-lego) - [scikit-playtime](https://github.com/koaning/scikit-playtime) - [timebasedcv](https://github.com/FBruzzesi/timebasedcv) -- [marimo](https://github.com/marimo-team/marimo) +- [tubular](https://github.com/lvgig/tubular) +- [wimsey](https://github.com/benrutter/wimsey) Feel free to add your project to the list if it's missing, and/or [chat with us on Discord](https://discord.gg/V3PqtB4VA4) if you'd like any support. 
diff --git a/docs/api-reference/dependencies.md b/docs/api-reference/dependencies.md index 959e8ee0c..f8995e36a 100644 --- a/docs/api-reference/dependencies.md +++ b/docs/api-reference/dependencies.md @@ -11,14 +11,20 @@ - get_polars - get_pyarrow - is_cudf_dataframe + - is_cudf_index - is_cudf_series - is_dask_dataframe - is_ibis_table + - is_into_dataframe + - is_into_series - is_modin_dataframe + - is_modin_index - is_modin_series - is_numpy_array - is_pandas_dataframe + - is_pandas_index - is_pandas_like_dataframe + - is_pandas_like_index - is_pandas_like_series - is_pandas_series - is_polars_dataframe diff --git a/docs/api-reference/expr_dt.md b/docs/api-reference/expr_dt.md index 5c9ab41f3..604ac4abf 100644 --- a/docs/api-reference/expr_dt.md +++ b/docs/api-reference/expr_dt.md @@ -6,22 +6,23 @@ members: - convert_time_zone - date - - year - - month - day - - ordinal_day - hour - - minute - - second - - millisecond - microsecond + - millisecond + - minute + - month - nanosecond + - ordinal_day - replace_time_zone - - total_minutes - - total_seconds - - total_milliseconds + - second + - timestamp - total_microseconds + - total_milliseconds + - total_minutes - total_nanoseconds + - total_seconds - to_string + - year show_source: false show_bases: false diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md index c4b04a2f4..2b5be6e8c 100644 --- a/docs/api-reference/narwhals.md +++ b/docs/api-reference/narwhals.md @@ -15,6 +15,7 @@ Here are the top-level functions available in Narwhals. 
- from_dict - from_native - from_arrow + - generate_temporary_column_name - get_level - get_native_namespace - is_ordered_categorical diff --git a/docs/api-reference/series_dt.md b/docs/api-reference/series_dt.md index c92592411..23d4817cb 100644 --- a/docs/api-reference/series_dt.md +++ b/docs/api-reference/series_dt.md @@ -6,22 +6,23 @@ members: - convert_time_zone - date - - year - - month - day - - ordinal_day - hour - - minute - - second - - millisecond - microsecond + - millisecond + - minute + - month - nanosecond + - ordinal_day - replace_time_zone - - total_minutes - - total_seconds - - total_milliseconds + - second + - timestamp - total_microseconds + - total_milliseconds + - total_minutes - total_nanoseconds + - total_seconds - to_string + - year show_source: false show_bases: false diff --git a/docs/basics/dataframe_conversion.md b/docs/basics/dataframe_conversion.md new file mode 100644 index 000000000..690f5d093 --- /dev/null +++ b/docs/basics/dataframe_conversion.md @@ -0,0 +1,76 @@ +# Conversion between libraries + +Some library maintainers must apply complex dataframe operations, using methods and functions that may not (yet) be implemented in Narwhals. In such cases, Narwhals can still be highly beneficial, by allowing easy dataframe conversion. + +## Dataframe X in, pandas out + +Imagine that you maintain a library with a function that operates on pandas dataframes to produce automated reports. You want to allow users to supply a dataframe in any format to that function (pandas, Polars, DuckDB, cuDF, Modin, etc.) without adding all those dependencies to your own project and without special-casing each input library's variation of `to_pandas` / `toPandas` / `to_pandas_df` / `df` ... + +One solution is to use Narwhals as a thin Dataframe ingestion layer, to convert the user-supplied dataframe to the format that your library uses internally. 
Since Narwhals is zero-dependency, this is a much more lightweight solution than including all the dataframe libraries as dependencies, +and easier to write than special casing each input library's `to_pandas` method (if it even exists!). + +To illustrate, we create dataframes in various formats: + +```python exec="1" source="above" session="conversion" +import narwhals as nw +from narwhals.typing import IntoDataFrame + +import duckdb +import polars as pl +import pandas as pd + +df_polars = pl.DataFrame( + { + "A": [1, 2, 3, 4, 5], + "fruits": ["banana", "banana", "apple", "apple", "banana"], + "B": [5, 4, 3, 2, 1], + "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], + } +) +df_pandas = df_polars.to_pandas() +df_duckdb = duckdb.sql("SELECT * FROM df_polars") +``` + +Now, we define a function that can ingest any dataframe type supported by Narwhals, and convert it to a pandas DataFrame for internal use: + +```python exec="1" source="above" session="conversion" result="python" +def df_to_pandas(df: IntoDataFrame) -> pd.DataFrame: + return nw.from_native(df).to_pandas() + + +print(df_to_pandas(df_polars)) +``` + +## Dataframe X in, Polars out + +### Via PyCapsule Interface + +Similarly, if your library uses Polars internally, you can convert any user-supplied dataframe to Polars format using Narwhals. + +```python exec="1" source="above" session="conversion" result="python" +def df_to_polars(df: IntoDataFrame) -> pl.DataFrame: + return nw.from_arrow(nw.from_native(df), native_namespace=pl).to_native() + + +print(df_to_polars(df_duckdb)) # You can only execute this line of code once. +``` + +It works to pass Polars to `native_namespace` here because Polars supports the [PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for import. 
+ +Note that the PyCapsule Interface makes no guarantee that you can call it repeatedly, so the approach above only works if you +only expect to perform the conversion a single time on each input object. + +### Via PyArrow + +If you need to ingest the same dataframe multiple times, then you may want to go via PyArrow instead. +This may be less efficient than the PyCapsule approach above (and always requires PyArrow!), but is more forgiving: + +```python exec="1" source="above" session="conversion" result="python" +def df_to_polars(df: IntoDataFrame) -> pl.DataFrame: + return pl.DataFrame(nw.from_native(df).to_arrow()) + + +df_duckdb = duckdb.sql("SELECT * FROM df_polars") +print(df_to_polars(df_duckdb)) # We can execute this... +print(df_to_polars(df_duckdb)) # ...as many times as we like! +``` diff --git a/docs/extending.md b/docs/extending.md index 22d85f701..865a93b08 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -37,6 +37,7 @@ def func(df: FrameT) -> FrameT: b_std=nw.col("b").std(), ) ``` + will work for any of pandas, Polars, cuDF, Modin, and PyArrow. However, sometimes you don't need to do complex operations on dataframes - all you need @@ -57,9 +58,22 @@ def func(df: Any) -> Schema: df = nw.from_native(df, eager_or_interchange_only=True) return df.schema ``` + is also supported, meaning that, in addition to the libraries mentioned above, you can also pass Ibis, DuckDB, Vaex, and any library which implements the protocol. +#### Interchange-only support + +While libraries for which we have full support can benefit from the whole Narwhals API, +libraries which have interchange only support can access the following methods after +converting to Narwhals DataFrame: + +- `.schema`, hence column names via `.schema.names()` and column types via `.schema.dtypes()` +- `.columns` +- `.to_pandas()` and `.to_arrow()`, for converting to Pandas and Arrow, respectively. +- `.select(names)` (Ibis and DuckDB), where `names` is a list of (string) column names. 
This is useful for + selecting columns before converting to another library. + ### Extending Narwhals If you want your own library to be recognised too, you're welcome open a PR (with tests)!. diff --git a/docs/index.md b/docs/index.md index f18d9af85..e9fe02170 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,7 @@ Extremely lightweight and extensible compatibility layer between dataframe libra - **Full API support**: cuDF, Modin, pandas, Polars, PyArrow - **Lazy-only support**: Dask -- **Interchange-level support**: Ibis, Vaex, anything else which implements the DataFrame Interchange Protocol +- **Interchange-level support**: Ibis, DuckDB, Vaex, anything else which implements the DataFrame Interchange Protocol Seamlessly support all, without depending on any! diff --git a/docs/installation.md b/docs/installation.md index 1695a7eec..9f57a05df 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -29,7 +29,7 @@ To verify the installation, start the Python REPL and execute: ```python >>> import narwhals >>> narwhals.__version__ -'1.9.4' +'1.12.1' ``` If you see the version number, then the installation was successful! 
diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 929f35790..beec6070b 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,4 +1,5 @@ jinja2 +duckdb markdown-exec[ansi] mkdocs mkdocs-autorefs diff --git a/mkdocs.yml b/mkdocs.yml index 3793d898a..46cb5335f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,6 +10,7 @@ nav: - basics/dataframe.md - basics/series.md - basics/complete_example.md + - basics/dataframe_conversion.md - Pandas-like concepts: - other/pandas_index.md - other/user_warning.md @@ -45,6 +46,7 @@ nav: - api-reference/dtypes.md - api-reference/selectors.md - api-reference/typing.md + - This: this.md theme: name: material font: false @@ -76,9 +78,7 @@ theme: toggle: icon: material/brightness-4 name: Switch to system preference -extra_css: - - https://unpkg.com/katex@0/dist/katex.min.css - - css/mkdocstrings.css + plugins: - search @@ -89,9 +89,12 @@ plugins: - https://installer.readthedocs.io/en/stable/objects.inv rendering: show_signature_annotations: true + options: + members_order: alphabetical hooks: - utils/generate_backend_completeness.py +- utils/generate_zen_content.py markdown_extensions: diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 8dd76d081..2214d1cf7 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -59,6 +59,7 @@ from narwhals.translate import narwhalify from narwhals.translate import to_native from narwhals.translate import to_py_scalar +from narwhals.utils import generate_temporary_column_name from narwhals.utils import is_ordered_categorical from narwhals.utils import maybe_align_index from narwhals.utils import maybe_convert_dtypes @@ -66,7 +67,7 @@ from narwhals.utils import maybe_reset_index from narwhals.utils import maybe_set_index -__version__ = "1.9.4" +__version__ = "1.12.1" __all__ = [ "dependencies", @@ -74,6 +75,7 @@ "concat", "from_dict", "from_arrow", + "generate_temporary_column_name", "get_level", "new_series", "to_native", diff --git 
a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 6b87f1d8d..ac845853a 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -17,7 +17,7 @@ from narwhals.dependencies import is_numpy_array from narwhals.utils import Implementation from narwhals.utils import flatten -from narwhals.utils import generate_unique_token +from narwhals.utils import generate_temporary_column_name from narwhals.utils import is_sequence_but_not_str from narwhals.utils import parse_columns_to_drop @@ -172,7 +172,7 @@ def __getitem__( ), ) -> ArrowSeries | ArrowDataFrame: if isinstance(item, tuple): - item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) + item = tuple(list(i) if is_sequence_but_not_str(i) else i for i in item) # type: ignore[assignment] if isinstance(item, str): from narwhals._arrow.series import ArrowSeries @@ -335,10 +335,10 @@ def with_columns( df = self._native_frame.__class__.from_arrays(to_concat, names=output_names) return self._from_native_frame(df) - def group_by(self, *keys: str) -> ArrowGroupBy: + def group_by(self, *keys: str, drop_null_keys: bool) -> ArrowGroupBy: from narwhals._arrow.group_by import ArrowGroupBy - return ArrowGroupBy(self, list(keys)) + return ArrowGroupBy(self, list(keys), drop_null_keys=drop_null_keys) def join( self, @@ -358,7 +358,7 @@ def join( if how == "cross": plx = self.__narwhals_namespace__() - key_token = generate_unique_token( + key_token = generate_temporary_column_name( n_bytes=8, columns=[*self.columns, *other.columns] ) @@ -579,7 +579,7 @@ def is_duplicated(self: Self) -> ArrowSeries: df = self._native_frame columns = self.columns - col_token = generate_unique_token(n_bytes=8, columns=columns) + col_token = generate_temporary_column_name(n_bytes=8, columns=columns) row_count = ( df.append_column(col_token, pa.array(np.arange(len(self)))) .group_by(columns) @@ -638,7 +638,7 @@ def unique( agg_func_map = {"any": "min", "first": "min", "last": "max"} agg_func = 
agg_func_map[keep] - col_token = generate_unique_token(n_bytes=8, columns=self.columns) + col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) keep_idx = ( df.append_column(col_token, pa.array(np.arange(len(self)))) .group_by(subset) diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 55c529d30..35e936d72 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -353,7 +353,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: "`nw.col('a', 'b')`\n" ) raise ValueError(msg) - tmp = df.group_by(*keys).agg(self) + tmp = df.group_by(*keys, drop_null_keys=False).agg(self) tmp = df.select(*keys).join( tmp, how="left", left_on=keys, right_on=keys, suffix="_right" ) @@ -420,6 +420,11 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr: self._expr, "dt", "convert_time_zone", time_zone ) + def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowExpr: + return reuse_series_namespace_implementation( + self._expr, "dt", "timestamp", time_unit + ) + def date(self: Self) -> ArrowExpr: return reuse_series_namespace_implementation(self._expr, "dt", "date") diff --git a/narwhals/_arrow/group_by.py b/narwhals/_arrow/group_by.py index 6c7b20485..991a96a51 100644 --- a/narwhals/_arrow/group_by.py +++ b/narwhals/_arrow/group_by.py @@ -37,10 +37,15 @@ def get_function_name_option(function_name: str) -> Any | None: class ArrowGroupBy: - def __init__(self, df: ArrowDataFrame, keys: list[str]) -> None: + def __init__( + self, df: ArrowDataFrame, keys: list[str], *, drop_null_keys: bool + ) -> None: import pyarrow as pa # ignore-banned-import() - self._df = df + if drop_null_keys: + self._df = df.drop_nulls(keys) + else: + self._df = df self._keys = list(keys) self._grouped = pa.TableGroupBy(self._df._native_frame, list(self._keys)) @@ -74,11 +79,7 @@ def agg( ) def __iter__(self) -> Iterator[tuple[Any, ArrowDataFrame]]: - key_values = ( - self._df.select(*self._keys) - .unique(subset=self._keys, 
keep="first") - .iter_rows() - ) + key_values = self._df.select(*self._keys).unique(subset=self._keys, keep="first") nw_namespace = self._df.__narwhals_namespace__() yield from ( ( @@ -87,7 +88,7 @@ def __iter__(self) -> Iterator[tuple[Any, ArrowDataFrame]]: *[nw_namespace.col(k) == v for k, v in zip(self._keys, key_value)] ), ) - for key_value in key_values + for key_value in key_values.iter_rows() ) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 2eb738291..70009df43 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -12,9 +12,10 @@ from narwhals._arrow.utils import floordiv_compat from narwhals._arrow.utils import narwhals_to_native_dtype from narwhals._arrow.utils import native_to_narwhals_dtype +from narwhals._arrow.utils import parse_datetime_format from narwhals._arrow.utils import validate_column_comparand from narwhals.utils import Implementation -from narwhals.utils import generate_unique_token +from narwhals.utils import generate_temporary_column_name if TYPE_CHECKING: from types import ModuleType @@ -604,7 +605,7 @@ def is_first_distinct(self: Self) -> Self: import pyarrow.compute as pc # ignore-banned-import() row_number = pa.array(np.arange(len(self))) - col_token = generate_unique_token(n_bytes=8, columns=[self.name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) first_distinct_index = ( pa.Table.from_arrays([self._native_series], names=[self.name]) .append_column(col_token, row_number) @@ -621,7 +622,7 @@ def is_last_distinct(self: Self) -> Self: import pyarrow.compute as pc # ignore-banned-import() row_number = pa.array(np.arange(len(self))) - col_token = generate_unique_token(n_bytes=8, columns=[self.name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) last_distinct_index = ( pa.Table.from_arrays([self._native_series], names=[self.name]) .append_column(col_token, row_number) @@ -715,7 +716,7 @@ def to_arrow(self: Self) -> pa.Array: def 
mode(self: Self) -> ArrowSeries: plx = self.__narwhals_namespace__() - col_token = generate_unique_token(n_bytes=8, columns=[self.name]) + col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) return self.value_counts(name=col_token, normalize=False).filter( plx.col(col_token) == plx.col(col_token).max() )[self.name] @@ -780,6 +781,59 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries: return self._arrow_series._from_native_series(result) + def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowSeries: + import pyarrow as pa # ignore-banned-import + import pyarrow.compute as pc # ignore-banned-import + + s = self._arrow_series._native_series + dtype = self._arrow_series.dtype + if dtype == self._arrow_series._dtypes.Datetime: + unit = dtype.time_unit # type: ignore[attr-defined] + s_cast = s.cast(pa.int64()) + if unit == "ns": + if time_unit == "ns": + result = s_cast + elif time_unit == "us": + result = floordiv_compat(s_cast, 1_000) + else: + result = floordiv_compat(s_cast, 1_000_000) + elif unit == "us": + if time_unit == "ns": + result = pc.multiply(s_cast, 1_000) + elif time_unit == "us": + result = s_cast + else: + result = floordiv_compat(s_cast, 1_000) + elif unit == "ms": + if time_unit == "ns": + result = pc.multiply(s_cast, 1_000_000) + elif time_unit == "us": + result = pc.multiply(s_cast, 1_000) + else: + result = s_cast + elif unit == "s": + if time_unit == "ns": + result = pc.multiply(s_cast, 1_000_000_000) + elif time_unit == "us": + result = pc.multiply(s_cast, 1_000_000) + else: + result = pc.multiply(s_cast, 1_000) + else: # pragma: no cover + msg = f"unexpected time unit {unit}, please report an issue at https://github.com/narwhals-dev/narwhals" + raise AssertionError(msg) + elif dtype == self._arrow_series._dtypes.Date: + time_s = pc.multiply(s.cast(pa.int32()), 86400) + if time_unit == "ns": + result = pc.multiply(time_s, 1_000_000_000) + elif time_unit == "us": + result = 
pc.multiply(time_s, 1_000_000) + else: + result = pc.multiply(time_s, 1_000) + else: + msg = "Input should be either of Date or Datetime type" + raise TypeError(msg) + return self._arrow_series._from_native_series(result) + def date(self: Self) -> ArrowSeries: import pyarrow as pa # ignore-banned-import() @@ -1062,8 +1116,7 @@ def to_datetime(self: Self, format: str | None) -> ArrowSeries: # noqa: A002 import pyarrow.compute as pc # ignore-banned-import() if format is None: - msg = "`format` is required for pyarrow backend." - raise ValueError(msg) + format = parse_datetime_format(self._arrow_series._native_series) return self._arrow_series._from_native_series( pc.strptime(self._arrow_series._native_series, format=format, unit="us") diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 7f6fa6558..6f74294d5 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -335,3 +335,97 @@ def convert_str_slice_to_int_slice( stop = columns.index(str_slice.stop) + 1 if str_slice.stop is not None else None step = str_slice.step return (start, stop, step) + + +# Regex for date, time, separator and timezone components +DATE_RE = r"(?P\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})" +SEP_RE = r"(?P\s|T)" +TIME_RE = r"(?P