From a9abd1b39eb0a2e9e2b697a5b9e8e5cd209417d3 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Sun, 14 Jul 2024 19:41:21 +0100 Subject: [PATCH] docs: rework README, remove ibis comparison (#522) * docs: rework README, remove ibis comparison * add levels page * add levels page --- README.md | 29 ++++++++++------------------ docs/levels.md | 40 +++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + pyproject.toml | 2 +- tests/series/cast_test.py | 1 + 5 files changed, 53 insertions(+), 20 deletions(-) create mode 100644 docs/levels.md diff --git a/README.md b/README.md index caf0f751a..94bc8c662 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,10 @@ [![PyPI version](https://badge.fury.io/py/narwhals.svg)](https://badge.fury.io/py/narwhals) -Extremely lightweight and extensible compatibility layer between Polars, pandas, Modin, and cuDF (and more!). +Extremely lightweight and extensible compatibility layer between dataframe libraries! -- [Read the documentation](https://narwhals-dev.github.io/narwhals/) -- [Chat with us on Discord!](https://discord.gg/V3PqtB4VA4) -- [Join our community call](https://calendar.google.com/calendar/embed?src=27ff6dc5f598c1d94c1f6e627a1aaae680e2fac88f848bda1f2c7946ae74d5ab%40group.calendar.google.com&ctz=Europe%2FLondon) -- [Read the contributing guide](https://github.com/narwhals-dev/narwhals/blob/main/CONTRIBUTING.md) +- **Full API support**: cuDF, Modin, pandas, Polars +- **Interchange-level support**: Ibis, PyArrow, Vaex, anything else which implements the DataFrame Interchange Protocol Seamlessly support all, without depending on any! @@ -30,6 +28,13 @@ Seamlessly support all, without depending on any! - ✅ **Perfect backwards compatibility policy**, see [stable api](https://narwhals-dev.github.io/narwhals/backcompat/) for how to opt-in +Get started! 
+ +- [Read the documentation](https://narwhals-dev.github.io/narwhals/) +- [Chat with us on Discord!](https://discord.gg/V3PqtB4VA4) +- [Join our community call](https://calendar.google.com/calendar/embed?src=27ff6dc5f598c1d94c1f6e627a1aaae680e2fac88f848bda1f2c7946ae74d5ab%40group.calendar.google.com&ctz=Europe%2FLondon) +- [Read the contributing guide](https://github.com/narwhals-dev/narwhals/blob/main/CONTRIBUTING.md) + ## Used by / integrates with Join the party! @@ -68,20 +73,6 @@ There are three steps to writing dataframe-agnostic code using Narwhals: - if you started with Modin, you'll get Modin back (and compute will be distributed) - if you started with cuDF, you'll get cuDF back (and compute will happen on GPU) -## What about Ibis? - -Like Ibis, Narwhals aims to enable dataframe-agnostic code. However, Narwhals comes with **zero** dependencies, -is about as lightweight as it gets, and is aimed at library developers rather than at end users. It also does -not aim to support as many backends, instead preferring to focus on dataframes. So, which should you use? - -- If you need a SQL frontend in Python: Ibis! -- If you're a library maintainer and want a lightweight and minimal-overhead layer to get cross-dataframe library support: Narwhals! - -Here is the package size increase which would result from installing each tool in a non-pandas -environment: - -![image](https://github.com/MarcoGorelli/narwhals/assets/33491632/a8dfba78-feb1-48c1-960a-5b9b03585fa5) - ## Example See the [tutorial](https://narwhals-dev.github.io/narwhals/basics/dataframe/) for several examples! diff --git a/docs/levels.md b/docs/levels.md new file mode 100644 index 000000000..d632baa6d --- /dev/null +++ b/docs/levels.md @@ -0,0 +1,40 @@ +# Levels + +Narwhals comes with two levels of support: "full" and "interchange". + +Libraries for which we have full support can benefit from the whole +[Narwhals API](https://narwhals-dev.github.io/narwhals/api-reference/). 
+ +For example: + +```python exec="1" source="above" +import narwhals as nw +from narwhals.typing import FrameT + +@nw.narwhalify +def func(df: FrameT) -> FrameT: + return df.group_by('a').agg( + b_mean=nw.col('b').mean(), + b_std=nw.col('b').std(), + ) +``` +will work for any of pandas, Polars, cuDF, and Modin. + +However, sometimes you don't need to do complex operations on dataframes - all you need +is to inspect the schema a bit before making other decisions, such as which columns to +select or whether to convert to another library. For that purpose, we also provide an "interchange" +level of support. If a library implements the +[Dataframe Interchange Protocol](https://data-apis.org/dataframe-protocol/latest/), then +a call such as + +```python exec="1" source="above" +from typing import Any + +import narwhals as nw +from narwhals.schema import Schema + +def func(df_any: Any) -> Schema: + df = nw.from_native(df_any, eager_or_interchange_only=True) + return df.schema +``` +is also supported, meaning that, in addition to the libraries mentioned above, you can +also pass Ibis, Vaex, PyArrow, and any other library which implements the protocol. 
diff --git a/mkdocs.yml b/mkdocs.yml index 101c1a7f4..891e96ed6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,6 +13,7 @@ nav: - basics/complete_example.md - Other concepts: - other/pandas_index.md + - levels.md - overhead.md - backcompat.md - extending.md diff --git a/pyproject.toml b/pyproject.toml index 934a82cfe..8bcd7ef02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.6" authors = [ { name="Marco Gorelli", email="33491632+MarcoGorelli@users.noreply.github.com" }, ] -description = "Extremely lightweight compatibility layer between pandas, Polars, cuDF, and Modin" +description = "Extremely lightweight compatibility layer between dataframe libraries" readme = "README.md" requires-python = ">=3.8" classifiers = [ diff --git a/tests/series/cast_test.py b/tests/series/cast_test.py index 2c66ca92f..37ae55a01 100644 --- a/tests/series/cast_test.py +++ b/tests/series/cast_test.py @@ -105,6 +105,7 @@ def test_cast_date_datetime_invalid() -> None: df.select(nw.col("a").cast(nw.Date)) +@pytest.mark.filterwarnings("ignore: casting period") def test_unknown_to_int() -> None: df = pd.DataFrame({"a": pd.period_range("2000", periods=3, freq="min")}) assert nw.from_native(df).select(nw.col("a").cast(nw.Int64)).schema == {"a": nw.Int64}