From 8a9131e75ba1bfb6f70bbbd2990c2741457b7eaf Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:38:14 +0000 Subject: [PATCH] Deployed 717d312 with MkDocs version: 1.6.1 --- .nojekyll | 0 404.html | 1373 ++++ api-completeness/dataframe/index.html | 1710 +++++ api-completeness/expr/index.html | 2074 +++++ api-completeness/index.html | 1454 ++++ api-completeness/lazyframe/index.html | 1583 ++++ api-completeness/series/index.html | 2034 +++++ api-reference/dataframe/index.html | 6282 +++++++++++++++ api-reference/dependencies/index.html | 2670 +++++++ api-reference/dtypes/index.html | 3099 ++++++++ api-reference/expr/index.html | 5837 ++++++++++++++ api-reference/expr_cat/index.html | 1611 ++++ api-reference/expr_dt/index.html | 3240 ++++++++ api-reference/expr_name/index.html | 2016 +++++ api-reference/expr_str/index.html | 2911 +++++++ api-reference/group_by/index.html | 1694 +++++ api-reference/index.html | 1462 ++++ api-reference/lazyframe/index.html | 4663 ++++++++++++ api-reference/narwhals/index.html | 5608 ++++++++++++++ api-reference/schema/index.html | 1704 +++++ api-reference/selectors/index.html | 1925 +++++ api-reference/series/index.html | 6658 ++++++++++++++++ api-reference/series_cat/index.html | 1608 ++++ api-reference/series_dt/index.html | 3090 ++++++++ api-reference/series_str/index.html | 2889 +++++++ api-reference/typing/index.html | 1777 +++++ assets/_mkdocstrings.css | 143 + assets/image.png | Bin 0 -> 132699 bytes assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.83f73b43.min.js | 16 + assets/javascripts/bundle.83f73b43.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + 
assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.6ce7567c.min.js | 42 + .../workers/search.6ce7567c.min.js.map | 7 + assets/logo.svg | 6 + assets/stylesheets/main.0253249f.min.css | 1 + assets/stylesheets/main.0253249f.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + backcompat/index.html | 1728 +++++ basics/complete_example/index.html | 1657 ++++ basics/dataframe/index.html | 1940 +++++ basics/dataframe_conversion/index.html | 1673 ++++ basics/series/index.html | 1884 +++++ extending/index.html | 1694 +++++ how_it_works/index.html | 1829 +++++ index.html | 1513 
++++ installation/index.html | 1679 +++++ javascripts/extra.js | 67 + objects.inv | Bin 0 -> 3853 bytes other/column_names/index.html | 1471 ++++ other/pandas_index/index.html | 1611 ++++ other/user_warning/index.html | 1663 ++++ overhead/index.html | 1463 ++++ requirements-docs.txt | 8 + roadmap_and_related/index.html | 1652 ++++ search/search_index.json | 1 + sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes this/index.html | 1450 ++++ why/index.html | 1478 ++++ 94 files changed, 104862 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 api-completeness/dataframe/index.html create mode 100644 api-completeness/expr/index.html create mode 100644 api-completeness/index.html create mode 100644 api-completeness/lazyframe/index.html create mode 100644 api-completeness/series/index.html create mode 100644 api-reference/dataframe/index.html create mode 100644 api-reference/dependencies/index.html create mode 100644 api-reference/dtypes/index.html create mode 100644 api-reference/expr/index.html create mode 100644 api-reference/expr_cat/index.html create mode 100644 api-reference/expr_dt/index.html create mode 100644 api-reference/expr_name/index.html create mode 100644 api-reference/expr_str/index.html create mode 100644 api-reference/group_by/index.html create mode 100644 api-reference/index.html create mode 100644 api-reference/lazyframe/index.html create mode 100644 api-reference/narwhals/index.html create mode 100644 api-reference/schema/index.html create mode 100644 api-reference/selectors/index.html create mode 100644 api-reference/series/index.html create mode 100644 api-reference/series_cat/index.html create mode 100644 api-reference/series_dt/index.html create mode 100644 api-reference/series_str/index.html create mode 100644 api-reference/typing/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/image.png create mode 100644 assets/images/favicon.png create mode 100644 
assets/javascripts/bundle.83f73b43.min.js create mode 100644 assets/javascripts/bundle.83f73b43.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 
100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js.map create mode 100644 assets/logo.svg create mode 100644 assets/stylesheets/main.0253249f.min.css create mode 100644 assets/stylesheets/main.0253249f.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 backcompat/index.html create mode 100644 basics/complete_example/index.html create mode 100644 basics/dataframe/index.html create mode 100644 basics/dataframe_conversion/index.html create mode 100644 basics/series/index.html create mode 100644 extending/index.html create mode 100644 how_it_works/index.html create mode 100644 index.html create mode 100644 installation/index.html create mode 100644 javascripts/extra.js create mode 100644 objects.inv create mode 100644 other/column_names/index.html create mode 100644 other/pandas_index/index.html create mode 100644 other/user_warning/index.html create mode 100644 overhead/index.html create mode 100644 requirements-docs.txt create mode 100644 roadmap_and_related/index.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 this/index.html create mode 100644 why/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..3a294982a --- /dev/null +++ b/404.html @@ -0,0 +1,1373 @@ + + + +
+ + + + + + + + + + + + + + +Class | +Method | +pandas-like | +arrow | +
---|---|---|---|
DataFrame | +clone | ++ | + |
DataFrame | +collect_schema | ++ | + |
DataFrame | +columns | ++ | + |
DataFrame | +drop | ++ | + |
DataFrame | +drop_nulls | ++ | + |
DataFrame | +filter | ++ | + |
DataFrame | +gather_every | ++ | + |
DataFrame | +get_column | ++ | + |
DataFrame | +group_by | ++ | + |
DataFrame | +head | ++ | + |
DataFrame | +is_duplicated | ++ | + |
DataFrame | +is_empty | ++ | + |
DataFrame | +is_unique | ++ | + |
DataFrame | +item | ++ | + |
DataFrame | +iter_rows | ++ | + |
DataFrame | +join | ++ | + |
DataFrame | +join_asof | ++ | + |
DataFrame | +lazy | ++ | + |
DataFrame | +null_count | ++ | + |
DataFrame | +pipe | ++ | + |
DataFrame | +pivot | ++ | + |
DataFrame | +rename | ++ | + |
DataFrame | +row | ++ | + |
DataFrame | +rows | ++ | + |
DataFrame | +sample | ++ | + |
DataFrame | +schema | ++ | + |
DataFrame | +select | ++ | + |
DataFrame | +shape | ++ | + |
DataFrame | +sort | ++ | + |
DataFrame | +tail | ++ | + |
DataFrame | +to_arrow | ++ | + |
DataFrame | +to_dict | ++ | + |
DataFrame | +to_native | ++ | + |
DataFrame | +to_numpy | ++ | + |
DataFrame | +to_pandas | ++ | + |
DataFrame | +unique | ++ | + |
DataFrame | +unpivot | ++ | + |
DataFrame | +with_columns | ++ | + |
DataFrame | +with_row_index | ++ | + |
DataFrame | +write_csv | ++ | + |
DataFrame | +write_parquet | ++ | + |
Class | +Method | +pandas-like | +arrow | +dask | +
---|---|---|---|---|
Expr | +abs | ++ | + | + |
Expr | +alias | ++ | + | + |
Expr | +all | ++ | + | + |
Expr | +any | ++ | + | + |
Expr | +arg_true | ++ | + | + |
Expr | +cast | ++ | + | + |
Expr | +cat | ++ | + | + |
Expr | +clip | ++ | + | + |
Expr | +count | ++ | + | + |
Expr | +cum_sum | ++ | + | + |
Expr | +diff | ++ | + | + |
Expr | +drop_nulls | ++ | + | + |
Expr | +dt | ++ | + | + |
Expr | +fill_null | ++ | + | + |
Expr | +filter | ++ | + | + |
Expr | +gather_every | ++ | + | + |
Expr | +head | ++ | + | + |
Expr | +is_between | ++ | + | + |
Expr | +is_duplicated | ++ | + | + |
Expr | +is_first_distinct | ++ | + | + |
Expr | +is_in | ++ | + | + |
Expr | +is_last_distinct | ++ | + | + |
Expr | +is_null | ++ | + | + |
Expr | +is_unique | ++ | + | + |
Expr | +len | ++ | + | + |
Expr | +map_batches | ++ | + | + |
Expr | +max | ++ | + | + |
Expr | +mean | ++ | + | + |
Expr | +median | ++ | + | + |
Expr | +min | ++ | + | + |
Expr | +mode | ++ | + | + |
Expr | +n_unique | ++ | + | + |
Expr | +name | ++ | + | + |
Expr | +null_count | ++ | + | + |
Expr | +over | ++ | + | + |
Expr | +pipe | ++ | + | + |
Expr | +quantile | ++ | + | + |
Expr | +replace_strict | ++ | + | + |
Expr | +round | ++ | + | + |
Expr | +sample | ++ | + | + |
Expr | +shift | ++ | + | + |
Expr | +sort | ++ | + | + |
Expr | +std | ++ | + | + |
Expr | +str | ++ | + | + |
Expr | +sum | ++ | + | + |
Expr | +tail | ++ | + | + |
Expr | +unique | ++ | + | + |
ExprCatNamespace | +get_categories | ++ | + | + |
ExprDateTimeNamespace | +convert_time_zone | ++ | + | + |
ExprDateTimeNamespace | +date | ++ | + | + |
ExprDateTimeNamespace | +day | ++ | + | + |
ExprDateTimeNamespace | +hour | ++ | + | + |
ExprDateTimeNamespace | +microsecond | ++ | + | + |
ExprDateTimeNamespace | +millisecond | ++ | + | + |
ExprDateTimeNamespace | +minute | ++ | + | + |
ExprDateTimeNamespace | +month | ++ | + | + |
ExprDateTimeNamespace | +nanosecond | ++ | + | + |
ExprDateTimeNamespace | +ordinal_day | ++ | + | + |
ExprDateTimeNamespace | +replace_time_zone | ++ | + | + |
ExprDateTimeNamespace | +second | ++ | + | + |
ExprDateTimeNamespace | +timestamp | ++ | + | + |
ExprDateTimeNamespace | +to_string | ++ | + | + |
ExprDateTimeNamespace | +total_microseconds | ++ | + | + |
ExprDateTimeNamespace | +total_milliseconds | ++ | + | + |
ExprDateTimeNamespace | +total_minutes | ++ | + | + |
ExprDateTimeNamespace | +total_nanoseconds | ++ | + | + |
ExprDateTimeNamespace | +total_seconds | ++ | + | + |
ExprDateTimeNamespace | +year | ++ | + | + |
ExprNameNamespace | +keep | ++ | + | + |
ExprNameNamespace | +map | ++ | + | + |
ExprNameNamespace | +prefix | ++ | + | + |
ExprNameNamespace | +suffix | ++ | + | + |
ExprNameNamespace | +to_lowercase | ++ | + | + |
ExprNameNamespace | +to_uppercase | ++ | + | + |
ExprStringNamespace | +contains | ++ | + | + |
ExprStringNamespace | +ends_with | ++ | + | + |
ExprStringNamespace | +head | ++ | + | + |
ExprStringNamespace | +len_chars | ++ | + | + |
ExprStringNamespace | +replace | ++ | + | + |
ExprStringNamespace | +replace_all | ++ | + | + |
ExprStringNamespace | +slice | ++ | + | + |
ExprStringNamespace | +starts_with | ++ | + | + |
ExprStringNamespace | +strip_chars | ++ | + | + |
ExprStringNamespace | +tail | ++ | + | + |
ExprStringNamespace | +to_datetime | ++ | + | + |
ExprStringNamespace | +to_lowercase | ++ | + | + |
ExprStringNamespace | +to_uppercase | ++ | + | + |
Narwhals has two different level of support for libraries: "full" and "interchange".
+Libraries for which we have full support we intend to support the whole Narwhals API, +however this is a continuous work in progress.
+In the following section it is possible to check which method is implemented for which +class and backend.
+Info
+Class | +Method | +dask | +
---|---|---|
LazyFrame | +clone | ++ |
LazyFrame | +collect | ++ |
LazyFrame | +collect_schema | ++ |
LazyFrame | +columns | ++ |
LazyFrame | +drop | ++ |
LazyFrame | +drop_nulls | ++ |
LazyFrame | +filter | ++ |
LazyFrame | +gather_every | ++ |
LazyFrame | +group_by | ++ |
LazyFrame | +head | ++ |
LazyFrame | +join | ++ |
LazyFrame | +join_asof | ++ |
LazyFrame | +lazy | ++ |
LazyFrame | +pipe | ++ |
LazyFrame | +rename | ++ |
LazyFrame | +schema | ++ |
LazyFrame | +select | ++ |
LazyFrame | +sort | ++ |
LazyFrame | +tail | ++ |
LazyFrame | +to_native | ++ |
LazyFrame | +unique | ++ |
LazyFrame | +unpivot | ++ |
LazyFrame | +with_columns | ++ |
LazyFrame | +with_row_index | ++ |
Class | +Method | +pandas-like | +arrow | +
---|---|---|---|
Series | +abs | ++ | + |
Series | +alias | ++ | + |
Series | +all | ++ | + |
Series | +any | ++ | + |
Series | +arg_true | ++ | + |
Series | +cast | ++ | + |
Series | +cat | ++ | + |
Series | +clip | ++ | + |
Series | +count | ++ | + |
Series | +cum_sum | ++ | + |
Series | +diff | ++ | + |
Series | +drop_nulls | ++ | + |
Series | +dt | ++ | + |
Series | +dtype | ++ | + |
Series | +fill_null | ++ | + |
Series | +filter | ++ | + |
Series | +gather_every | ++ | + |
Series | +head | ++ | + |
Series | +is_between | ++ | + |
Series | +is_duplicated | ++ | + |
Series | +is_empty | ++ | + |
Series | +is_first_distinct | ++ | + |
Series | +is_in | ++ | + |
Series | +is_last_distinct | ++ | + |
Series | +is_null | ++ | + |
Series | +is_sorted | ++ | + |
Series | +is_unique | ++ | + |
Series | +item | ++ | + |
Series | +len | ++ | + |
Series | +max | ++ | + |
Series | +mean | ++ | + |
Series | +median | ++ | + |
Series | +min | ++ | + |
Series | +mode | ++ | + |
Series | +n_unique | ++ | + |
Series | +name | ++ | + |
Series | +null_count | ++ | + |
Series | +pipe | ++ | + |
Series | +quantile | ++ | + |
Series | +rename | ++ | + |
Series | +replace_strict | ++ | + |
Series | +round | ++ | + |
Series | +sample | ++ | + |
Series | +scatter | ++ | + |
Series | +shape | ++ | + |
Series | +shift | ++ | + |
Series | +sort | ++ | + |
Series | +std | ++ | + |
Series | +str | ++ | + |
Series | +sum | ++ | + |
Series | +tail | ++ | + |
Series | +to_arrow | ++ | + |
Series | +to_dummies | ++ | + |
Series | +to_frame | ++ | + |
Series | +to_list | ++ | + |
Series | +to_native | ++ | + |
Series | +to_numpy | ++ | + |
Series | +to_pandas | ++ | + |
Series | +unique | ++ | + |
Series | +value_counts | ++ | + |
Series | +zip_with | ++ | + |
SeriesCatNamespace | +get_categories | ++ | + |
SeriesDateTimeNamespace | +convert_time_zone | ++ | + |
SeriesDateTimeNamespace | +date | ++ | + |
SeriesDateTimeNamespace | +day | ++ | + |
SeriesDateTimeNamespace | +hour | ++ | + |
SeriesDateTimeNamespace | +microsecond | ++ | + |
SeriesDateTimeNamespace | +millisecond | ++ | + |
SeriesDateTimeNamespace | +minute | ++ | + |
SeriesDateTimeNamespace | +month | ++ | + |
SeriesDateTimeNamespace | +nanosecond | ++ | + |
SeriesDateTimeNamespace | +ordinal_day | ++ | + |
SeriesDateTimeNamespace | +replace_time_zone | ++ | + |
SeriesDateTimeNamespace | +second | ++ | + |
SeriesDateTimeNamespace | +timestamp | ++ | + |
SeriesDateTimeNamespace | +to_string | ++ | + |
SeriesDateTimeNamespace | +total_microseconds | ++ | + |
SeriesDateTimeNamespace | +total_milliseconds | ++ | + |
SeriesDateTimeNamespace | +total_minutes | ++ | + |
SeriesDateTimeNamespace | +total_nanoseconds | ++ | + |
SeriesDateTimeNamespace | +total_seconds | ++ | + |
SeriesDateTimeNamespace | +year | ++ | + |
SeriesStringNamespace | +contains | ++ | + |
SeriesStringNamespace | +ends_with | ++ | + |
SeriesStringNamespace | +head | ++ | + |
SeriesStringNamespace | +len_chars | ++ | + |
SeriesStringNamespace | +replace | ++ | + |
SeriesStringNamespace | +replace_all | ++ | + |
SeriesStringNamespace | +slice | ++ | + |
SeriesStringNamespace | +starts_with | ++ | + |
SeriesStringNamespace | +strip_chars | ++ | + |
SeriesStringNamespace | +tail | ++ | + |
SeriesStringNamespace | +to_datetime | ++ | + |
SeriesStringNamespace | +to_lowercase | ++ | + |
SeriesStringNamespace | +to_uppercase | ++ | + |
narwhals.DataFrame
Narwhals DataFrame, backed by a native dataframe.
+The native dataframe might be pandas.DataFrame, polars.DataFrame, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
.
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.columns
+
We can pass any supported library such as pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+['foo', 'bar', 'ham']
+>>> func(df_pl)
+['foo', 'bar', 'ham']
+>>> func(df_pa)
+['foo', 'bar', 'ham']
+
schema: Schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.schema
+
You can pass either pandas or Polars to func
:
>>> df_pd_schema = func(df_pd)
+>>> df_pd_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> df_pl_schema = func(df_pl)
+>>> df_pl_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
shape: tuple[int, int]
+
+
+ property
+
+
+Get the shape of the DataFrame.
+ + +Examples:
+Construct pandas and polars DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3, 4, 5]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.shape
+
We can then pass either pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+(5, 1)
+>>> func(df_pl)
+(5, 1)
+>>> func(df_pa)
+(5, 1)
+
__arrow_c_stream__(requested_schema=None)
+
+Export a DataFrame via the Arrow PyCapsule Interface.
+to_arrow
and then defer to PyArrow's implementationSee PyCapsule Interface +for more.
+ +__getitem__(item)
+
+__getitem__(item: tuple[Sequence[int], slice]) -> Self
+
__getitem__(item: tuple[Sequence[int], Sequence[int]]) -> Self
+
__getitem__(item: tuple[slice, Sequence[int]]) -> Self
+
__getitem__(item: tuple[Sequence[int], str]) -> Series
+
__getitem__(item: tuple[slice, str]) -> Series
+
__getitem__(item: tuple[Sequence[int], Sequence[str]]) -> Self
+
__getitem__(item: tuple[slice, Sequence[str]]) -> Self
+
__getitem__(item: tuple[Sequence[int], int]) -> Series
+
__getitem__(item: tuple[slice, int]) -> Series
+
__getitem__(item: Sequence[int]) -> Self
+
__getitem__(item: str) -> Series
+
__getitem__(item: Sequence[str]) -> Self
+
__getitem__(item: slice) -> Self
+
__getitem__(item: tuple[slice, slice]) -> Self
+
Extract column or slice of DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ item
+ |
+
+ str | slice | Sequence[int] | Sequence[str] | tuple[Sequence[int], str | int] | tuple[slice, str | int] | tuple[slice | Sequence[int], Sequence[int] | Sequence[str] | slice] | tuple[slice, slice]
+ |
+
+
+
+ How to slice dataframe. What happens depends on what is passed. It's easiest
+to explain by example. Suppose we have a Dataframe
|
+ + required + | +
In contrast with Polars, pandas allows non-string column names.
+If you don't know whether the column name you're trying to extract
+is definitely a string (e.g. df[df.columns[0]]
) then you should
+use DataFrame.get_column
instead.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify(eager_only=True)
+... def func(df):
+... return df["a"]
+
We can then pass either pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> func(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+>>> func(df_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 1,
+ 2
+ ]
+]
+
clone()
+
+Create a copy of this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we clone the DataFrame:
+>>> @nw.narwhalify
+... def func(df):
+... return df.clone()
+
>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.collect_schema()
+
You can pass either pandas or Polars to func
:
>>> df_pd_schema = func(df_pd)
+>>> df_pd_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> df_pl_schema = func(df_pl)
+>>> df_pl_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns, strict=True)
+
+Remove columns from the dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *columns
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
+ strict
+ |
+
+ bool
+ |
+
+
+
+ Validate that all column names exist in the schema and throw an +exception if a column name does not exist in the schema. + |
+
+ True
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("ham")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("foo", "ham")
+
>>> func(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
drop_nulls(subset=None)
+
+Drop null values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) for which null values are considered. If set to None +(default), use all columns. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop_nulls()
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a ba
+0 1.0 1.0
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+
filter(*predicates)
+
+Filter the rows in the DataFrame based on one or more predicate expressions.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr] | list[bool]
+ |
+
+
+
+ Expression(s) that evaluates to a boolean Series. Can +also be a (single!) boolean list. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter(nw.col("foo") > 1)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions, combined with and/or operators:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> @nw.narwhalify
+... def func(df):
+... dframe = df.filter(
+... nw.col("foo") <= 2,
+... ~nw.col("ham").is_in(["b", "c"]),
+... )
+... return dframe
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
gather_every(n, offset=0)
+
+Take every nth row in the DataFrame and return as a new DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which gather every 2 rows, +starting from a offset of 1:
+>>> @nw.narwhalify
+... def func(df):
+... return df.gather_every(n=2, offset=1)
+
>>> func(df_pd)
+ a b
+1 2 6
+3 4 8
+
>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 6 │
+│ 4 ┆ 8 │
+└─────┴─────┘
+
get_column(name)
+
+Get a single column by name.
+ + +Although name
is typed as str
, pandas does allow non-string column
+names, and they will work when passed to this function if the
+narwhals.DataFrame
is backed by a pandas dataframe with non-string
+columns. This function can only be used to extract a column by name, so
+there is no risk of ambiguity.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify(eager_only=True)
+... def func(df):
+... name = df.columns[0]
+... return df.get_column(name)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> func(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+
group_by(*keys, drop_null_keys=False)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts multiple columns names as a list. + |
+
+ ()
+ |
+
+ drop_null_keys
+ |
+
+ bool
+ |
+
+
+
+ if True, then groups where any key is null won't be included +in the result. + |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
GroupBy |
+ GroupBy[Self]
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
+>>> func(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the last |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
is_duplicated()
+
+Get a mask of all duplicated rows in this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.is_duplicated()
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+
>>> func(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
is_empty()
+
+Check if the dataframe is empty.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that filters rows in which "foo" +values are greater than 10, and then checks if the result is empty or not:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter(nw.col("foo") > 10).is_empty()
+
We can then pass either pandas or Polars to func
:
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+>>> func(df_pd), func(df_pl)
+(True, True)
+
>>> df_pd = pd.DataFrame({"foo": [100, 2, 3], "bar": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"foo": [100, 2, 3], "bar": [4, 5, 6]})
+>>> func(df_pd), func(df_pl)
+(False, False)
+
is_unique()
+
+Get a mask of all unique rows in this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.is_unique()
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> func(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+
item(row=None, column=None)
+
+Return the DataFrame as a scalar, or return the element at the given row/column.
+ + +If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1). +With row/col, this is equivalent to df[row,col].
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that returns item at given row/column
+>>> @nw.narwhalify
+... def func(df, row, column):
+... return df.item(row, column)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd, 1, 1), func(df_pd, 2, "b")
+(np.int64(5), np.int64(6))
+
>>> func(df_pl, 1, 1), func(df_pl, 2, "b")
+(5, 6)
+
iter_rows(*, named=False, buffer_size=512)
+
+iter_rows(*, named: Literal[False], buffer_size: int = ...) -> Iterator[tuple[Any, ...]]
+
iter_rows(*, named: Literal[True], buffer_size: int = ...) -> Iterator[dict[str, Any]]
+
iter_rows(*, named: bool, buffer_size: int = ...) -> Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]
+
Returns an iterator over the DataFrame of rows of python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ named
+ |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
+ buffer_size
+ |
+
+ int
+ |
+
+
+
+ Determines the number of rows that are buffered +internally while iterating over the data. +See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html + |
+
+ 512
+ |
+
cuDF doesn't support this method.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df, *, named):
+... return df.iter_rows(named=named)
+
We can then pass either pandas or Polars to func
:
>>> [row for row in func(df_pd, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in func(df_pd, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> [row for row in func(df_pl, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in func(df_pl, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
join(other, on=None, how='inner', *, left_on=None, right_on=None, suffix='_right')
+
+Join in SQL-like fashion.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ Lazy DataFrame to join with. + |
+ + required + | +
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the join columns in both DataFrames. If set, |
+
+ None
+ |
+
+ how
+ |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
+ left_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to append to columns with a duplicate name. + |
+
+ '_right'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> df_pl = pl.DataFrame(data)
+>>> other_pl = pl.DataFrame(data_other)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> @nw.narwhalify
+... def join_on_ham(df, other_any):
+... return df.join(other_any, left_on="ham", right_on="ham")
+
We can now pass either pandas or Polars to the function:
+>>> join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> join_on_ham(df_pl, other_pl)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
join_asof(other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy='backward')
+
+Perform an asof join.
+This is similar to a left-join except that we match on nearest key rather than equal keys.
+Both DataFrames must be sorted by the asof_join key.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ left_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
+ on
+ |
+
+ str | None
+ |
+
+
+
+ Join column of both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ by_left
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by_right
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['backward', 'forward', 'nearest']
+ |
+
+
+
+ Join strategy. The default is "backward". +
|
+
+ 'backward'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_gdp = {
+... "datetime": [
+... datetime(2016, 1, 1),
+... datetime(2017, 1, 1),
+... datetime(2018, 1, 1),
+... datetime(2019, 1, 1),
+... datetime(2020, 1, 1),
+... ],
+... "gdp": [4164, 4411, 4566, 4696, 4827],
+... }
+>>> data_population = {
+... "datetime": [
+... datetime(2016, 3, 1),
+... datetime(2018, 8, 1),
+... datetime(2019, 1, 1),
+... ],
+... "population": [82.19, 82.66, 83.12],
+... }
+>>> gdp_pd = pd.DataFrame(data_gdp)
+>>> population_pd = pd.DataFrame(data_population)
+
>>> gdp_pl = pl.DataFrame(data_gdp).sort("datetime")
+>>> population_pl = pl.DataFrame(data_population).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" column:
+>>> @nw.narwhalify
+... def join_asof_datetime(df, other_any, strategy):
+... return df.join_asof(other_any, on="datetime", strategy=strategy)
+
We can now pass either pandas or Polars to the function:
+>>> join_asof_datetime(population_pd, gdp_pd, strategy="backward")
+ datetime population gdp
+0 2016-03-01 82.19 4164
+1 2018-08-01 82.66 4566
+2 2019-01-01 83.12 4696
+
>>> join_asof_datetime(population_pl, gdp_pl, strategy="backward")
+shape: (3, 3)
+┌─────────────────────┬────────────┬──────┐
+│ datetime ┆ population ┆ gdp │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ f64 ┆ i64 │
+╞═════════════════════╪════════════╪══════╡
+│ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+│ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+│ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+└─────────────────────┴────────────┴──────┘
+
Here is a real-world times-series example that uses by
argument.
>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_quotes = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 30),
+... datetime(2016, 5, 25, 13, 30, 0, 41),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 72),
+... datetime(2016, 5, 25, 13, 30, 0, 75),
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT",
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
+... }
+>>> data_trades = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 38),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100],
+... }
+>>> quotes_pd = pd.DataFrame(data_quotes)
+>>> trades_pd = pd.DataFrame(data_trades)
+>>> quotes_pl = pl.DataFrame(data_quotes).sort("datetime")
+>>> trades_pl = pl.DataFrame(data_trades).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns:
+>>> @nw.narwhalify
+... def join_asof_datetime_by_ticker(df, other_any):
+... return df.join_asof(other_any, on="datetime", by="ticker")
+
We can now pass either pandas or Polars to the function:
+>>> join_asof_datetime_by_ticker(trades_pd, quotes_pd)
+ datetime ticker price quantity bid ask
+0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96
+1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98
+2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93
+3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93
+4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN
+
>>> join_asof_datetime_by_ticker(trades_pl, quotes_pl)
+shape: (5, 6)
+┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐
+│ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │
+╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡
+│ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │
+│ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │
+└────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.lazy()
+
Note that then, pandas and pyarrow dataframe stay eager, but Polars DataFrame becomes a Polars LazyFrame:
+>>> func(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pl)
+<LazyFrame ...>
+>>> func(df_pa)
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
null_count()
+
+Create a new DataFrame that shows the null counts per column.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+... )
+
Let's define a dataframe-agnostic function that returns the null count of +each columns:
+>>> @nw.narwhalify
+... def func(df):
+... return df.null_count()
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 1 0
+
>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ u32 ┆ u32 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 1 ┆ 0 │
+└─────┴─────┴─────┘
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.pipe(
+... lambda _df: _df.select([x for x in _df.columns if len(x) == 1])
+... )
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
pivot(on, *, index=None, values=None, aggregate_function=None, maintain_order=True, sort_columns=False, separator='_')
+
+Create a spreadsheet-style pivot table as a DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str]
+ |
+
+
+
+ Name of the column(s) whose values will be used as the header of the +output DataFrame. + |
+ + required + | +
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ One or multiple keys to group by. If None, all remaining columns not
+specified on |
+
+ None
+ |
+
+ values
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ One or multiple keys to group by. If None, all remaining columns not
+specified on |
+
+ None
+ |
+
+ aggregate_function
+ |
+
+ Literal['min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'] | None
+ |
+
+
+
+ Choose from: +- None: no aggregation takes place, will raise error if multiple values + are in group. +- A predefined aggregate function string, one of + {'min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'} + |
+
+ None
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Sort the grouped keys so that the output order is predictable. + |
+
+ True
+ |
+
+ sort_columns
+ |
+
+ bool
+ |
+
+
+
+ Sort the transposed columns by name. Default is by order of +discovery. + |
+
+ False
+ |
+
+ separator
+ |
+
+ str
+ |
+
+
+
+ Used as separator/delimiter in generated column names in case of
+multiple |
+
+ '_'
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "ix": [1, 1, 2, 2, 1, 2],
+... "col": ["a", "a", "a", "a", "b", "b"],
+... "foo": [0, 1, 2, 2, 7, 1],
+... "bar": [0, 2, 0, 0, 9, 4],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.pivot("col", index="ix", aggregate_function="sum")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ ix foo_a foo_b bar_a bar_b
+0 1 1 7 2 9
+1 2 4 1 0 4
+>>> func(df_pl)
+shape: (2, 5)
+┌─────┬───────┬───────┬───────┬───────┐
+│ ix ┆ foo_a ┆ foo_b ┆ bar_a ┆ bar_b │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
+╞═════╪═══════╪═══════╪═══════╪═══════╡
+│ 1 ┆ 1 ┆ 7 ┆ 2 ┆ 9 │
+│ 2 ┆ 4 ┆ 1 ┆ 0 ┆ 4 │
+└─────┴───────┴───────┴───────┴───────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mapping
+ |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.rename({"foo": "apple"})
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
row(index)
+
+Get values at given row.
+Note
+You should NEVER use this method to iterate over a DataFrame; +if you require row-iteration you should strongly prefer use of iter_rows() instead.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ index
+ |
+
+ int
+ |
+
+
+
+ Row number. + |
+ + required + | +
cuDF doesn't support this method.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a library-agnostic function to get the second row.
+>>> @nw.narwhalify
+... def func(df):
+... return df.row(1)
+
We can then pass pandas / Polars / any other supported library:
+>>> func(df_pd)
+(2, 5)
+>>> func(df_pl)
+(2, 5)
+
rows(*, named=False)
+
+rows(*, named: Literal[False] = False) -> list[tuple[Any, ...]]
+
rows(*, named: Literal[True]) -> list[dict[str, Any]]
+
rows(*, named: bool) -> list[tuple[Any, ...]] | list[dict[str, Any]]
+
Returns all data in the DataFrame as a list of rows of python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ named
+ |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df, *, named):
+... return df.rows(named=named)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> func(df_pd, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> func(df_pl, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> func(df_pl, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
The results may not be consistent across libraries.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.sample(n=2, seed=123)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+3 4 y
+0 1 x
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ str │
+╞═════╪═════╡
+│ 2 ┆ y │
+│ 3 ┆ x │
+└─────┴─────┘
+
As you can see, by using the same seed, the result will be consistent within +the same backend, but not necessarely across different backends.
+ +select(*exprs, **named_exprs)
+
+Select columns from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("foo")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(["foo", "bar"])
+>>> func(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> func(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(threshold=nw.col("foo") * 2)
+>>> func(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> func(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False, nulls_last=False)
+
+Sort the dataframe by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ by
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) names to sort by. + |
+ + required + | +
+ *more_by
+ |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional arguments. + |
+
+ ()
+ |
+
+ descending
+ |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple columns, can be +specified per column by passing a sequence of booleans. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last. + |
+
+ False
+ |
+
Unlike Polars, it is not possible to specify a sequence of booleans for
+nulls_last
in order to control per-column behaviour. Instead a single
+boolean is applied for all by
columns.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders
+>>> @nw.narwhalify
+... def func(df):
+... return df.sort("c", "a", descending=[False, True])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> func(df_pl)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the first |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+2 3 8 c
+3 4 9 d
+4 5 10 e
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 3 ┆ 8 ┆ c │
+│ 4 ┆ 9 ┆ d │
+│ 5 ┆ 10 ┆ e │
+└─────┴─────┴─────┘
+
to_arrow()
+
+Convert to arrow table.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2, 3], "bar": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that converts to arrow table:
+>>> @nw.narwhalify
+... def func(df):
+... return df.to_arrow()
+
>>> func(df_pd)
+pyarrow.Table
+foo: int64
+bar: string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
>>> func(df_pl)
+pyarrow.Table
+foo: int64
+bar: large_string
+----
+foo: [[1,2,3]]
+bar: [["a","b","c"]]
+
to_dict(*, as_series=True)
+
+to_dict(*, as_series: Literal[True] = ...) -> dict[str, Series]
+
to_dict(*, as_series: Literal[False]) -> dict[str, list[Any]]
+
to_dict(*, as_series: bool) -> dict[str, Series] | dict[str, list[Any]]
+
Convert DataFrame to a dictionary mapping column name to values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ as_series
+ |
+
+ bool
+ |
+
+
+
+ If set to true |
+
+ True
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {
+... "A": [1, 2, 3, 4, 5],
+... "fruits": ["banana", "banana", "apple", "apple", "banana"],
+... "B": [5, 4, 3, 2, 1],
+... "animals": ["beetle", "fly", "beetle", "beetle", "beetle"],
+... "optional": [28, 300, None, 2, -30],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.to_dict(as_series=False)
+
We can then pass either pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]}
+>>> func(df_pl)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+>>> func(df_pa)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'animals': ['beetle', 'fly', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+
to_native()
+
+Convert Narwhals DataFrame to native one.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrameT
+ |
+
+
+
+ Object of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Calling to_native
on a Narwhals DataFrame returns the native object:
>>> nw.from_native(df_pd).to_native()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> nw.from_native(df_pl).to_native()
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 7.0 ┆ b │
+│ 3 ┆ 8.0 ┆ c │
+└─────┴─────┴─────┘
+>>> nw.from_native(df_pa).to_native()
+pyarrow.Table
+foo: int64
+bar: double
+ham: string
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+ham: [["a","b","c"]]
+
to_numpy()
+
+Convert this DataFrame to a NumPy ndarray.
+ + +Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.5, 7.0, 8.5], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.to_numpy()
+
We can then pass either pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> func(df_pl)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> func(df_pa)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+
to_pandas()
+
+Convert this DataFrame to a pandas DataFrame.
+ + +Examples:
+Construct pandas, Polars (eager) and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.to_pandas()
+
We can then pass any supported library such as pandas, Polars (eager), or PyArrow to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pl)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pa)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
unique(subset=None, *, keep='any', maintain_order=False)
+
+Drop duplicate rows from this dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows. + |
+
+ None
+ |
+
+ keep
+ |
+
+ Literal['any', 'first', 'last', 'none']
+ |
+
+
+
+ {'first', 'last', 'any', 'none'} +Which of the duplicate rows to keep. +
|
+
+ 'any'
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original DataFrame. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unique(["bar", "ham"])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 a b
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
unpivot(on=None, *, index=None, variable_name=None, value_name=None)
+
+Unpivot a DataFrame from wide to long format.
+Optionally leaves identifiers set.
+This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (index) while all other columns, considered +measured variables (on), are "unpivoted" to the row axis leaving just +two non-identifier columns, 'variable' and 'value'.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as values variables; if |
+
+ None
+ |
+
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as identifier variables. + |
+
+ None
+ |
+
+ variable_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
+ value_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
If you're coming from pandas, this is similar to pandas.DataFrame.melt
,
+but with index
replacing id_vars
and on
replacing value_vars
.
+In other frameworks, you might know this operation as pivot_longer
.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": ["x", "y", "z"],
+... "b": [1, 3, 5],
+... "c": [2, 4, 6],
+... }
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unpivot(on=["b", "c"], index="a")
+
We can pass any supported library such as pandas, Polars or PyArrow to func
:
>>> func(pl.DataFrame(data))
+shape: (6, 3)
+┌─────┬──────────┬───────┐
+│ a ┆ variable ┆ value │
+│ --- ┆ --- ┆ --- │
+│ str ┆ str ┆ i64 │
+╞═════╪══════════╪═══════╡
+│ x ┆ b ┆ 1 │
+│ y ┆ b ┆ 3 │
+│ z ┆ b ┆ 5 │
+│ x ┆ c ┆ 2 │
+│ y ┆ c ┆ 4 │
+│ z ┆ c ┆ 6 │
+└─────┴──────────┴───────┘
+
>>> func(pd.DataFrame(data))
+ a variable value
+0 x b 1
+1 y b 3
+2 z b 5
+3 x c 2
+4 y c 4
+5 z c 6
+
>>> func(pa.table(data))
+pyarrow.Table
+a: string
+variable: string
+value: int64
+----
+a: [["x","y","z"],["x","y","z"]]
+variable: [["b","b","b"],["c","c","c"]]
+value: [[1,3,5],[2,4,6]]
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this DataFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ A new DataFrame with the columns added. + |
+
Creating a new DataFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns((nw.col("a") * 2).alias("a*2"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c a*2
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> func(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ a*2 │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Examples:
+Construct pandas and polars DataFrames:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_row_index()
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> func(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+
write_csv(file=None)
+
+Write dataframe to comma-separated values (CSV) file.
+ + +Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def func(df):
+... df = nw.from_native(df)
+... return df.write_csv()
+
We can pass any supported library such as pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+>>> func(df_pl)
+'foo,bar,ham\n1,6.0,a\n2,7.0,b\n3,8.0,c\n'
+>>> func(df_pa)
+'"foo","bar","ham"\n1,6,"a"\n2,7,"b"\n3,8,"c"\n'
+
If we had passed a file name to write_csv
, it would have been
+written to that file.
write_parquet(file)
+
+Write dataframe to parquet file.
+ + +Examples:
+Construct pandas, Polars and PyArrow DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> df_pa = pa.table(df)
+
We define a library agnostic function:
+>>> def func(df):
+... df = nw.from_native(df)
+... df.write_parquet("foo.parquet")
+
We can then pass either pandas, Polars or PyArrow to func
:
>>> func(df_pd)
+>>> func(df_pl)
+>>> func(df_pa)
+
narwhals.dependencies
get_cudf()
+
+Get cudf module (if already imported - else return None).
+ +get_ibis()
+
+Get ibis module (if already imported - else return None).
+ +get_modin()
+
+Get modin.pandas module (if already imported - else return None).
+ +get_pandas()
+
+Get pandas module (if already imported - else return None).
+ +get_polars()
+
+Get Polars module (if already imported - else return None).
+ +get_pyarrow()
+
+Get pyarrow module (if already imported - else return None).
+ +is_cudf_dataframe(df)
+
+Check whether df
is a cudf DataFrame without importing cudf.
is_cudf_index(index)
+
+Check whether index
is a cudf Index without importing cudf.
is_cudf_series(ser)
+
+Check whether ser
is a cudf Series without importing cudf.
is_dask_dataframe(df)
+
+Check whether df
is a Dask DataFrame without importing Dask.
is_ibis_table(df)
+
+Check whether df
is an Ibis Table without importing Ibis.
is_into_dataframe(native_dataframe)
+
+Check whether native_dataframe
can be converted to a Narwhals DataFrame.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_dataframe
+ |
+
+ Any
+ |
+
+
+
+ The object to check. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> from narwhals.dependencies import is_into_dataframe
+
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])
+
>>> is_into_dataframe(df_pd)
+True
+>>> is_into_dataframe(df_pl)
+True
+>>> is_into_dataframe(np_arr)
+False
+
is_into_series(native_series)
+
+Check whether native_series
can be converted to a Narwhals Series.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_series
+ |
+
+ IntoSeries
+ |
+
+
+
+ The object to check. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ bool
+ |
+
+
+
+
|
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> import narwhals as nw
+
>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+>>> np_arr = np.array([1, 2, 3])
+
>>> nw.dependencies.is_into_series(s_pd)
+True
+>>> nw.dependencies.is_into_series(s_pl)
+True
+>>> nw.dependencies.is_into_series(np_arr)
+False
+
is_modin_dataframe(df)
+
+Check whether df
is a modin DataFrame without importing modin.
is_modin_index(index)
+
+Check whether index
is a modin Index without importing modin.
is_modin_series(ser)
+
+Check whether ser
is a modin Series without importing modin.
is_numpy_array(arr)
+
+Check whether arr
is a NumPy Array without importing NumPy.
is_pandas_dataframe(df)
+
+Check whether df
is a pandas DataFrame without importing pandas.
is_pandas_index(index)
+
+Check whether index
is a pandas Index without importing pandas.
is_pandas_like_dataframe(df)
+
+Check whether df
is a pandas-like DataFrame without doing any imports
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_like_index(index)
+
+Check whether index
is a pandas-like Index without doing any imports
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_like_series(ser)
+
+Check whether ser
is a pandas-like Series without doing any imports
By "pandas-like", we mean: pandas, Modin, cuDF.
+ +is_pandas_series(ser)
+
+Check whether ser
is a pandas Series without importing pandas.
is_polars_dataframe(df)
+
+Check whether df
is a Polars DataFrame without importing Polars.
is_polars_lazyframe(df)
+
+Check whether df
is a Polars LazyFrame without importing Polars.
is_polars_series(ser)
+
+Check whether ser
is a Polars Series without importing Polars.
is_pyarrow_chunked_array(ser)
+
+Check whether ser
is a PyArrow ChunkedArray without importing PyArrow.
is_pyarrow_table(df)
+
+Check whether df
is a PyArrow Table without importing PyArrow.
narwhals.dtypes
Array
+
+
+List
+
+
+Int64
+
+
+Int32
+
+
+Int16
+
+
+Int8
+
+
+UInt64
+
+
+UInt32
+
+
+UInt16
+
+
+UInt8
+
+
+Field
+
+
+Definition of a single field within a Struct
DataType.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The name of the field within its parent |
+ + required + | +
+ dtype
+ |
+
+ type[DType] | DType
+ |
+
+
+
+ The |
+ + required + | +
Float64
+
+
+Float32
+
+
+Boolean
+
+
+Categorical
+
+
+Enum
+
+
+String
+
+
+Struct
+
+
+Struct composite type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ fields
+ |
+
+ Sequence[Field] | Mapping[str, DType | type[DType]]
+ |
+
+
+
+ The fields that make up the struct. Can be either a sequence of Field objects or a mapping of column names to data types. + |
+ + required + | +
to_schema()
+
+Return Struct dtype as a schema dict.
+ +Date
+
+
+Datetime
+
+
+Data type representing a calendar date and time of day.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['us', 'ns', 'ms', 's']
+ |
+
+
+
+ Unit of time. Defaults to |
+
+ 'us'
+ |
+
+ time_zone
+ |
+
+ str | timezone | None
+ |
+
+
+
+ Time zone string, as defined in zoneinfo (to see valid strings run
+ |
+
+ None
+ |
+
Adapted from Polars implementation at: +https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L398-L457
+Duration
+
+
+Data type representing a time duration.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['us', 'ns', 'ms', 's']
+ |
+
+
+
+ Unit of time. Defaults to |
+
+ 'us'
+ |
+
Adapted from Polars implementation at: +https://github.com/pola-rs/polars/blob/py-1.7.1/py-polars/polars/datatypes/classes.py#L460-L502
+Object
+
+
+Unknown
+
+
+narwhals.Expr
abs()
+
+Return absolute value of each element.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, -2], "b": [-3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").abs())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2]]
+b: [[3,4]]
+
alias(name)
+
+Rename the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select((nw.col("b") + 10).alias("c"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ c
+0 14
+1 15
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ c │
+│ --- │
+│ i64 │
+╞═════╡
+│ 14 │
+│ 15 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+c: int64
+----
+c: [[14,15]]
+
all()
+
+Return whether all values in the column are True
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").all())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 False True
+>>> func(df_pl)
+shape: (1, 2)
+┌───────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪══════╡
+│ false ┆ true │
+└───────┴──────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false]]
+b: [[true]]
+
any()
+
+Return whether any of the values in the column are True
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").any())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 True True
+>>> func(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞══════╪══════╡
+│ true ┆ true │
+└──────┴──────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true]]
+b: [[true]]
+
arg_true()
+
+Find elements where boolean expression is True.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, None, None, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").is_null().arg_true())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+1 1
+2 2
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ u32 │
+╞═════╡
+│ 1 │
+│ 2 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,2]]
+
cast(dtype)
+
+Redefine an object's data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtype
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> from datetime import date
+>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+>>> df_pa = pa.table({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ foo bar
+0 1.0 6
+1 2.0 7
+2 3.0 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ f32 ┆ u8 │
+╞═════╪═════╡
+│ 1.0 ┆ 6 │
+│ 2.0 ┆ 7 │
+│ 3.0 ┆ 8 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+foo: float
+bar: uint8
+----
+foo: [[1,2,3]]
+bar: [[6,7,8]]
+
count()
+
+Returns the number of non-null elements in the column.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+>>> df_pa = pa.table({"a": [1, 2, 3], "b": [None, 4, 4]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().count())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 3 2
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 3 ┆ 2 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[3]]
+b: [[2]]
+
cum_sum()
+
+Return cumulative sum.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").cum_sum())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 1 2
+1 2 6
+2 5 10
+3 10 16
+4 15 22
+>>> func(df_pl)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 6 │
+│ 5 ┆ 10 │
+│ 10 ┆ 16 │
+│ 15 ┆ 22 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,5,10,15]]
+b: [[2,6,10,16,22]]
+
diff()
+
+Returns the difference between each element and the previous one.
+ + +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in a Int64 column, you could
+do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(a_diff=nw.col("a").diff())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a_diff
+0 NaN
+1 0.0
+2 2.0
+3 2.0
+4 0.0
+>>> func(df_pl)
+shape: (5, 1)
+┌────────┐
+│ a_diff │
+│ --- │
+│ i64 │
+╞════════╡
+│ null │
+│ 0 │
+│ 2 │
+│ 2 │
+│ 0 │
+└────────┘
+>>> func(df_pa)
+pyarrow.Table
+a_diff: int64
+----
+a_diff: [[null,0,2,2,0]]
+
drop_nulls()
+
+Remove missing values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+>>> df_pa = pa.table({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").drop_nulls())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 2.0
+1 4.0
+3 3.0
+5 5.0
+>>> func(df_pl) # nan != null for polars
+shape: (5, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.0 │
+│ 4.0 │
+│ NaN │
+│ 3.0 │
+│ 5.0 │
+└─────┘
+>>> func(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: double
+----
+a: [[2,4,nan,3,5]]
+
fill_null(value=None, strategy=None, limit=None)
+
+Fill null values with given value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any | None
+ |
+
+
+
+ Value used to fill null values. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['forward', 'backward'] | None
+ |
+
+
+
+ Strategy used to fill null values. + |
+
+ None
+ |
+
+ limit
+ |
+
+ int | None
+ |
+
+
+
+ Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+>>> df_pa = pa.table(
+... {
+... "a": [2, 4, None, None, 3, 5],
+... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.col("a", "b").fill_null(0))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 2.0 2.0
+1 4.0 4.0
+2 0.0 0.0
+3 0.0 0.0
+4 3.0 3.0
+5 5.0 5.0
+
>>> func(df_pl) # nan != null for polars
+shape: (6, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 │
+│ 0 ┆ NaN │
+│ 0 ┆ NaN │
+│ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 │
+└─────┴─────┘
+
>>> func(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: int64
+b: double
+----
+a: [[2,4,0,0,3,5]]
+b: [[2,4,nan,nan,3,5]]
+
Using a strategy:
+>>> @nw.narwhalify
+... def func_strategies(df):
+... return df.with_columns(
+... nw.col("a", "b")
+... .fill_null(strategy="forward", limit=1)
+... .name.suffix("_filled")
+... )
+
>>> func_strategies(df_pd)
+ a b a_filled b_filled
+0 2.0 2.0 2.0 2.0
+1 4.0 4.0 4.0 4.0
+2 NaN NaN 4.0 4.0
+3 NaN NaN NaN NaN
+4 3.0 3.0 3.0 3.0
+5 5.0 5.0 5.0 5.0
+
>>> func_strategies(df_pl) # nan != null for polars
+shape: (6, 4)
+┌──────┬─────┬──────────┬──────────┐
+│ a ┆ b ┆ a_filled ┆ b_filled │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ i64 ┆ f64 │
+╞══════╪═════╪══════════╪══════════╡
+│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │
+│ null ┆ NaN ┆ 4 ┆ NaN │
+│ null ┆ NaN ┆ null ┆ NaN │
+│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │
+└──────┴─────┴──────────┴──────────┘
+
>>> func_strategies(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: int64
+b: double
+a_filled: int64
+b_filled: double
+----
+a: [[2,4,null,null,3,5]]
+b: [[2,4,nan,nan,3,5]]
+a_filled: [[2,4,4,null,3,5]]
+b_filled: [[2,4,nan,nan,3,5]]
+
filter(*predicates)
+
+Filters elements based on a condition, returning a new expression.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+>>> df_pa = pa.table({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.col("a").filter(nw.col("a") > 4),
+... nw.col("b").filter(nw.col("b") < 13),
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+3 5 10
+4 6 11
+5 7 12
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 10 │
+│ 6 ┆ 11 │
+│ 7 ┆ 12 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[5,6,7]]
+b: [[10,11,12]]
+
gather_every(n, offset=0)
+
+Take every nth value in the Series and return as new Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function in which we gather every 2 rows, +starting from an offset of 1:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").gather_every(n=2, offset=1))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+1 2
+3 4
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 4 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,4]]
+
head(n=10)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").head(3))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 0
+1 1
+2 2
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 0 │
+│ 1 │
+│ 2 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[0,1,2]]
+
clip(lower_bound=None, upper_bound=None)
+
+Clip values in the Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Lower bound value. + |
+
+ None
+ |
+
+ upper_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Upper bound value. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+
>>> s = [1, 2, 3]
+>>> df_pd = pd.DataFrame({"s": s})
+>>> df_pl = pl.DataFrame({"s": s})
+>>> df_pa = pa.table({"s": s})
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func_lower(df):
+... return df.select(nw.col("s").clip(2))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func_lower
:
>>> func_lower(df_pd)
+ s
+0 2
+1 2
+2 3
+>>> func_lower(df_pl)
+shape: (3, 1)
+┌─────┐
+│ s │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> func_lower(df_pa)
+pyarrow.Table
+s: int64
+----
+s: [[2,2,3]]
+
We define another library agnostic function:
+>>> @nw.narwhalify
+... def func_upper(df):
+... return df.select(nw.col("s").clip(upper_bound=2))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func_upper
:
>>> func_upper(df_pd)
+ s
+0 1
+1 2
+2 2
+>>> func_upper(df_pl)
+shape: (3, 1)
+┌─────┐
+│ s │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 2 │
+└─────┘
+>>> func_upper(df_pa)
+pyarrow.Table
+s: int64
+----
+s: [[1,2,2]]
+
We can have both at the same time
+>>> s = [-1, 1, -3, 3, -5, 5]
+>>> df_pd = pd.DataFrame({"s": s})
+>>> df_pl = pl.DataFrame({"s": s})
+>>> df_pa = pa.table({"s": s})
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("s").clip(-1, 3))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ s
+0 -1
+1 1
+2 -1
+3 3
+4 -1
+5 3
+>>> func(df_pl)
+shape: (6, 1)
+┌─────┐
+│ s │
+│ --- │
+│ i64 │
+╞═════╡
+│ -1 │
+│ 1 │
+│ -1 │
+│ 3 │
+│ -1 │
+│ 3 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+s: int64
+----
+s: [[-1,1,-1,3,-1,3]]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Check if this expression is between the given lower and upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
+ upper_bound
+ |
+
+ Any
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
+ closed
+ |
+
+ str
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
+>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").is_between(2, 4, "right"))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 False
+1 False
+2 True
+3 True
+4 False
+>>> func(df_pl)
+shape: (5, 1)
+┌───────┐
+│ a │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ false │
+│ true │
+│ true │
+│ false │
+└───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+----
+a: [[false,false,true,true,false]]
+
is_duplicated()
+
+Return a boolean mask indicating duplicated values.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_duplicated())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 True True
+1 False True
+2 False False
+3 True False
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ false ┆ true │
+│ false ┆ false │
+│ true ┆ false │
+└───────┴───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true,false,false,true]]
+b: [[true,true,false,false]]
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_first_distinct())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 True True
+1 True False
+2 True True
+3 False True
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[true,true,true,false]]
+b: [[true,false,true,true]]
+
is_in(other)
+
+Check if elements of this expression are present in the other iterable.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Any
+ |
+
+
+
+ iterable + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})
+>>> df_pa = pa.table({"a": [1, 2, 9, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(b=nw.col("a").is_in([1, 2]))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 1 True
+1 2 True
+2 9 False
+3 10 False
+
>>> func(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ bool │
+╞═════╪═══════╡
+│ 1 ┆ true │
+│ 2 ┆ true │
+│ 9 ┆ false │
+│ 10 ┆ false │
+└─────┴───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: bool
+----
+a: [[1,2,9,10]]
+b: [[true,true,false,false]]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_last_distinct())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 False False
+1 True True
+2 True True
+3 True True
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ true │
+│ true ┆ true │
+│ true ┆ true │
+└───────┴───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false,true,true,true]]
+b: [[false,true,true,true]]
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+>>> df_pl = pl.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+>>> df_pa = pa.table(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b a_is_null b_is_null
+0 2.0 2.0 False False
+1 4.0 4.0 False False
+2 NaN NaN True True
+3 3.0 3.0 False False
+4 5.0 5.0 False False
+
>>> func(df_pl) # nan != null for polars
+shape: (5, 4)
+┌──────┬─────┬───────────┬───────────┐
+│ a ┆ b ┆ a_is_null ┆ b_is_null │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ bool │
+╞══════╪═════╪═══════════╪═══════════╡
+│ 2 ┆ 2.0 ┆ false ┆ false │
+│ 4 ┆ 4.0 ┆ false ┆ false │
+│ null ┆ NaN ┆ true ┆ false │
+│ 3 ┆ 3.0 ┆ false ┆ false │
+│ 5 ┆ 5.0 ┆ false ┆ false │
+└──────┴─────┴───────────┴───────────┘
+
>>> func(df_pa) # nan != null for pyarrow
+pyarrow.Table
+a: int64
+b: double
+a_is_null: bool
+b_is_null: bool
+----
+a: [[2,4,null,3,5]]
+b: [[2,4,nan,3,5]]
+a_is_null: [[false,false,true,false,false]]
+b_is_null: [[false,false,false,false,false]]
+
is_unique()
+
+Return a boolean mask indicating unique values.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_unique())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 False False
+1 True False
+2 True True
+3 False True
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+----
+a: [[false,true,true,false]]
+b: [[false,false,true,true]]
+
len()
+
+Return the number of elements in the column.
+Null values count towards the total.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that computes the len over different values of "b" column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
+... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a1 a2
+0 2 1
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a1 ┆ a2 │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 2 ┆ 1 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a1: int64
+a2: int64
+----
+a1: [[2]]
+a2: [[1]]
+
map_batches(function, return_dtype=None)
+
+Apply a custom python function to a whole Series or sequence of Series.
+The output of this custom function is presumed to be either a Series, +or a NumPy array (in which case it will be automatically converted into +a Series).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ return_dtype
+ |
+
+ DType | None
+ |
+
+
+
+ Dtype of the output Series. + If not set, the dtype will be inferred based on the first non-null value + that is returned by the function. + |
+
+ None
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.col("a", "b").map_batches(
+... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
+... )
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 2.0 5.0
+1 3.0 6.0
+2 4.0 7.0
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 2.0 ┆ 5.0 │
+│ 3.0 ┆ 6.0 │
+│ 4.0 ┆ 7.0 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[2,3,4]]
+b: [[5,6,7]]
+
max()
+
+Returns the maximum value(s) from a column(s).
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
+>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})
+>>> df_pa = pa.table({"a": [10, 20], "b": [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.max("a", "b"))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 20 100
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 20 ┆ 100 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[20]]
+b: [[100]]
+
mean()
+
+Get mean value.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+>>> df_pa = pa.table({"a": [-1, 0, 1], "b": [2, 4, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").mean())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 0.0 4.0
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 0.0 ┆ 4.0 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[0]]
+b: [[4]]
+
median()
+
+Get median value.
+ + +Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
+>>> df_pl = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
+>>> df_pa = pa.table({"a": [1, 8, 3], "b": [4, 5, 2]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").median())
+
We can then pass any supported library such as pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 3.0 4.0
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 3.0 ┆ 4.0 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[3]]
+b: [[4]]
+
min()
+
+Returns the minimum value(s) from a column(s).
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [4, 3]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.min("a", "b"))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 1 3
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1]]
+b: [[3]]
+
mode()
+
+Compute the most occurring value(s).
+Can return multiple values.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+
>>> data = {
+... "a": [1, 1, 2, 3],
+... "b": [1, 1, 2, 2],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").mode()).sort("a")
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 1
+
>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+└─────┘
+
>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1]]
+
null_count()
+
+Count null values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().null_count())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 1 2
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1]]
+b: [[2]]
+
n_unique()
+
+Returns count of unique values.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").n_unique())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 5 3
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 5 ┆ 3 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[5]]
+b: [[3]]
+
over(*keys)
+
+Compute expressions over the given groups.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of columns to compute window expression over.
+ Must be names of columns, as opposed to expressions -
+ so, this is a bit less flexible than Polars' `Expr.over`. + |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b a_min_per_group
+0 1 1 1
+1 2 1 1
+2 3 2 3
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────────────────┐
+│ a ┆ b ┆ a_min_per_group │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════════════════╡
+│ 1 ┆ 1 ┆ 1 │
+│ 2 ┆ 1 ┆ 1 │
+│ 3 ┆ 2 ┆ 3 │
+└─────┴─────┴─────────────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+a_min_per_group: int64
+----
+a: [[1,2,3]]
+b: [[1,1,2]]
+a_min_per_group: [[1,1,3]]
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a library-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").pipe(lambda x: x + 1))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 2
+1 3
+2 4
+3 5
+>>> func(df_pl)
+shape: (4, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 4 │
+│ 5 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,3,4,5]]
+
quantile(quantile, interpolation)
+
+Get quantile value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ quantile
+ |
+
+ float
+ |
+
+
+
+ Quantile between 0.0 and 1.0. + |
+ + required + | +
+ interpolation
+ |
+
+ Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
+ |
+
+
+
+ Interpolation method. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 24.5 74.5
+
>>> func(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪══════╡
+│ 24.5 ┆ 74.5 │
+└──────┴──────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[24.5]]
+b: [[74.5]]
+
replace_strict(old, new=None, *, return_dtype=None)
+
+Replace all values by different values.
+This function must replace all non-null input values (else it raises an error).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ old
+ |
+
+ Sequence[Any] | Mapping[Any, Any]
+ |
+
+
+
+ Sequence of values to replace. It also accepts a mapping of values to
+their replacement as syntactic sugar for
+ |
+ + required + | +
+ new
+ |
+
+ Sequence[Any] | None
+ |
+
+
+
+ Sequence of values to replace by. Length must match the length of |
+
+ None
+ |
+
+ return_dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the resulting expression. If set to |
+
+ None
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pa = pa.table({"a": [3, 0, 1, 2]})
+
Let's define dataframe-agnostic functions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... b=nw.col("a").replace_strict(
+... [0, 1, 2, 3],
+... ["zero", "one", "two", "three"],
+... return_dtype=nw.String,
+... )
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 3 three
+1 0 zero
+2 1 one
+3 2 two
+>>> func(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ str │
+╞═════╪═══════╡
+│ 3 ┆ three │
+│ 0 ┆ zero │
+│ 1 ┆ one │
+│ 2 ┆ two │
+└─────┴───────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: string
+----
+a: [[3,0,1,2]]
+b: [["three","zero","one","two"]]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ decimals
+ |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 to 4.0, etc..).
+Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..).
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": [1.12345, 2.56789, 3.901234]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").round(1))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 1.1
+1 2.6
+2 3.9
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 1.1 │
+│ 2.6 │
+│ 3.9 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[1.1,2.6,3.9]]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample randomly from this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})
+>>> df_pa = pa.table({"a": [1, 2, 3]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").sample(fraction=1.0, with_replacement=True))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+2 3
+0 1
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 3 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[1,3,3]]
+
shift(n)
+
+Shift values by n
positions.
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure, that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in a Int64 column, you could
+do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(a_shift=nw.col("a").shift(n=1))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a_shift
+0 NaN
+1 1.0
+2 1.0
+3 3.0
+4 5.0
+>>> func(df_pl)
+shape: (5, 1)
+┌─────────┐
+│ a_shift │
+│ --- │
+│ i64 │
+╞═════════╡
+│ null │
+│ 1 │
+│ 1 │
+│ 3 │
+│ 5 │
+└─────────┘
+>>> func(df_pa)
+pyarrow.Table
+a_shift: int64
+----
+a_shift: [[null,1,1,3,5]]
+
sort(*, descending=False, nulls_last=False)
+
+Sort this column. Place null values first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last instead of first. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})
+>>> df_pa = pa.table({"a": [5, None, 1, 2]})
+
Let's define dataframe-agnostic functions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").sort())
+
>>> def func_descend(df):
+... df = nw.from_native(df)
+... df = df.select(nw.col("a").sort(descending=True))
+... return nw.to_native(df)
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+>>> func(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 1 │
+│ 2 │
+│ 5 │
+└──────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[null,1,2,5]]
+
>>> func_descend(df_pd)
+ a
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+>>> func_descend(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 5 │
+│ 2 │
+│ 1 │
+└──────┘
+>>> func_descend(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[null,5,2,1]]
+
std(*, ddof=1)
+
+Get standard deviation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, + where N represents the number of elements. By default ddof is 1. + |
+
+ 1
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+>>> df_pa = pa.table({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").std(ddof=0))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 17.79513 1.265789
+>>> func(df_pl)
+shape: (1, 2)
+┌──────────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════════╪══════════╡
+│ 17.79513 ┆ 1.265789 │
+└──────────┴──────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+b: double
+----
+a: [[17.795130420052185]]
+b: [[1.2657891697365016]]
+
sum()
+
+Return the sum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
+>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})
+>>> df_pa = pa.table({"a": [5, 10], "b": [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").sum())
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 15 150
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 15 ┆ 150 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[15]]
+b: [[150]]
+
tail(n=10)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").tail(3))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+7 7
+8 8
+9 9
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 7 │
+│ 8 │
+│ 9 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[7,8,9]]
+
unique(*, maintain_order=False)
+
+Return unique values of this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original expression. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").unique(maintain_order=True))
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 1 2
+1 3 4
+2 5 6
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 3 ┆ 4 │
+│ 5 ┆ 6 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,3,5]]
+b: [[2,4,6]]
+
narwhals.Expr.cat
get_categories()
+
+Get unique categories from column.
+ + +Examples:
+Let's create some dataframes:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", "mango"]}
+>>> df_pd = pd.DataFrame(data, dtype="category")
+>>> df_pl = pl.DataFrame(data, schema={"fruits": pl.Categorical})
+
We define a dataframe-agnostic function to get unique categories +from column 'fruits':
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("fruits").cat.get_categories())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits
+0 apple
+1 mango
+>>> func(df_pl)
+shape: (2, 1)
+┌────────┐
+│ fruits │
+│ --- │
+│ str │
+╞════════╡
+│ apple │
+│ mango │
+└────────┘
+
narwhals.Expr.dt
convert_time_zone(time_zone)
+
+Convert to a new time zone.
+If converting from a time-zone-naive column, then conversion happens +as if converting from UTC.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").dt.convert_time_zone("Asia/Kathmandu"))
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> func(df_pd)
+ a
+0 2024-01-01 05:45:00+05:45
+1 2024-01-02 05:45:00+05:45
+>>> func(df_pl)
+shape: (2, 1)
+┌──────────────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs, Asia/Kathmandu] │
+╞══════════════════════════════╡
+│ 2024-01-01 05:45:00 +0545 │
+│ 2024-01-02 05:45:00 +0545 │
+└──────────────────────────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: timestamp[us, tz=Asia/Kathmandu]
+----
+a: [[2024-01-01 00:00:00.000000Z,2024-01-02 00:00:00.000000Z]]
+
date()
+
+Extract the date from underlying DateTime representation.
+ + +Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If pandas default backend is being used. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]}
+>>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow")
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").dt.date())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2012-01-07
+1 2023-03-10
+
>>> func(df_pl)  # doctest
+shape: (2, 1)
+┌────────────┐
+│ a │
+│ --- │
+│ date │
+╞════════════╡
+│ 2012-01-07 │
+│ 2023-03-10 │
+└────────────┘
+
day()
+
+Extract day from underlying DateTime representation.
+Returns the day of month starting from 1. The return value ranges from 1 to 31. (The last day of month differs by months.)
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year"),
+... nw.col("datetime").dt.month().alias("month"),
+... nw.col("datetime").dt.day().alias("day"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime year month day
+0 1978-06-01 1978 6 1
+1 2024-12-13 2024 12 13
+2 2065-01-01 2065 1 1
+>>> func(df_pl)
+shape: (3, 4)
+┌─────────────────────┬──────┬───────┬─────┐
+│ datetime ┆ year ┆ month ┆ day │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i32 ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪═══════╪═════╡
+│ 1978-06-01 00:00:00 ┆ 1978 ┆ 6 ┆ 1 │
+│ 2024-12-13 00:00:00 ┆ 2024 ┆ 12 ┆ 13 │
+│ 2065-01-01 00:00:00 ┆ 2065 ┆ 1 ┆ 1 │
+└─────────────────────┴──────┴───────┴─────┘
+
hour()
+
+Extract hour from underlying DateTime representation.
+Returns the hour number from 0 to 23.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1),
+... datetime(2024, 10, 13, 5),
+... datetime(2065, 1, 1, 10),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.col("datetime").dt.hour().alias("hour"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour
+0 1978-01-01 01:00:00 1
+1 2024-10-13 05:00:00 5
+2 2065-01-01 10:00:00 10
+>>> func(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ hour │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪══════╡
+│ 1978-01-01 01:00:00 ┆ 1 │
+│ 2024-10-13 05:00:00 ┆ 5 │
+│ 2065-01-01 10:00:00 ┆ 10 │
+└─────────────────────┴──────┘
+
microsecond()
+
+Extract microseconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.microsecond().alias("microsecond"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second microsecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.505 5 30 14 505000
+2 2065-01-01 10:20:30.067 10 20 30 67000
+>>> func(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 5 ┆ 30 ┆ 14 ┆ 505000 │
+│ 2065-01-01 10:20:30.067 ┆ 10 ┆ 20 ┆ 30 ┆ 67000 │
+└─────────────────────────┴──────┴────────┴────────┴─────────────┘
+
millisecond()
+
+Extract milliseconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.millisecond().alias("millisecond"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second millisecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.505 5 30 14 505
+2 2065-01-01 10:20:30.067 10 20 30 67
+>>> func(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 5 ┆ 30 ┆ 14 ┆ 505 │
+│ 2065-01-01 10:20:30.067 ┆ 10 ┆ 20 ┆ 30 ┆ 67 │
+└─────────────────────────┴──────┴────────┴────────┴─────────────┘
+
minute()
+
+Extract minutes from underlying DateTime representation.
+Returns the minute number from 0 to 59.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30),
+... datetime(2065, 1, 1, 10, 20),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute
+0 1978-01-01 01:01:00 1 1
+1 2024-10-13 05:30:00 5 30
+2 2065-01-01 10:20:00 10 20
+>>> func(df_pl)
+shape: (3, 3)
+┌─────────────────────┬──────┬────────┐
+│ datetime ┆ hour ┆ minute │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪════════╡
+│ 1978-01-01 01:01:00 ┆ 1 ┆ 1 │
+│ 2024-10-13 05:30:00 ┆ 5 ┆ 30 │
+│ 2065-01-01 10:20:00 ┆ 10 ┆ 20 │
+└─────────────────────┴──────┴────────┘
+
month()
+
+Extract month from underlying DateTime representation.
+Returns the month number starting from 1. The return value ranges from 1 to 12.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year"),
+... nw.col("datetime").dt.month().alias("month"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime year month
+0 1978-06-01 1978 6
+1 2024-12-13 2024 12
+2 2065-01-01 2065 1
+>>> func(df_pl)
+shape: (3, 3)
+┌─────────────────────┬──────┬───────┐
+│ datetime ┆ year ┆ month │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i32 ┆ i8 │
+╞═════════════════════╪══════╪═══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 ┆ 6 │
+│ 2024-12-13 00:00:00 ┆ 2024 ┆ 12 │
+│ 2065-01-01 00:00:00 ┆ 2065 ┆ 1 │
+└─────────────────────┴──────┴───────┘
+
nanosecond()
+
+Extract nanoseconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 500000),
+... datetime(2065, 1, 1, 10, 20, 30, 60000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.nanosecond().alias("nanosecond"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second nanosecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.500 5 30 14 500000000
+2 2065-01-01 10:20:30.060 10 20 30 60000000
+>>> func(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
+│ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
+└─────────────────────────┴──────┴────────┴────────┴────────────┘
+
ordinal_day()
+
+Get ordinal day.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_ordinal_day=nw.col("a").dt.ordinal_day())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_ordinal_day
+0 2020-01-01 1
+1 2020-08-03 216
+>>> func(df_pl)
+shape: (2, 2)
+┌─────────────────────┬───────────────┐
+│ a ┆ a_ordinal_day │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i16 │
+╞═════════════════════╪═══════════════╡
+│ 2020-01-01 00:00:00 ┆ 1 │
+│ 2020-08-03 00:00:00 ┆ 216 │
+└─────────────────────┴───────────────┘
+
replace_time_zone(time_zone)
+
+Replace time zone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str | None
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").dt.replace_time_zone("Asia/Kathmandu"))
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> func(df_pd)
+ a
+0 2024-01-01 00:00:00+05:45
+1 2024-01-02 00:00:00+05:45
+>>> func(df_pl)
+shape: (2, 1)
+┌──────────────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs, Asia/Kathmandu] │
+╞══════════════════════════════╡
+│ 2024-01-01 00:00:00 +0545 │
+│ 2024-01-02 00:00:00 +0545 │
+└──────────────────────────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: timestamp[us, tz=Asia/Kathmandu]
+----
+a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]]
+
second()
+
+Extract seconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30, 14),
+... datetime(2065, 1, 1, 10, 20, 30),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second
+0 1978-01-01 01:01:01 1 1 1
+1 2024-10-13 05:30:14 5 30 14
+2 2065-01-01 10:20:30 10 20 30
+>>> func(df_pl)
+shape: (3, 4)
+┌─────────────────────┬──────┬────────┬────────┐
+│ datetime ┆ hour ┆ minute ┆ second │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪════════╪════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 │
+│ 2024-10-13 05:30:14 ┆ 5 ┆ 30 ┆ 14 │
+│ 2065-01-01 10:20:30 ┆ 10 ┆ 20 ┆ 30 │
+└─────────────────────┴──────┴────────┴────────┘
+
timestamp(time_unit='us')
+
+Return a timestamp in the given time unit.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['ns', 'us', 'ms']
+ |
+
+
+
+ {'ns', 'us', 'ms'} +Time unit. + |
+
+ 'us'
+ |
+
Examples:
+>>> from datetime import date
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {"date": [date(2001, 1, 1), None, date(2001, 1, 3)]}
+>>> df_pd = pd.DataFrame(data, dtype="datetime64[ns]")
+>>> df_pl = pl.DataFrame(data)
+>>> df_pa = pa.table(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("date").dt.timestamp().alias("timestamp_us"),
+... nw.col("date").dt.timestamp("ms").alias("timestamp_ms"),
+... )
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> func(df_pd)
+ date timestamp_us timestamp_ms
+0 2001-01-01 9.783072e+14 9.783072e+11
+1 NaT NaN NaN
+2 2001-01-03 9.784800e+14 9.784800e+11
+>>> func(df_pl)
+shape: (3, 3)
+┌────────────┬─────────────────┬──────────────┐
+│ date ┆ timestamp_us ┆ timestamp_ms │
+│ --- ┆ --- ┆ --- │
+│ date ┆ i64 ┆ i64 │
+╞════════════╪═════════════════╪══════════════╡
+│ 2001-01-01 ┆ 978307200000000 ┆ 978307200000 │
+│ null ┆ null ┆ null │
+│ 2001-01-03 ┆ 978480000000000 ┆ 978480000000 │
+└────────────┴─────────────────┴──────────────┘
+>>> func(df_pa)
+pyarrow.Table
+date: date32[day]
+timestamp_us: int64
+timestamp_ms: int64
+----
+date: [[2001-01-01,null,2001-01-03]]
+timestamp_us: [[978307200000000,null,978480000000000]]
+timestamp_ms: [[978307200000,null,978480000000]]
+
total_microseconds()
+
+Get total microseconds.
+ + +The function outputs the total microseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {
+... "a": [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_total_microseconds=nw.col("a").dt.total_microseconds()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_microseconds
+0 0 days 00:00:00.000010 10
+1 0 days 00:00:00.001200 1200
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_microseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10µs ┆ 10 │
+│ 1200µs ┆ 1200 │
+└──────────────┴──────────────────────┘
+
total_milliseconds()
+
+Get total milliseconds.
+ + +The function outputs the total milliseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {
+... "a": [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_total_milliseconds=nw.col("a").dt.total_milliseconds()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_milliseconds
+0 0 days 00:00:00.010000 10
+1 0 days 00:00:00.020040 20
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_milliseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10ms ┆ 10 │
+│ 20040µs ┆ 20 │
+└──────────────┴──────────────────────┘
+
total_minutes()
+
+Get total minutes.
+ + +The function outputs the total minutes in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_total_minutes=nw.col("a").dt.total_minutes())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_minutes
+0 0 days 00:10:00 10
+1 0 days 00:20:40 20
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_minutes │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10m ┆ 10 │
+│ 20m 40s ┆ 20 │
+└──────────────┴─────────────────┘
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +The function outputs the total nanoseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> df_pd = pd.DataFrame({"a": pd.to_datetime(data)})
+>>> df_pl = pl.DataFrame({"a": data}).with_columns(
+... pl.col("a").str.to_datetime(time_unit="ns")
+... )
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_diff_total_nanoseconds
+0 2024-01-01 00:00:00.000000001 NaN
+1 2024-01-01 00:00:00.000000002 1.0
+>>> func(df_pl)
+shape: (2, 2)
+┌───────────────────────────────┬──────────────────────────┐
+│ a ┆ a_diff_total_nanoseconds │
+│ --- ┆ --- │
+│ datetime[ns] ┆ i64 │
+╞═══════════════════════════════╪══════════════════════════╡
+│ 2024-01-01 00:00:00.000000001 ┆ null │
+│ 2024-01-01 00:00:00.000000002 ┆ 1 │
+└───────────────────────────────┴──────────────────────────┘
+
total_seconds()
+
+Get total seconds.
+ + +The function outputs the total seconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_total_seconds=nw.col("a").dt.total_seconds())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_seconds
+0 0 days 00:00:10 10
+1 0 days 00:00:20.040000 20
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_seconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10s ┆ 10 │
+│ 20s 40ms ┆ 20 │
+└──────────────┴─────────────────┘
+
to_string(format)
+
+Convert a Date/Time/Datetime column into a String column with the given format.
+ + +Unfortunately, different libraries interpret format directives a bit +differently.
+"%.f"
for fractional seconds,
+ whereas pandas and Python stdlib use ".%f"
."%S"
as "seconds, including fractional seconds"
+ whereas most other tools interpret it as "just seconds, as 2 digits".Therefore, we make the following adjustments:
+"%S.%f"
with "%S%.f"
."%S.%f"
with "%S"
.Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+"%Y-%m-%dT%H:%M:%S%.f"
for datetimes"%Y-%m-%d"
for datesthough note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> df_pd = pd.DataFrame({"a": data})
+>>> df_pl = pl.DataFrame({"a": data})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2020/03/01 00:00:00
+1 2020/04/01 00:00:00
+2 2020/05/01 00:00:00
+
>>> func(df_pl)
+shape: (3, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ str │
+╞═════════════════════╡
+│ 2020/03/01 00:00:00 │
+│ 2020/04/01 00:00:00 │
+│ 2020/05/01 00:00:00 │
+└─────────────────────┘
+
year()
+
+Extract year from underlying DateTime representation.
+Returns the year number in the calendar date.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.col("datetime").dt.year().alias("year"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime year
+0 1978-06-01 1978
+1 2024-12-13 2024
+2 2065-01-01 2065
+>>> func(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ year │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════╪══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 │
+│ 2024-12-13 00:00:00 ┆ 2024 │
+│ 2065-01-01 00:00:00 ┆ 2065 │
+└─────────────────────┴──────┘
+
narwhals.Expr.name
keep()
+
+Keep the original root name of the expression.
+ + +This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo").alias("alias_for_foo").name.keep())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd).columns
+Index(['foo'], dtype='object')
+>>> func(df_pl).columns
+['foo']
+
map(function)
+
+Rename the output of an expression by mapping a function over the root name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ function
+ |
+
+ Callable[[str], str]
+ |
+
+
+
+ Function that maps a root name to a new name. + |
+ + required + | +
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> renaming_func = lambda s: s[::-1] # reverse column name
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo", "BAR").name.map(renaming_func))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd).columns
+Index(['oof', 'RAB'], dtype='object')
+>>> func(df_pl).columns
+['oof', 'RAB']
+
prefix(prefix)
+
+Add a prefix to the root column name of the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ Prefix to add to the root column name. + |
+ + required + | +
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def add_colname_prefix(df, prefix):
+... return df.select(nw.col("foo", "BAR").name.prefix(prefix))
+
We can then pass either pandas or Polars to add_colname_prefix
:
>>> add_colname_prefix(df_pd, "with_prefix_").columns
+Index(['with_prefix_foo', 'with_prefix_BAR'], dtype='object')
+
>>> add_colname_prefix(df_pl, "with_prefix_").columns
+['with_prefix_foo', 'with_prefix_BAR']
+
suffix(suffix)
+
+Add a suffix to the root column name of the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to add to the root column name. + |
+ + required + | +
This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def add_colname_suffix(df, suffix):
+... return df.select(nw.col("foo", "BAR").name.suffix(suffix))
+
We can then pass either pandas or Polars to add_colname_suffix
:
>>> add_colname_suffix(df_pd, "_with_suffix").columns
+Index(['foo_with_suffix', 'BAR_with_suffix'], dtype='object')
+>>> add_colname_suffix(df_pl, "_with_suffix").columns
+['foo_with_suffix', 'BAR_with_suffix']
+
to_lowercase()
+
+Make the root column name lowercase.
+ + +This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def to_lower(df):
+... return df.select(nw.col("foo", "BAR").name.to_lowercase())
+
We can then pass either pandas or Polars to to_lower
:
>>> to_lower(df_pd).columns
+Index(['foo', 'bar'], dtype='object')
+>>> to_lower(df_pl).columns
+['foo', 'bar']
+
to_uppercase()
+
+Make the root column name uppercase.
+ + +This will undo any previous renaming operations on the expression. +Due to implementation constraints, this method can only be called as the last +expression in a chain. Only one name operation per expression will work.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"foo": [1, 2], "BAR": [4, 5]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def to_upper(df):
+... return df.select(nw.col("foo", "BAR").name.to_uppercase())
+
We can then pass either pandas or Polars to to_upper
:
>>> to_upper(df_pd).columns
+Index(['FOO', 'BAR'], dtype='object')
+>>> to_upper(df_pl).columns
+['FOO', 'BAR']
+
narwhals.Expr.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A Character sequence or valid regular expression pattern. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... default_match=nw.col("pets").str.contains("parrot|Dove"),
+... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"),
+... literal_match=nw.col("pets").str.contains(
+... "parrot|Dove", literal=True
+... ),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ pets default_match case_insensitive_match literal_match
+0 cat False False False
+1 dog False False False
+2 rabbit and parrot True True False
+3 dove False True False
+4 None None None None
+>>> func(df_pl)
+shape: (5, 4)
+┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
+│ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ str ┆ bool ┆ bool ┆ bool │
+╞═══════════════════╪═══════════════╪════════════════════════╪═══════════════╡
+│ cat ┆ false ┆ false ┆ false │
+│ dog ┆ false ┆ false ┆ false │
+│ rabbit and parrot ┆ true ┆ true ┆ false │
+│ dove ┆ false ┆ true ┆ false │
+│ null ┆ null ┆ null ┆ null │
+└───────────────────┴───────────────┴────────────────────────┴───────────────┘
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(has_suffix=nw.col("fruits").str.ends_with("ngo"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits has_suffix
+0 apple False
+1 mango True
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_suffix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ false │
+│ mango ┆ true │
+│ null ┆ null │
+└────────┴────────────┘
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lyrics_head=nw.col("lyrics").str.head())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ lyrics lyrics_head
+0 Atatata Atata
+1 taata taata
+2 taatatata taata
+3 zukkyun zukky
+
>>> func(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_head │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ Atata │
+│ taata ┆ taata │
+│ taatatata ┆ taata │
+│ zukkyun ┆ zukky │
+└───────────┴─────────────┘
+
len_chars()
+
+Return the length of each string as the number of characters.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"words": ["foo", "Café", "345", "東京", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(words_len=nw.col("words").str.len_chars())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ words words_len
+0 foo 3.0
+1 Café 4.0
+2 345 3.0
+3 東京 2.0
+4 None NaN
+
>>> func(df_pl)
+shape: (5, 2)
+┌───────┬───────────┐
+│ words ┆ words_len │
+│ --- ┆ --- │
+│ str ┆ u32 │
+╞═══════╪═══════════╡
+│ foo ┆ 3 │
+│ Café ┆ 4 │
+│ 345 ┆ 3 │
+│ 東京 ┆ 2 │
+│ null ┆ null │
+└───────┴───────────┘
+
replace(pattern, value, *, literal=False, n=1)
+
+Replace first matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string, rather than a regular expression. + |
+
+ False
+ |
+
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of matches to replace. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": ["123abc", "abc abc123"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... df = df.with_columns(replaced=nw.col("foo").str.replace("abc", ""))
+... return df.to_dict(as_series=False)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+{'foo': ['123abc', 'abc abc123'], 'replaced': ['123', ' abc123']}
+
>>> func(df_pl)
+{'foo': ['123abc', 'abc abc123'], 'replaced': ['123', ' abc123']}
+
replace_all(pattern, value, *, literal=False)
+
+Replace all matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string, rather than a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": ["123abc", "abc abc123"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... df = df.with_columns(replaced=nw.col("foo").str.replace_all("abc", ""))
+... return df.to_dict(as_series=False)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+{'foo': ['123abc', 'abc abc123'], 'replaced': ['123', ' 123']}
+
>>> func(df_pl)
+{'foo': ['123abc', 'abc abc123'], 'replaced': ['123', ' 123']}
+
slice(offset, length=None)
+
+Create subslices of the string values of an expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ offset
+ |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
+ length
+ |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"s": ["pear", None, "papaya", "dragonfruit"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(s_sliced=nw.col("s").str.slice(4, length=3))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ s s_sliced
+0 pear
+1 None None
+2 papaya ya
+3 dragonfruit onf
+
>>> func(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ │
+│ null ┆ null │
+│ papaya ┆ ya │
+│ dragonfruit ┆ onf │
+└─────────────┴──────────┘
+
Using negative indexes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(s_sliced=nw.col("s").str.slice(-3))
+
>>> func(df_pd)
+ s s_sliced
+0 pear ear
+1 None None
+2 papaya aya
+3 dragonfruit uit
+
>>> func(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ ear │
+│ null ┆ null │
+│ papaya ┆ aya │
+│ dragonfruit ┆ uit │
+└─────────────┴──────────┘
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(has_prefix=nw.col("fruits").str.starts_with("app"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits has_prefix
+0 apple True
+1 mango False
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_prefix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ true │
+│ mango ┆ false │
+│ null ┆ null │
+└────────┴────────────┘
+
strip_chars(characters=None)
+
+Remove leading and trailing characters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ characters
+ |
+
+ str | None
+ |
+
+
+
+ The set of characters to be removed. All combinations of this set of characters will be stripped from the start and end of the string. If set to None (default), all leading and trailing whitespace is removed instead. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "\nmango"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... df = df.with_columns(stripped=nw.col("fruits").str.strip_chars())
+... return df.to_dict(as_series=False)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
>>> func(df_pl)
+{'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']}
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lyrics_tail=nw.col("lyrics").str.tail())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ lyrics lyrics_tail
+0 Atatata atata
+1 taata taata
+2 taatatata atata
+3 zukkyun kkyun
+
>>> func(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_tail │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ atata │
+│ taata ┆ taata │
+│ taatatata ┆ atata │
+│ zukkyun ┆ kkyun │
+└───────────┴─────────────┘
+
to_datetime(format=None)
+
+Convert to Datetime dtype.
+ + +pandas defaults to nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. The ability to +set the time unit in pandas, if the version permits, will arrive.
+As different backends auto-infer format in different ways, if format=None
+there is no guarantee that the result will be equal.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str | None
+ |
+
+
+
+ Format to use for conversion. If set to None (default), the format is +inferred from the data. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = ["2020-01-01", "2020-01-02"]
+>>> df_pd = pd.DataFrame({"a": data})
+>>> df_pl = pl.DataFrame({"a": data})
+>>> df_pa = pa.table({"a": data})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").str.to_datetime(format="%Y-%m-%d"))
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> func(df_pd)
+ a
+0 2020-01-01
+1 2020-01-02
+>>> func(df_pl)
+shape: (2, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs] │
+╞═════════════════════╡
+│ 2020-01-01 00:00:00 │
+│ 2020-01-02 00:00:00 │
+└─────────────────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: timestamp[us]
+----
+a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]]
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["APPLE", "MANGO", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lower_col=nw.col("fruits").str.to_lowercase())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits lower_col
+0 APPLE apple
+1 MANGO mango
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ lower_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ APPLE ┆ apple │
+│ MANGO ┆ mango │
+│ null ┆ null │
+└────────┴───────────┘
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see the related issue. +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(upper_col=nw.col("fruits").str.to_uppercase())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits upper_col
+0 apple APPLE
+1 mango MANGO
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ upper_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ apple ┆ APPLE │
+│ mango ┆ MANGO │
+│ null ┆ null │
+└────────┴───────────┘
+
narwhals.GroupBy
agg(*aggs, **named_aggs)
+
+Compute aggregations for each group of a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ aggs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Aggregations to compute for each group of the group by operation, +specified as positional arguments. + |
+
+ ()
+ |
+
+ named_aggs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional aggregations, specified as keyword arguments. + |
+
+ {}
+ |
+
Examples:
+Group by one column or by multiple columns and call agg
to compute
+the grouped sum of another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+
We define library agnostic functions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
>>> @nw.narwhalify
+... def func_mult_col(df):
+... return df.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")
+
We can then pass either pandas or Polars to func
and func_mult_col
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> func_mult_col(df_pd)
+ a b c
+0 a 1 8
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func_mult_col(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 8 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
narwhals.LazyFrame
Narwhals LazyFrame, backed by a native lazyframe.
+The native dataframe might be pandas.DataFrame, polars.LazyFrame, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
.
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.columns
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+['foo', 'bar', 'ham']
+>>> func(lf_pl)
+['foo', 'bar', 'ham']
+
schema: Schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf.schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
clone()
+
+Create a copy of this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we copy the DataFrame:
+>>> @nw.narwhalify
+... def func(df):
+... return df.clone()
+
>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> func(df_pl).collect()
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect()
+
+Materialize this LazyFrame into a DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "b", "c"],
+... "b": [1, 2, 3, 4, 5, 6],
+... "c": [6, 5, 4, 3, 2, 1],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf
+┌───────────────────────────────────────┐
+| Narwhals LazyFrame |
+| Use `.to_native` to see native output |
+└───────────────────────────────────────┘
+>>> df = lf.group_by("a").agg(nw.all().sum()).collect()
+>>> df.to_native().sort("a")
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 4 ┆ 10 │
+│ b ┆ 11 ┆ 10 │
+│ c ┆ 6 ┆ 1 │
+└─────┴─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf.collect_schema()
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns, strict=True)
+
+Remove columns from the LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *columns
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
+ strict
+ |
+
+ bool
+ |
+
+
+
+ Validate that all column names exist in the schema and throw an +exception if a column name does not exist in the schema. + |
+
+ True
+ |
+
strict
argument is ignored for polars<1.0.0
.
Please consider upgrading to a newer version, or using eager mode instead.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("ham")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("foo", "ham")
+
>>> func(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> func(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
drop_nulls(subset=None)
+
+Drop null values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) for which null values are considered. If set to None +(default), use all columns. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop_nulls()
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a ba
+0 1.0 1.0
+>>> func(df_pl).collect()
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+
filter(*predicates)
+
+Filter the rows in the LazyFrame based on a predicate expression.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr] | list[bool]
+ |
+
+
+
+ Expression that evaluates to a boolean Series. Can +also be a (single!) boolean list. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter(nw.col("foo") > 1)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> @nw.narwhalify
+... def func(df):
+... dframe = df.filter(
+... nw.col("foo") == 1,
+... nw.col("ham") == "a",
+... )
+... return dframe
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Filter on an OR condition:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
gather_every(n, offset=0)
+
+Take every nth row in the DataFrame and return as a new DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which gather every 2 rows, +starting from a offset of 1:
+>>> @nw.narwhalify
+... def func(df):
+... return df.gather_every(n=2, offset=1)
+
>>> func(df_pd)
+ a b
+1 2 6
+3 4 8
+
>>> func(lf_pl).collect()
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 6 │
+│ 4 ┆ 8 │
+└─────┴─────┘
+
group_by(*keys, drop_null_keys=False)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *keys
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts expression input. Strings are +parsed as column names. + |
+
+ ()
+ |
+
+ drop_null_keys
+ |
+
+ bool
+ |
+
+
+
+ if True, then groups where any key is null won't be +included in the result. + |
+
+ False
+ |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of
+another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"])
+>>> func(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
join(other, on=None, how='inner', *, left_on=None, right_on=None, suffix='_right')
+
+Add a join operation to the Logical Plan.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ Lazy DataFrame to join with. + |
+ + required + | +
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the join columns in both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ how
+ |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
+ left_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
+ suffix
+ |
+
+ str
+ |
+
+
+
+ Suffix to append to columns with a duplicate name. + |
+
+ '_right'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined LazyFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> df_pl = pl.LazyFrame(data)
+>>> other_pl = pl.LazyFrame(data_other)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> @nw.narwhalify
+... def join_on_ham(df, other_any):
+... return df.join(other_any, left_on="ham", right_on="ham")
+
We can now pass either pandas or Polars to the function:
+>>> join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> join_on_ham(df_pl, other_pl).collect()
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
join_asof(other, *, left_on=None, right_on=None, on=None, by_left=None, by_right=None, by=None, strategy='backward')
+
+Perform an asof join.
+This is similar to a left-join except that we match on nearest key rather than equal keys.
+Both DataFrames must be sorted by the asof_join key.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
+ left_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
+ right_on
+ |
+
+ str | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
+ on
+ |
+
+ str | None
+ |
+
+
+
+ Join column of both DataFrames. If set, left_on and right_on should be None. + |
+
+ None
+ |
+
+ by_left
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by_right
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ by
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ join on these columns before doing asof join + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['backward', 'forward', 'nearest']
+ |
+
+
+
+ Join strategy. The default is "backward". +
|
+
+ 'backward'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_gdp = {
+... "datetime": [
+... datetime(2016, 1, 1),
+... datetime(2017, 1, 1),
+... datetime(2018, 1, 1),
+... datetime(2019, 1, 1),
+... datetime(2020, 1, 1),
+... ],
+... "gdp": [4164, 4411, 4566, 4696, 4827],
+... }
+>>> data_population = {
+... "datetime": [
+... datetime(2016, 3, 1),
+... datetime(2018, 8, 1),
+... datetime(2019, 1, 1),
+... ],
+... "population": [82.19, 82.66, 83.12],
+... }
+>>> gdp_pd = pd.DataFrame(data_gdp)
+>>> population_pd = pd.DataFrame(data_population)
+>>> gdp_pl = pl.LazyFrame(data_gdp).sort("datetime")
+>>> population_pl = pl.LazyFrame(data_population).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" column:
+>>> @nw.narwhalify
+... def join_asof_datetime(df, other_any, strategy):
+... return df.join_asof(other_any, on="datetime", strategy=strategy)
+
We can now pass either pandas or Polars to the function:
+>>> join_asof_datetime(population_pd, gdp_pd, strategy="backward")
+ datetime population gdp
+0 2016-03-01 82.19 4164
+1 2018-08-01 82.66 4566
+2 2019-01-01 83.12 4696
+
>>> join_asof_datetime(population_pl, gdp_pl, strategy="backward").collect()
+shape: (3, 3)
+┌─────────────────────┬────────────┬──────┐
+│ datetime ┆ population ┆ gdp │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ f64 ┆ i64 │
+╞═════════════════════╪════════════╪══════╡
+│ 2016-03-01 00:00:00 ┆ 82.19 ┆ 4164 │
+│ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │
+│ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │
+└─────────────────────┴────────────┴──────┘
+
Here is a real-world times-series example that uses by
argument.
>>> from datetime import datetime
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data_quotes = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 30),
+... datetime(2016, 5, 25, 13, 30, 0, 41),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 49),
+... datetime(2016, 5, 25, 13, 30, 0, 72),
+... datetime(2016, 5, 25, 13, 30, 0, 75),
+... ],
+... "ticker": [
+... "GOOG",
+... "MSFT",
+... "MSFT",
+... "MSFT",
+... "GOOG",
+... "AAPL",
+... "GOOG",
+... "MSFT",
+... ],
+... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
+... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],
+... }
+>>> data_trades = {
+... "datetime": [
+... datetime(2016, 5, 25, 13, 30, 0, 23),
+... datetime(2016, 5, 25, 13, 30, 0, 38),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... datetime(2016, 5, 25, 13, 30, 0, 48),
+... ],
+... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+... "quantity": [75, 155, 100, 100, 100],
+... }
+>>> quotes_pd = pd.DataFrame(data_quotes)
+>>> trades_pd = pd.DataFrame(data_trades)
+>>> quotes_pl = pl.LazyFrame(data_quotes).sort("datetime")
+>>> trades_pl = pl.LazyFrame(data_trades).sort("datetime")
+
Let's define a dataframe-agnostic function in which we join over "datetime" and by "ticker" columns:
+>>> @nw.narwhalify
+... def join_asof_datetime_by_ticker(df, other_any):
+... return df.join_asof(other_any, on="datetime", by="ticker")
+
We can now pass either pandas or Polars to the function:
+>>> join_asof_datetime_by_ticker(trades_pd, quotes_pd)
+ datetime ticker price quantity bid ask
+0 2016-05-25 13:30:00.000023 MSFT 51.95 75 51.95 51.96
+1 2016-05-25 13:30:00.000038 MSFT 51.95 155 51.97 51.98
+2 2016-05-25 13:30:00.000048 GOOG 720.77 100 720.50 720.93
+3 2016-05-25 13:30:00.000048 GOOG 720.92 100 720.50 720.93
+4 2016-05-25 13:30:00.000048 AAPL 98.00 100 NaN NaN
+
>>> join_asof_datetime_by_ticker(trades_pl, quotes_pl).collect()
+shape: (5, 6)
+┌────────────────────────────┬────────┬────────┬──────────┬───────┬────────┐
+│ datetime ┆ ticker ┆ price ┆ quantity ┆ bid ┆ ask │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ str ┆ f64 ┆ i64 ┆ f64 ┆ f64 │
+╞════════════════════════════╪════════╪════════╪══════════╪═══════╪════════╡
+│ 2016-05-25 13:30:00.000023 ┆ MSFT ┆ 51.95 ┆ 75 ┆ 51.95 ┆ 51.96 │
+│ 2016-05-25 13:30:00.000038 ┆ MSFT ┆ 51.95 ┆ 155 ┆ 51.97 ┆ 51.98 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.77 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ GOOG ┆ 720.92 ┆ 100 ┆ 720.5 ┆ 720.93 │
+│ 2016-05-25 13:30:00.000048 ┆ AAPL ┆ 98.0 ┆ 100 ┆ null ┆ null │
+└────────────────────────────┴────────┴────────┴──────────┴───────┴────────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Examples:
+Construct pandas and Polars objects:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.lazy()
+
Note that then, the pandas DataFrame stays eager, and the Polars LazyFrame stays lazy:
+>>> func(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pl)
+<LazyFrame ...>
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.pipe(lambda _df: _df.select("a"))
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> func(df_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mapping
+ |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name, or a + function that takes the old name as input and returns the + new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.rename({"foo": "apple"})
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(lf_pl).collect()
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
select(*exprs, **named_exprs)
+
+Select columns from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. +Accepts expression input. Strings are parsed as column names. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. +The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
If you'd like to select a column whose name isn't a string (for example,
+if you're working with pandas) then you should explicitly use nw.col
instead
+of just passing the column name. For example, to select a column named
+0
use df.select(nw.col(0))
, not df.select(0)
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("foo")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> func(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(["foo", "bar"])
+>>> func(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> func(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(threshold=nw.col("foo") * 2)
+>>> func(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> func(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+>>> func(lf_pl).collect()
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False, nulls_last=False)
+
+Sort the LazyFrame by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ by
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) names to sort by. + |
+ + required + | +
+ *more_by
+ |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional arguments. + |
+
+ ()
+ |
+
+ descending
+ |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple columns, can be +specified per column by passing a sequence of booleans. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last. A single boolean is applied across all by columns (see the note below). + |
+
+ False
+ |
+
Unlike Polars, it is not possible to specify a sequence of booleans for
+nulls_last
in order to control per-column behaviour. Instead a single
+boolean is applied for all by
columns.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_lf = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders
+>>> @nw.narwhalify
+... def func(df):
+... return df.sort("c", "a", descending=[False, True])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> func(df_lf).collect()
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+3 4 10
+4 5 11
+5 6 12
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+
to_native()
+
+Convert Narwhals LazyFrame to native one.
+ + +Returns:
+Type | +Description | +
---|---|
+ FrameT
+ |
+
+
+
+ Object of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+>>> df_pa = pa.table(data)
+
Calling to_native
on a Narwhals LazyFrame returns the native object:
>>> nw.from_native(df_pd).lazy().to_native()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> nw.from_native(df_pl).to_native().collect()
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 7.0 ┆ b │
+│ 3 ┆ 8.0 ┆ c │
+└─────┴─────┴─────┘
+
unique(subset=None, *, keep='any', maintain_order=False)
+
+Drop duplicate rows from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ subset
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows.
+ If set to |
+
+ None
+ |
+
+ keep
+ |
+
+ Literal['any', 'first', 'last', 'none']
+ |
+
+
+
+ {'first', 'last', 'any', 'none'} +Which of the duplicate rows to keep. +
|
+
+ 'any'
+ |
+
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original DataFrame. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ LazyFrame with unique rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unique(["bar", "ham"])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 a b
+>>> func(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
unpivot(on=None, *, index=None, variable_name=None, value_name=None)
+
+Unpivot a DataFrame from wide to long format.
+Optionally leaves identifiers set.
+This function is useful to massage a DataFrame into a format where one or more +columns are identifier variables (index) while all other columns, considered +measured variables (on), are "unpivoted" to the row axis leaving just +two non-identifier columns, 'variable' and 'value'.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ on
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as values variables; if |
+
+ None
+ |
+
+ index
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ Column(s) to use as identifier variables. + |
+
+ None
+ |
+
+ variable_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
+ value_name
+ |
+
+ str | None
+ |
+
+
+
+ Name to give to the |
+
+ None
+ |
+
If you're coming from pandas, this is similar to pandas.DataFrame.melt
,
+but with index
replacing id_vars
and on
replacing value_vars
.
+In other frameworks, you might know this operation as pivot_longer
.
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> data = {
+... "a": ["x", "y", "z"],
+... "b": [1, 3, 5],
+... "c": [2, 4, 6],
+... }
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(lf):
+... return (
+... lf.unpivot(on=["b", "c"], index="a").sort(["variable", "a"]).collect()
+... )
+
>>> func(pl.LazyFrame(data))
+shape: (6, 3)
+┌─────┬──────────┬───────┐
+│ a ┆ variable ┆ value │
+│ --- ┆ --- ┆ --- │
+│ str ┆ str ┆ i64 │
+╞═════╪══════════╪═══════╡
+│ x ┆ b ┆ 1 │
+│ y ┆ b ┆ 3 │
+│ z ┆ b ┆ 5 │
+│ x ┆ c ┆ 2 │
+│ y ┆ c ┆ 4 │
+│ z ┆ c ┆ 6 │
+└─────┴──────────┴───────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this LazyFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
+ **named_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ A new LazyFrame with the columns added. + |
+
Creating a new LazyFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns((nw.col("a") * 2).alias("2a"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c 2a
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> func(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+>>> func(lf_pl).collect()
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_row_index()
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> func(df_pl).collect()
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+
Here are the top-level functions available in Narwhals.
+ + +all()
+
+Instantiate an expression representing all columns.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pa = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all() * 2)
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b
+0 2 8
+1 4 10
+2 6 12
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 8 │
+│ 4 ┆ 10 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[2,4,6]]
+b: [[8,10,12]]
+
all_horizontal(*exprs)
+
+Compute the bitwise AND horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts expression input. + |
+
+ ()
+ |
+
pandas and Polars handle null values differently.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("a", "b", all=nw.all_horizontal("a", "b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b all
+0 False False False
+1 False True False
+2 True True True
+3 True None False
+4 False None False
+5 None None False
+
>>> func(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ all │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ false │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ null │
+│ false ┆ null ┆ false │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+all: bool
+----
+a: [[false,false,true,true,false,null]]
+b: [[false,true,true,null,null,null]]
+all: [[false,false,true,null,false,null]]
+
any_horizontal(*exprs)
+
+Compute the bitwise OR horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts expression input. + |
+
+ ()
+ |
+
pandas and Polars handle null values differently.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("a", "b", any=nw.any_horizontal("a", "b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b any
+0 False False False
+1 False True True
+2 True True True
+3 True None True
+4 False None False
+5 None None False
+
>>> func(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ any │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ true │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ true │
+│ false ┆ null ┆ null │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
>>> func(df_pa)
+pyarrow.Table
+a: bool
+b: bool
+any: bool
+----
+a: [[false,false,true,true,false,null]]
+b: [[false,true,true,null,null,null]]
+any: [[false,true,true,true,null,null]]
+
col(*names)
+
+Creates an expression that references one or more columns by their name(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ names
+ |
+
+ str | Iterable[str]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [3, 4]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a") * nw.col("b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 3
+1 8
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+│ 8 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[3,8]]
+
concat(items, *, how='vertical')
+
+Concatenate multiple DataFrames or LazyFrames into a single entity.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ items
+ |
+
+ Iterable[FrameT]
+ |
+
+
+
+ DataFrames, LazyFrames to concatenate. + |
+ + required + | +
+ how
+ |
+
+ Literal['horizontal', 'vertical']
+ |
+
+
+
+ {'vertical', 'horizontal'}
+* vertical: Stacks Series from DataFrames vertically and fills with |
+
+ 'vertical'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ FrameT
+ |
+
+
+
+ A new DataFrame or LazyFrame resulting from the concatenation. + |
+
Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ The items to concatenate should either all be eager, or all lazy + |
+
Examples:
+Let's take an example of vertical concatenation:
+
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> data_2 = {"a": [5, 2], "b": [1, 4]}
+
+>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
+Let's define a dataframe-agnostic function:
+
+>>> @nw.narwhalify
+... def func(df1, df2):
+... return nw.concat([df1, df2], how="vertical")
+
+>>> func(df_pd_1, df_pd_2)
+ a b
+0 1 4
+1 2 5
+2 3 6
+0 5 1
+1 2 4
+>>> func(df_pl_1, df_pl_2)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 4 │
+│ 2 ┆ 5 │
+│ 3 ┆ 6 │
+│ 5 ┆ 1 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
+Let's look at the case of horizontal concatenation:
+
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data_1 = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> data_2 = {"c": [5, 2], "d": [1, 4]}
+
+>>> df_pd_1 = pd.DataFrame(data_1)
+>>> df_pd_2 = pd.DataFrame(data_2)
+>>> df_pl_1 = pl.DataFrame(data_1)
+>>> df_pl_2 = pl.DataFrame(data_2)
+
+Defining a dataframe-agnostic function:
+
+>>> @nw.narwhalify
+... def func(df1, df2):
+... return nw.concat([df1, df2], how="horizontal")
+
+>>> func(df_pd_1, df_pd_2)
+ a b c d
+0 1 4 5.0 1.0
+1 2 5 2.0 4.0
+2 3 6 NaN NaN
+
+>>> func(df_pl_1, df_pl_2)
+shape: (3, 4)
+┌─────┬─────┬──────┬──────┐
+│ a ┆ b ┆ c ┆ d │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪══════╪══════╡
+│ 1 ┆ 4 ┆ 5 ┆ 1 │
+│ 2 ┆ 5 ┆ 2 ┆ 4 │
+│ 3 ┆ 6 ┆ null ┆ null │
+└─────┴─────┴──────┴──────┘
+
+
+ concat_str(exprs, *more_exprs, separator='', ignore_nulls=False)
+
+Horizontally concatenate columns into a single string column.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Columns to concatenate into a single string column. Accepts expression
+input. Strings are parsed as column names, other non-expression inputs are
+parsed as literals. Non- |
+ + required + | +
+ *more_exprs
+ |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to concatenate into a single string column, +specified as positional arguments. + |
+
+ ()
+ |
+
+ separator
+ |
+
+ str
+ |
+
+
+
+ String that will be used to separate the values of each column. + |
+
+ ''
+ |
+
+ ignore_nulls
+ |
+
+ bool
+ |
+
+
+
+ Ignore null values (default is |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [1, 2, 3],
+... "b": ["dogs", "cats", None],
+... "c": ["play", "swim", "walk"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal string +concatenation of different columns
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.concat_str(
+... [
+... nw.col("a") * 2,
+... nw.col("b"),
+... nw.col("c"),
+... ],
+... separator=" ",
+... ).alias("full_sentence")
+... )
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(pd.DataFrame(data))
+ full_sentence
+0 2 dogs play
+1 4 cats swim
+2 None
+
>>> func(pl.DataFrame(data))
+shape: (3, 1)
+┌───────────────┐
+│ full_sentence │
+│ --- │
+│ str │
+╞═══════════════╡
+│ 2 dogs play │
+│ 4 cats swim │
+│ null │
+└───────────────┘
+
>>> func(pa.table(data))
+pyarrow.Table
+full_sentence: string
+----
+full_sentence: [["2 dogs play","4 cats swim",null]]
+
from_dict(data, schema=None, *, native_namespace=None)
+
+Instantiate DataFrame from dictionary.
+ + +For pandas-like dataframes, conversion to schema is applied after dataframe +creation.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+
+ dict[str, Any]
+ |
+
+
+
+ Dictionary to create DataFrame from. + |
+ + required + | +
+ schema
+ |
+
+ dict[str, DType] | Schema | None
+ |
+
+
+
+ The DataFrame schema as Schema or dict of {name: type}. + |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType | None
+ |
+
+
+
+ The native library to use for DataFrame creation. Only +necessary if inputs are not Narwhals Series. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's create a new dataframe of the same class as the dataframe we started with, from a dict of new data:
+>>> @nw.narwhalify
+... def func(df):
+... new_data = {"c": [5, 2], "d": [1, 4]}
+... native_namespace = nw.get_native_namespace(df)
+... return nw.from_dict(new_data, native_namespace=native_namespace)
+
Let's see what happens when passing Pandas, Polars or PyArrow input:
+>>> func(pd.DataFrame(data))
+ c d
+0 5 1
+1 2 4
+>>> func(pl.DataFrame(data))
+shape: (2, 2)
+┌─────┬─────┐
+│ c ┆ d │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 1 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+>>> func(pa.table(data))
+pyarrow.Table
+c: int64
+d: int64
+----
+c: [[5,2]]
+d: [[1,4]]
+
from_native(native_object, *, strict=None, pass_through=None, eager_only=None, eager_or_interchange_only=None, series_only=None, allow_series=None)
+
+from_native(native_object: IntoDataFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: Literal[True], series_only: None = ..., allow_series: Literal[True]) -> DataFrame[IntoDataFrameT]
+
from_native(native_object: IntoDataFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: Literal[True], eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True]) -> DataFrame[IntoDataFrameT] | Series
+
from_native(native_object: IntoDataFrameT, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: Literal[True], series_only: None = ..., allow_series: None = ...) -> DataFrame[IntoDataFrameT]
+
from_native(native_object: T, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: Literal[True], series_only: None = ..., allow_series: None = ...) -> T
+
from_native(native_object: IntoDataFrameT, *, pass_through: Literal[True], eager_only: Literal[True], eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: None = ...) -> DataFrame[IntoDataFrameT]
+
from_native(native_object: T, *, pass_through: Literal[True], eager_only: Literal[True], eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: None = ...) -> T
+
from_native(native_object: IntoFrameT | IntoSeriesT, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True]) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series
+
from_native(native_object: IntoSeriesT, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: Literal[True], allow_series: None = ...) -> Series
+
from_native(native_object: IntoFrameT, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: None = ...) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]
+
from_native(native_object: T, *, pass_through: Literal[True], eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: None = ...) -> T
+
from_native(native_object: IntoDataFrameT, *, pass_through: Literal[False] = ..., eager_only: None = ..., eager_or_interchange_only: Literal[True], series_only: None = ..., allow_series: None = ...) -> DataFrame[IntoDataFrameT]
+
from_native(native_object: IntoDataFrameT, *, pass_through: Literal[False] = ..., eager_only: Literal[True], eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: None = ...) -> DataFrame[IntoDataFrameT]
+
from_native(native_object: IntoFrameT | IntoSeriesT, *, pass_through: Literal[False] = ..., eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: Literal[True]) -> DataFrame[Any] | LazyFrame[Any] | Series
+
from_native(native_object: IntoSeriesT, *, pass_through: Literal[False] = ..., eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: Literal[True], allow_series: None = ...) -> Series
+
from_native(native_object: IntoFrameT, *, pass_through: Literal[False] = ..., eager_only: None = ..., eager_or_interchange_only: None = ..., series_only: None = ..., allow_series: None = ...) -> DataFrame[IntoFrameT] | LazyFrame[IntoFrameT]
+
from_native(native_object: Any, *, pass_through: bool, eager_only: bool | None, eager_or_interchange_only: bool | None = None, series_only: bool | None, allow_series: bool | None) -> Any
+
Convert dataframe/series to Narwhals DataFrame, LazyFrame, or Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_object
+ |
+
+ Any
+ |
+
+
+
+ Raw object from user. +Depending on the other arguments, input object can be: +
|
+ + required + | +
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object isn't supported by Narwhals: +
Deprecated (v1.13.0):
+ Please use |
+
+ None
+ |
+
+ pass_through
+ |
+
+ bool | None
+ |
+
+
+
+ Determine what happens if the object isn't supported by Narwhals: +
|
+
+ None
+ |
+
+ eager_only
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects. + |
+
+ None
+ |
+
+ eager_or_interchange_only
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects or objects which +implement the Dataframe Interchange Protocol. + |
+
+ None
+ |
+
+ series_only
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow series. + |
+
+ None
+ |
+
+ allow_series
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to allow series (default is only dataframe / lazyframe). + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ narwhals.DataFrame or narwhals.LazyFrame or narwhals.Series + |
+
from_arrow(native_frame, *, native_namespace)
+
+Construct a DataFrame from an object which supports the PyCapsule Interface.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ native_frame
+ |
+
+ ArrowStreamExportable
+ |
+
+
+
+ Object which implements |
+ + required + | +
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ A new DataFrame + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's define a dataframe-agnostic function which creates a PyArrow +Table.
+>>> @nw.narwhalify
+... def func(df):
+... return nw.from_arrow(df, native_namespace=pa)
+
Let's see what happens when passing pandas / Polars input:
+>>> func(pd.DataFrame(data))
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+>>> func(pl.DataFrame(data))
+pyarrow.Table
+a: int64
+b: int64
+----
+a: [[1,2,3]]
+b: [[4,5,6]]
+
generate_temporary_column_name(n_bytes, columns)
+
+Generates a unique token of specified n_bytes
that is not present in the given
+list of columns.
It relies on the Python secrets token_hex +function to return a string of n_bytes random bytes.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n_bytes
+ |
+
+ int
+ |
+
+
+
+ The number of bytes to generate for the token. + |
+ + required + | +
+ columns
+ |
+
+ list[str]
+ |
+
+
+
+ The list of columns to check for uniqueness. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ str
+ |
+
+
+
+ A unique token that is not present in the given list of columns. + |
+
Raises:
+Type | +Description | +
---|---|
+ AssertionError
+ |
+
+
+
+ If a unique token cannot be generated after 100 attempts. + |
+
Examples:
+>>> import narwhals as nw
+>>> columns = ["abc", "xyz"]
+>>> nw.generate_temporary_column_name(n_bytes=8, columns=columns) not in columns
+True
+
get_level(obj)
+
+Level of support Narwhals has for current object.
+This can be one of:
+df.schema
)get_native_namespace(obj)
+
+Get native namespace from object.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'pandas'...>
+>>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'polars'...>
+
is_ordered_categorical(series)
+
+Return whether indices of categories are semantically meaningful.
+This is a convenience function for accessing what would otherwise be
+the is_ordered
property from the DataFrame Interchange Protocol,
+see https://data-apis.org/dataframe-protocol/latest/API.html.
dtype.ordering == "physical"
.dtype.cat.ordered == True
.dtype.type.ordered == True
.Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = ["x", "y"]
+>>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
+>>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))
+
Let's define a library-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return nw.is_ordered_categorical(s)
+
Then, we can pass any supported library to func
:
>>> func(s_pd)
+True
+>>> func(s_pl)
+True
+
len()
+
+Return the number of rows.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.len())
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ len
+0 2
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ len │
+│ --- │
+│ u32 │
+╞═════╡
+│ 2 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+len: int64
+----
+len: [[2]]
+
lit(value, dtype=None)
+
+Return an expression representing a literal value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any
+ |
+
+
+
+ The value to use as literal. + |
+ + required + | +
+ dtype
+ |
+
+ DType | None
+ |
+
+
+
+ The data type of the literal value. If not provided, the data type will be inferred. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2]})
+>>> df_pd = pd.DataFrame({"a": [1, 2]})
+>>> df_pa = pa.table({"a": [1, 2]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.lit(3))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a literal
+0 1 3
+1 2 3
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────────┐
+│ a ┆ literal │
+│ --- ┆ --- │
+│ i64 ┆ i32 │
+╞═════╪═════════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 3 │
+└─────┴─────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+literal: int64
+----
+a: [[1,2]]
+literal: [[3,3]]
+
max(*columns)
+
+Return the maximum value.
+ + +Syntactic sugar for nw.col(columns).max()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.max("a"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 2
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2]]
+
max_horizontal(*exprs)
+
+Get the maximum value horizontally across columns.
+ + +We support max_horizontal
over numeric columns only.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal max of "a" +and "b" columns:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.max_horizontal("a", "b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(pd.DataFrame(data))
+ a
+0 4.0
+1 8.0
+2 3.0
+>>> func(pl.DataFrame(data))
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 4 │
+│ 8 │
+│ 3 │
+└─────┘
+>>> func(pa.table(data))
+pyarrow.Table
+a: int64
+----
+a: [[4,8,3]]
+
maybe_align_index(lhs, rhs)
+
+Align lhs
to the Index of rhs
, if they're both pandas-like.
This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this only checks that lhs
and rhs
+are the same length.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
+>>> s_pd = pd.Series([6, 7], index=[4, 3])
+>>> df = nw.from_native(df_pd)
+>>> s = nw.from_native(s_pd, series_only=True)
+>>> nw.to_native(nw.maybe_align_index(df, s))
+ a
+4 2
+3 1
+
maybe_convert_dtypes(obj, *args, **kwargs)
+
+Convert columns or series to the best possible dtypes using dtypes supporting pd.NA
, if df is pandas-like.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ T
+ |
+
+
+
+ DataFrame or Series. + |
+ + required + | +
+ *args
+ |
+
+ bool
+ |
+
+
+
+ Additional arguments which get passed through. +
+
+ ()
+ |
+
+ **kwargs
+ |
+
+ bool | str
+ |
+
+
+
+ Additional arguments which get passed through. +
+
+ {}
+ |
+
For non-pandas-like inputs, this is a no-op.
+Also, args
and kwargs
just get passed down to the underlying library as-is.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> import numpy as np
+>>> df_pd = pd.DataFrame(
+... {
+... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
+... }
+... )
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes
+a Int32
+b boolean
+dtype: object
+
maybe_get_index(obj)
+
+Get the index of a DataFrame or a Series, if it's pandas-like.
+ + +This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this returns None
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.maybe_get_index(df)
+RangeIndex(start=0, stop=2, step=1)
+>>> series_pd = pd.Series([1, 2])
+>>> series = nw.from_native(series_pd, series_only=True)
+>>> nw.maybe_get_index(series)
+RangeIndex(start=0, stop=2, step=1)
+
maybe_reset_index(obj)
+
+Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.
+ + +This is only really intended for backwards-compatibility purposes, +for example if your library already resets the index for users. +If you're designing a new library, we highly encourage you to not +rely on the Index. +For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=([6, 7]))
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_reset_index(df))
+ a b
+0 1 4
+1 2 5
+>>> series_pd = pd.Series([1, 2])
+>>> series = nw.from_native(series_pd, series_only=True)
+>>> nw.to_native(nw.maybe_reset_index(series))
+0    1
+1    2
+dtype: int64
+
maybe_set_index(obj, column_names=None, *, index=None)
+
+Set the index of a DataFrame or a Series, if it's pandas-like.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ obj
+ |
+
+ T
+ |
+
+
+
+ object for which to maybe set the index (can be either a Narwhals
+ + required + | +
+ column_names
+ |
+
+ str | list[str] | None
+ |
+
+
+
+ name or list of names of the columns to set as index.
+For dataframes, only one of |
+
+ None
+ |
+
+ index
+ |
+
+ Series | list[Series] | None
+ |
+
+
+
+ series or list of series to set as index. + |
+
+ None
+ |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If one of the following condition happens: +
|
+
This is only really intended for backwards-compatibility purposes, for example if +your library already aligns indices for users. +If you're designing a new library, we highly encourage you to not +rely on the Index.
+For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_set_index(df, "b"))
+ a
+b
+4 1
+5 2
+
mean(*columns)
+
+Get the mean value.
+ + +Syntactic sugar for nw.col(columns).mean()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 8, 3]})
+>>> df_pd = pd.DataFrame({"a": [1, 8, 3]})
+>>> df_pa = pa.table({"a": [1, 8, 3]})
+
We define a dataframe agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.mean("a"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 4.0
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[4]]
+
mean_horizontal(*exprs)
+
+Compute the mean of all values horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function that computes the horizontal mean of "a" +and "b" columns:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.mean_horizontal("a", "b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 2.5
+1 6.5
+2 3.0
+
>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.5 │
+│ 6.5 │
+│ 3.0 │
+└─────┘
+
>>> func(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[2.5,6.5,3]]
+
median(*columns)
+
+Get the median value.
+ + +nw.col(columns).median()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [4, 5, 2]})
+>>> df_pl = pl.DataFrame({"a": [4, 5, 2]})
+>>> df_pa = pa.table({"a": [4, 5, 2]})
+
Let's define a dataframe agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.median("a"))
+
We can then pass any supported library such as pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 4.0
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: double
+----
+a: [[4]]
+
min(*columns)
+
+Return the minimum value.
+ + +Syntactic sugar for nw.col(columns).min()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pa = pa.table({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.min("b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ b
+0 5
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ b │
+│ --- │
+│ i64 │
+╞═════╡
+│ 5 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+b: int64
+----
+b: [[5]]
+
min_horizontal(*exprs)
+
+Get the minimum value horizontally across columns.
+ + +We support min_horizontal
over numeric columns only.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = {
+... "a": [1, 8, 3],
+... "b": [4, 5, None],
+... "c": ["x", "y", "z"],
+... }
+
We define a dataframe-agnostic function that computes the horizontal min of "a" +and "b" columns:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.min_horizontal("a", "b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(pd.DataFrame(data))
+ a
+0 1.0
+1 5.0
+2 3.0
+>>> func(pl.DataFrame(data))
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 5 │
+│ 3 │
+└─────┘
+>>> func(pa.table(data))
+pyarrow.Table
+a: int64
+----
+a: [[1,5,3]]
+
narwhalify(func=None, *, strict=None, pass_through=None, eager_only=False, eager_or_interchange_only=False, series_only=False, allow_series=True)
+
+Decorate function so it becomes dataframe-agnostic.
+narwhalify
will try to convert any dataframe/series-like object into its respective
+Narwhals DataFrame/Series, while leaving the other parameters as they are.
Similarly, if the output of the function is a narwhals DataFrame or Series, it will be +converted back to the original dataframe/series type, while if the output is another +type it will be left as is.
+By setting strict=True
, then every input and every output will be required to be a
+dataframe/series-like object.
Instead of writing
+import narwhals as nw
+
+
+def func(df):
+ df = nw.from_native(df, strict=False)
+ df = df.group_by("a").agg(nw.col("b").sum())
+ return nw.to_native(df)
+
you can just write
+import narwhals as nw
+
+
+@nw.narwhalify
+def func(df):
+ return df.group_by("a").agg(nw.col("b").sum())
+
You can also pass in extra arguments, e.g.
+@nw.narwhalify(eager_only=True)
+
that will get passed down to nw.from_native
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ func
+ |
+
+ Callable[..., Any] | None
+ |
+
+
+
+ Function to wrap in a |
+
+ None
+ |
+
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to raise if object can't be converted or to just leave it as-is +(default). + |
+
+ None
+ |
+
+ eager_only
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects. + |
+
+ False
+ |
+
+ eager_or_interchange_only
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects or objects which +implement the Dataframe Interchange Protocol. + |
+
+ False
+ |
+
+ series_only
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow series. + |
+
+ False
+ |
+
+ allow_series
+ |
+
+ bool | None
+ |
+
+
+
+ Whether to allow series (default is only dataframe / lazyframe). + |
+
+ True
+ |
+
new_series(name, values, dtype=None, *, native_namespace)
+
+Instantiate Narwhals Series from iterable (e.g. list or array).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ Name of resulting Series. + |
+ + required + | +
+ values
+ |
+
+ Any
+ |
+
+
+
+ Values to make the Series from. +
+ + required + | +
+ dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ (Narwhals) dtype. If not provided, the native library
+may auto-infer it from |
+
+ None
+ |
+
+ native_namespace
+ |
+
+ ModuleType
+ |
+
+
+
+ The native library to use for DataFrame creation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Series
+ |
+
+
+
+ A new Series + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... values = [4, 1, 2]
+... native_namespace = nw.get_native_namespace(df)
+... return nw.new_series(
+... name="c",
+... values=values,
+... dtype=nw.Int32,
+... native_namespace=native_namespace,
+... )
+
Let's see what happens when passing pandas / Polars input:
+>>> func(pd.DataFrame(data))
+0 4
+1 1
+2 2
+Name: c, dtype: int32
+>>> func(pl.DataFrame(data))
+shape: (3,)
+Series: 'c' [i32]
+[
+ 4
+ 1
+ 2
+]
+
nth(*indices)
+
+Creates an expression that references one or more columns by their index(es).
+ + +nth
is not supported for Polars version<1.0.0. Please use col
instead.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ indices
+ |
+
+ int | Sequence[int]
+ |
+
+
+
+ One or more indices representing the columns to retrieve. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.nth(0) * 2)
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 2
+1 4
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 4 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[2,4]]
+
sum(*columns)
+
+Sum all values.
+ + +Syntactic sugar for nw.col(columns).sum()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ columns
+ |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2]})
+>>> df_pd = pd.DataFrame({"a": [1, 2]})
+>>> df_pa = pa.table({"a": [1, 2]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.sum("a"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 3
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[3]]
+
sum_horizontal(*exprs)
+
+Sum all values horizontally across columns.
+ + +Unlike Polars, we support horizontal sum over numeric columns only.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ exprs
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts +expression input. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [5, 10, None]}
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+>>> df_pa = pa.table(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.sum_horizontal("a", "b"))
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a
+0 6.0
+1 12.0
+2 3.0
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 6 │
+│ 12 │
+│ 3 │
+└─────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+----
+a: [[6,12,3]]
+
when(*predicates)
+
+Start a when-then-otherwise
expression.
Expression similar to an if-else
statement in Python. Always initiated by a nw.when(<condition>).then(<value if condition>)
, and optionally followed by chaining one or more .when(<condition>).then(<value>)
statements.
+Chained when-then operations should be read as Python if, elif, ... elif
blocks, not as if, if, ... if
, i.e. the first condition that evaluates to True
will be picked.
+If none of the conditions are True
, an optional .otherwise(<value if all statements are false>)
can be appended at the end. If not appended, and none of the conditions are True
, None
will be returned.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ predicates
+ |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]})
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]})
+>>> df_pa = pa.table({"a": [1, 2, 3], "b": [5, 10, 15]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df_any):
+... return df_any.with_columns(
+... nw.when(nw.col("a") < 3).then(5).otherwise(6).alias("a_when")
+... )
+
We can pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd)
+ a b a_when
+0 1 5 5
+1 2 10 5
+2 3 15 6
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬────────┐
+│ a ┆ b ┆ a_when │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i32 │
+╞═════╪═════╪════════╡
+│ 1 ┆ 5 ┆ 5 │
+│ 2 ┆ 10 ┆ 5 │
+│ 3 ┆ 15 ┆ 6 │
+└─────┴─────┴────────┘
+>>> func(df_pa)
+pyarrow.Table
+a: int64
+b: int64
+a_when: int64
+----
+a: [[1,2,3]]
+b: [[5,10,15]]
+a_when: [[5,5,6]]
+
show_versions()
+
+Print useful debugging information
+Examples:
+>>> from narwhals import show_versions
+>>> show_versions() # doctest: +SKIP
+
+
+ to_native(narwhals_object, *, strict=None, pass_through=None)
+
+to_native(narwhals_object: DataFrame[IntoDataFrameT], *, pass_through: Literal[False] = ...) -> IntoDataFrameT
+
to_native(narwhals_object: LazyFrame[IntoFrameT], *, pass_through: Literal[False] = ...) -> IntoFrameT
+
to_native(narwhals_object: Series, *, pass_through: Literal[False] = ...) -> Any
+
to_native(narwhals_object: Any, *, pass_through: bool) -> Any
+
Convert Narwhals object to native one.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ narwhals_object
+ |
+
+ DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series
+ |
+
+
+
+ Narwhals object. + |
+ + required + | +
+ strict
+ |
+
+ bool | None
+ |
+
+
+
+ whether to raise on non-Narwhals input. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ IntoFrameT | Any
+ |
+
+
+
+ Object of class that user started with. + |
+
to_py_scalar(scalar_like)
+
+If a scalar is not Python native, converts it to Python native.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the object is not convertible to a scalar. + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.to_py_scalar(df["a"].item(0))
+1
+>>> import pyarrow as pa
+>>> df = nw.from_native(pa.table({"a": [1, 2, 3]}))
+>>> nw.to_py_scalar(df["a"].item(0))
+1
+>>> nw.to_py_scalar(1)
+1
+
narwhals.Schema
Ordered mapping of column names to their data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ schema
+ |
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None
+ |
+
+
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None +The schema definition given by column names and their associated +instantiated Narwhals data type. Accepts a mapping or an iterable of tuples. + |
+
+ None
+ |
+
Examples:
+Define a schema by passing instantiated data types.
+>>> import narwhals as nw
+>>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()})
+>>> schema
+Schema({'foo': Int8, 'bar': String})
+
Access the data type associated with a specific column name.
+>>> schema["foo"]
+Int8
+
Access various schema properties using the names
, dtypes
, and len
methods.
>>> schema.names()
+['foo', 'bar']
+>>> schema.dtypes()
+[Int8, String]
+>>> schema.len()
+2
+
names()
+
+Get the column names of the schema.
+ +dtypes()
+
+Get the data types of the schema.
+ +len()
+
+Get the number of columns in the schema.
+ +narwhals.selectors
The following selectors are all supported. In addition, just like in Polars, the following +set operations are supported:
+&
|
-
~
boolean()
+
+Select boolean columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select boolean +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.boolean())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ c
+0 False
+1 True
+>>> func(df_pl)
+shape: (2, 1)
+┌───────┐
+│ c │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ true │
+└───────┘
+
by_dtype(*dtypes)
+
+Select columns based on their dtype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtypes
+ |
+
+ Any
+ |
+
+
+
+ one or more data types to select + |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select int64 and float64 +dtypes and multiplies each value by 2:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
categorical()
+
+Select categorical columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data).astype({"b": "category"})
+>>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical})
+
Let's define a dataframe-agnostic function to select categorical
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.categorical())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ cat │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
numeric()
+
+Select numeric columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select numeric +dtypes and multiplies each value by 2:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.numeric() * 2)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
string()
+
+Select string columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select string +dtypes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(ncs.string())
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ str │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
narwhals.Series
Narwhals Series, backed by a native series.
+The native series might be pandas.Series, polars.Series, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
, making sure to pass allow_series=True
or
+series_only=True
.
dtype: DType
+
+
+ property
+
+
+Get the data type of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dtype
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+Int64
+>>> func(s_pl)
+Int64
+
name: str
+
+
+ property
+
+
+Get the name of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.name
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+'foo'
+>>> func(s_pl)
+'foo'
+
shape: tuple[int]
+
+
+ property
+
+
+Get the shape of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.shape
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+(3,)
+>>> func(s_pl)
+(3,)
+
__arrow_c_stream__(requested_schema=None)
+
+Export a Series via the Arrow PyCapsule Interface.
+Narwhals doesn't implement anything itself here: it uses to_arrow and then defers to PyArrow's implementation. See the PyCapsule Interface for more.
+ +__getitem__(idx)
+
+__getitem__(idx: int) -> Any
+
__getitem__(idx: slice | Sequence[int]) -> Self
+
__iter__()
+
+abs()
+
+Calculate the absolute value of each element.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, -4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.abs()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 4
+2 3
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 3
+]
+
alias(name)
+
+Rename the Series.
+ + +This method is very cheap, but does not guarantee that data +will be copied. For example:
+s1: nw.Series
+s2 = s1.alias("foo")
+arr = s2.to_numpy()
+arr[0] = 999
+
may (depending on the backend, and on the version) result in
+s1
's data being modified. We recommend:
- if you need to alias an object and don't need the original
+ one around any more, just use `alias` without worrying about it.
+- if you were expecting `alias` to copy data, then explicitly call
+ `.clone` before calling `alias`.
+
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.alias("bar")
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> func(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+>>> func(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
all()
+
+Return whether all values in the Series are True.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [True, False, True]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.all()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.False_
+>>> func(s_pl)
+False
+
any()
+
+Return whether any of the values in the Series are True.
+ + +Only works on Series of data type Boolean.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [False, True, False]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.any()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.True_
+>>> func(s_pl)
+True
+
arg_true()
+
+Find elements where boolean Series is True.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = [1, None, None, 2]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_null().arg_true()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+1 1
+2 2
+Name: a, dtype: int64
+>>> func(s_pl)
+shape: (2,)
+Series: 'a' [u32]
+[
+ 1
+ 2
+]
+
cast(dtype)
+
+Cast between data types.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ dtype
+ |
+
+ DType | type[DType]
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [True, False, True]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.cast(nw.Int64)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 0
+2 1
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 0
+ 1
+]
+
clip(lower_bound=None, upper_bound=None)
+
+Clip values in the Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Lower bound value. + |
+
+ None
+ |
+
+ upper_bound
+ |
+
+ Any | None
+ |
+
+
+
+ Upper bound value. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>>
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func_lower(s):
+... return s.clip(2)
+
We can then pass either pandas or Polars to func_lower
:
>>> func_lower(s_pd)
+0 2
+1 2
+2 3
+dtype: int64
+>>> func_lower(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 2
+ 3
+]
+
We define another library agnostic function:
+>>> @nw.narwhalify
+... def func_upper(s):
+... return s.clip(upper_bound=2)
+
We can then pass either pandas or Polars to func_upper
:
>>> func_upper(s_pd)
+0 1
+1 2
+2 2
+dtype: int64
+>>> func_upper(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 2
+]
+
We can have both at the same time:
+>>> s = [-1, 1, -3, 3, -5, 5]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.clip(-1, 3)
+
We can pass either pandas or Polars to func
:
>>> func(s_pd)
+0 -1
+1 1
+2 -1
+3 3
+4 -1
+5 3
+dtype: int64
+>>> func(s_pl)
+shape: (6,)
+Series: '' [i64]
+[
+ -1
+ 1
+ -1
+ 3
+ -1
+ 3
+]
+
count()
+
+Returns the number of non-null elements in the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.count()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(3)
+>>> func(s_pl)
+3
+
cum_sum()
+
+Calculate the cumulative sum.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.cum_sum()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 6
+2 9
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 6
+ 9
+]
+
diff()
+
+Calculate the difference with the previous element, for each element.
+ + +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in a Int64 column, you could
+do:
s.diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.diff()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 2.0
+2 -1.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ -1
+]
+
drop_nulls()
+
+Drop all null values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> import narwhals as nw
+>>> s_pd = pd.Series([2, 4, None, 3, 5])
+>>> s_pl = pl.Series("a", [2, 4, None, 3, 5])
+
Now define a dataframe-agnostic function:
>>> @nw.narwhalify
+... def func(s):
+... return s.drop_nulls()
+
Then we can pass either Series (polars or pandas) to func
:
>>> func(s_pd)
+0 2.0
+1 4.0
+3 3.0
+4 5.0
+dtype: float64
+>>> func(s_pl)
+shape: (4,)
+Series: 'a' [i64]
+[
+ 2
+ 4
+ 3
+ 5
+]
+
fill_null(value=None, strategy=None, limit=None)
+
+Fill null values using the specified value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ value
+ |
+
+ Any | None
+ |
+
+
+
+ Value used to fill null values. + |
+
+ None
+ |
+
+ strategy
+ |
+
+ Literal['forward', 'backward'] | None
+ |
+
+
+
+ Strategy used to fill null values. + |
+
+ None
+ |
+
+ limit
+ |
+
+ int | None
+ |
+
+
+
+ Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. + |
+
+ None
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, None]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.fill_null(5)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1.0
+1 2.0
+2 5.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 5
+]
+
Using a strategy:
+>>> @nw.narwhalify
+... def func_strategies(s):
+... return s.fill_null(strategy="forward", limit=1)
+
>>> func_strategies(s_pd)
+0 1.0
+1 2.0
+2 2.0
+dtype: float64
+
>>> func_strategies(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 2
+]
+
filter(other)
+
+Filter elements in the Series based on a condition.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [4, 10, 15, 34, 50]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.filter(s > 10)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+2 15
+3 34
+4 50
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 15
+ 34
+ 50
+]
+
gather_every(n, offset=0)
+
+Take every nth value in the Series and return as new Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Gather every n-th row. + |
+ + required + | +
+ offset
+ |
+
+ int
+ |
+
+
+
+ Starting index. + |
+
+ 0
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
Let's define a dataframe-agnostic function that gathers every 2 rows, starting from an offset of 1:
+>>> @nw.narwhalify
+... def func(s):
+... return s.gather_every(n=2, offset=1)
+
>>> func(s_pd)
+1 2
+3 4
+Name: a, dtype: int64
+
>>> func(s_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 2
+ 4
+]
+
head(n=10)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> @nw.narwhalify
+... def func(s):
+... return s.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 0
+1 1
+2 2
+dtype: int64
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 0
+ 1
+ 2
+]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Get a boolean mask of the values that are between the given lower/upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ lower_bound
+ |
+
+ Any
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
+ upper_bound
+ |
+
+ Any
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
+ closed
+ |
+
+ str
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
If the value of the lower_bound
is greater than that of the upper_bound
,
+then the values will be False, as no value can satisfy the condition.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s_pd = pd.Series([1, 2, 3, 4, 5])
+>>> s_pl = pl.Series([1, 2, 3, 4, 5])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_between(2, 4, "right")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ false
+]
+
is_duplicated()
+
+Get a mask of all duplicated rows in the Series.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 2, 3, 1])
+>>> s_pl = pl.Series([1, 2, 3, 1])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_duplicated()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+>>> func(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
is_empty()
+
+Check if the series is empty.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that keeps values greater than 10, and then checks if the result is empty or not:
+>>> @nw.narwhalify
+... def func(s):
+... return s.filter(s > 10).is_empty()
+
We can then pass either pandas or Polars to func
:
>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+>>> func(s_pd), func(s_pl)
+(True, True)
+
>>> s_pd = pd.Series([100, 2, 3])
+>>> s_pl = pl.Series([100, 2, 3])
+>>> func(s_pd), func(s_pl)
+(False, False)
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2])
+>>> s_pl = pl.Series([1, 1, 2, 3, 2])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_first_distinct()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 True
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ true
+ false
+ true
+ true
+ false
+]
+
is_in(other)
+
+Check if the elements of this Series are in the other sequence.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ other
+ |
+
+ Any
+ |
+
+
+
+ Sequence of primitive type. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_in([3, 2, 8])
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 True
+dtype: bool
+>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2])
+>>> s_pl = pl.Series([1, 1, 2, 3, 2])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_last_distinct()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 False
+3 True
+4 True
+dtype: bool
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ true
+ false
+ true
+ true
+]
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, None]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_null()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 False
+2 True
+dtype: bool
+>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+]
+
is_sorted(*, descending=False)
+
+Check if the Series is sorted.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Check if the Series is sorted in descending order. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> unsorted_data = [1, 3, 2]
+>>> sorted_data = [3, 2, 1]
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s, descending=False):
+... return s.is_sorted(descending=descending)
+
We can then pass either pandas or Polars to func
:
>>> func(pl.Series(unsorted_data))
+False
+>>> func(pl.Series(sorted_data), descending=True)
+True
+>>> func(pd.Series(unsorted_data))
+False
+>>> func(pd.Series(sorted_data), descending=True)
+True
+
is_unique()
+
+Get a mask of all unique rows in the Series.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 2, 3, 1])
+>>> s_pl = pl.Series([1, 2, 3, 1])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.is_unique()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> func(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+
item(index=None)
+
+Return the Series as a scalar, or return the element at the given index.
+If no index is provided, this is equivalent to s[0]
, with a check
+that the shape is (1,). With an index, this is equivalent to s[index]
.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that returns the item at a given index:
+>>> @nw.narwhalify
+... def func(s, index=None):
+... return s.item(index)
+
We can then pass either pandas or Polars to func
:
>>> func(pl.Series("a", [1]), None), func(pd.Series([1]), None)
+(1, np.int64(1))
+
>>> func(pl.Series("a", [9, 8, 7]), -1), func(pl.Series([9, 8, 7]), -2)
+(7, 8)
+
len()
+
+Return the number of elements in the Series.
+Null values count towards the total.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that computes the len of the series:
+>>> @nw.narwhalify
+... def func(s):
+... return s.len()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+3
+>>> func(s_pl)
+3
+
max()
+
+Get the maximum value in this Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.max()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(3)
+>>> func(s_pl)
+3
+
mean()
+
+Reduce this Series to the mean value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.mean()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.float64(2.0)
+>>> func(s_pl)
+2.0
+
median()
+
+Reduce this Series to the median value.
+ + +Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> s = [5, 3, 8]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+>>> s_pa = pa.chunked_array([s])
+
Let's define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.median()
+
We can then pass any supported library such as pandas, Polars, or PyArrow to func
:
>>> func(s_pd)
+np.float64(5.0)
+>>> func(s_pl)
+5.0
+>>> func(s_pa)
+<pyarrow.DoubleScalar: 5.0>
+
min()
+
+Get the minimal value in this Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.min()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(1)
+>>> func(s_pl)
+1
+
mode()
+
+Compute the most occurring value(s).
+Can return multiple values.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+
>>> data = [1, 1, 2, 2, 3]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.mode().sort()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 2
+Name: a, dtype: int64
+
>>> func(s_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+
n_unique()
+
+Count the number of unique values.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.n_unique()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+3
+>>> func(s_pl)
+3
+
null_count()
+
+Return the number of null values in the Series.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, None, 3])
+>>> s_pl = pl.Series([1, None, None])
+
Let's define a dataframe-agnostic function that returns the null count of +the series:
+>>> @nw.narwhalify
+... def func(s):
+... return s.null_count()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(1)
+>>> func(s_pl)
+2
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> s_pd = pd.Series([1, 2, 3, 4])
+>>> s_pl = pl.Series([1, 2, 3, 4])
+
+Let's define a function to pipe into:
+>>> @nw.narwhalify
+... def func(s):
+... return s.pipe(lambda x: x + 2)
+
Now apply it to the series
+>>> func(s_pd)
+0 3
+1 4
+2 5
+3 6
+dtype: int64
+>>> func(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 3
+ 4
+ 5
+ 6
+]
+
quantile(quantile, interpolation)
+
+Get quantile value of the series.
+ + +pandas and Polars may have implementation differences for a given interpolation method.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ quantile
+ |
+
+ float
+ |
+
+
+
+ Quantile between 0.0 and 1.0. + |
+ + required + | +
+ interpolation
+ |
+
+ Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
+ |
+
+
+
+ Interpolation method. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(50))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return [
+... s.quantile(quantile=q, interpolation="nearest")
+... for q in (0.1, 0.25, 0.5, 0.75, 0.9)
+... ]
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+[np.int64(5), np.int64(12), np.int64(24), np.int64(37), np.int64(44)]
+
>>> func(s_pl)
+[5.0, 12.0, 25.0, 37.0, 44.0]
+
rename(name)
+
+Rename the Series.
+Alias for Series.alias()
.
This method is very cheap, but does not guarantee that data +will be copied. For example:
+s1: nw.Series
+s2 = s1.rename("foo")
+arr = s2.to_numpy()
+arr[0] = 999
+
may (depending on the backend, and on the version) result in
+s1
's data being modified. We recommend:
- if you need to rename an object and don't need the original
+ one around any more, just use `rename` without worrying about it.
+- if you were expecting `rename` to copy data, then explicitly call
+ `.clone` before calling `rename`.
+
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ name
+ |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+>>> s_pa = pa.chunked_array([s])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.rename("bar")
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> func(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+>>> func(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 1,
+ 2,
+ 3
+ ]
+]
+
replace_strict(old, new=None, *, return_dtype=None)
+
+Replace all values by different values.
+This function must replace all non-null input values (else it raises an error).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ old
+ |
+
+ Sequence[Any] | Mapping[Any, Any]
+ |
+
+
+
+ Sequence of values to replace. It also accepts a mapping of values to
+their replacement as syntactic sugar for
+ |
+ + required + | +
+ new
+ |
+
+ Sequence[Any] | None
+ |
+
+
+
+ Sequence of values to replace by. Length must match the length of |
+
+ None
+ |
+
+ return_dtype
+ |
+
+ DType | type[DType] | None
+ |
+
+
+
+ The data type of the resulting expression. If set to |
+
+ None
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]})
+>>> df_pa = pa.table({"a": [3, 0, 1, 2]})
+
Let's define dataframe-agnostic functions:
+>>> @nw.narwhalify
+... def func(s):
+... return s.replace_strict(
+... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String
+... )
+
We can then pass any supported library such as Pandas, Polars, or PyArrow to func
:
>>> func(df_pd["a"])
+0 three
+1 zero
+2 one
+3 two
+Name: a, dtype: object
+>>> func(df_pl["a"])
+shape: (4,)
+Series: 'a' [str]
+[
+ "three"
+ "zero"
+ "one"
+ "two"
+]
+>>> func(df_pa["a"])
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ "three",
+ "zero",
+ "one",
+ "two"
+ ]
+]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ decimals
+ |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 to 4.0, etc..).
+Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..).
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1.12345, 2.56789, 3.901234]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> @nw.narwhalify
+... def func(s):
+... return s.round(1)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1.1
+1 2.6
+2 3.9
+dtype: float64
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [f64]
+[
+ 1.1
+ 2.6
+ 3.9
+]
+
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
+
+Sample randomly from this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
+ fraction
+ |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
+ with_replacement
+ |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
+ seed
+ |
+
+ int | None
+ |
+
+
+
+ Seed for the random number generator. If set to None (default), a random +seed is generated for each sample operation. + |
+
+ None
+ |
+
The sample
method returns a Series with a specified number of
+randomly selected items chosen from this Series.
+The results are not consistent across libraries.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
>>> s_pd = pd.Series([1, 2, 3, 4])
+>>> s_pl = pl.Series([1, 2, 3, 4])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.sample(fraction=1.0, with_replacement=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ a
+2 3
+1 2
+3 4
+3 4
+>>> func(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 4
+]
+
scatter(indices, values)
+
+Set value(s) at given position(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ indices
+ |
+
+ int | Sequence[int]
+ |
+
+
+
+ Position(s) to set items at. + |
+ + required + | +
+ values
+ |
+
+ Any
+ |
+
+
+
+ Values to set. + |
+ + required + | +
This method always returns a new Series, without modifying the original one. +Using this function in a for-loop is an anti-pattern, we recommend building +up your positions and values beforehand and doing an update in one go.
+For example, instead of
+for i in [1, 3, 2]:
+ value = some_function(i)
+ s = s.scatter(i, value)
+
prefer
+positions = [1, 3, 2]
+values = [some_function(x) for x in positions]
+s = s.scatter(positions, values)
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(df["a"].scatter([0, 1], [999, 888]))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 999 4
+1 888 5
+2 3 6
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 999 ┆ 4 │
+│ 888 ┆ 5 │
+│ 3 ┆ 6 │
+└─────┴─────┘
+
shift(n)
+
+Shift values by n
positions.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of indices to shift forward. If a negative value is passed, +values are shifted in the opposite direction instead. + |
+ + required + | +
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in a Int64 column, you could
+do:
s.shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.shift(1)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 2.0
+2 4.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ 4
+]
+
sort(*, descending=False, nulls_last=False)
+
+Sort this Series. Place null values first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ descending
+ |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
+ nulls_last
+ |
+
+ bool
+ |
+
+
+
+ Place null values last instead of first. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [5, None, 1, 2]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define library agnostic functions:
+>>> @nw.narwhalify
+... def func(s):
+... return s.sort()
+
>>> @nw.narwhalify
+... def func_descend(s):
+... return s.sort(descending=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+dtype: float64
+>>> func(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 1
+ 2
+ 5
+]
+>>> func_descend(s_pd)
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+dtype: float64
+>>> func_descend(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 5
+ 2
+ 1
+]
+
std(*, ddof=1)
+
+Get the standard deviation of this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ ddof
+ |
+
+ int
+ |
+
+
+
+ “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, + where N represents the number of elements. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.std()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.float64(1.0)
+>>> func(s_pl)
+1.0
+
sum()
+
+Reduce this Series to the sum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.sum()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(6)
+>>> func(s_pl)
+6
+
tail(n=10)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 10
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> @nw.narwhalify
+... def func(s):
+... return s.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+7 7
+8 8
+9 9
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 7
+ 8
+ 9
+]
+
to_arrow()
+
+Convert to arrow.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, 3, 4]
+>>> s_pd = pd.Series(name="a", data=data)
+>>> s_pl = pl.Series(name="a", values=data)
+
Let's define a dataframe-agnostic function that converts to arrow:
+>>> @nw.narwhalify
+... def func(s):
+... return s.to_arrow()
+
>>> func(s_pd)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
>>> func(s_pl)
+<pyarrow.lib.Int64Array object at ...>
+[
+ 1,
+ 2,
+ 3,
+ 4
+]
+
to_dummies(*, separator='_', drop_first=False)
+
+Get dummy/indicator variables.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ separator
+ |
+
+ str
+ |
+
+
+
+ Separator/delimiter used when generating column names. + |
+
+ '_'
+ |
+
+ drop_first
+ |
+
+ bool
+ |
+
+
+
+ Remove the first category from the variable being encoded. + |
+
+ False
+ |
+
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, 3]
+>>> s_pd = pd.Series(data, name="a")
+>>> s_pl = pl.Series("a", data)
+
Let's define a dataframe-agnostic function that gets dummy/indicator variables:
+>>> @nw.narwhalify
+... def func(s, drop_first: bool = False):
+... return s.to_dummies(drop_first=drop_first)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ a_1 a_2 a_3
+0 1 0 0
+1 0 1 0
+2 0 0 1
+
>>> func(s_pd, drop_first=True)
+ a_2 a_3
+0 0 0
+1 1 0
+2 0 1
+
>>> func(s_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a_1 ┆ a_2 ┆ a_3 │
+│ --- ┆ --- ┆ --- │
+│ i8 ┆ i8 ┆ i8 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 0 ┆ 0 │
+│ 0 ┆ 1 ┆ 0 │
+│ 0 ┆ 0 ┆ 1 │
+└─────┴─────┴─────┘
+>>> func(s_pl, drop_first=True)
+shape: (3, 2)
+┌─────┬─────┐
+│ a_2 ┆ a_3 │
+│ --- ┆ --- │
+│ i8 ┆ i8 │
+╞═════╪═════╡
+│ 0 ┆ 0 │
+│ 1 ┆ 0 │
+│ 0 ┆ 1 │
+└─────┴─────┘
+
to_frame()
+
+Convert to dataframe.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.to_frame()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ a
+0 1
+1 2
+2 3
+>>> func(s_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
to_list()
+
+Convert to list.
+ + +This function converts to Python scalars. It's typically +more efficient to keep your data in the format native to +your original dataframe, so we recommend only calling this +when you absolutely need to.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.to_list()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+[1, 2, 3]
+>>> func(s_pl)
+[1, 2, 3]
+
to_numpy()
+
+Convert to numpy.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.to_numpy()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+array([1, 2, 3]...)
+>>> func(s_pl)
+array([1, 2, 3]...)
+
to_pandas()
+
+Convert to pandas.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.to_pandas()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+>>> func(s_pl)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+
to_native()
+
+Convert Narwhals series to native series.
+ + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Series of class that user started with. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.to_native()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 2
+2 3
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 3
+]
+
unique(*, maintain_order=False)
+
+Returns unique values of the series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ maintain_order
+ |
+
+ bool
+ |
+
+
+
+ Keep the same order as the original series. This may be more
+expensive to compute. Setting this to |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 4, 6]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.unique(maintain_order=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 4
+2 6
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 6
+]
+
value_counts(*, sort=False, parallel=False, name=None, normalize=False)
+
+Count the occurrences of unique values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ sort
+ |
+
+ bool
+ |
+
+
+
+ Sort the output by count in descending order. If set to False (default), +the order of the output is random. + |
+
+ False
+ |
+
+ parallel
+ |
+
+ bool
+ |
+
+
+
+ Execute the computation in parallel. Used for Polars only. + |
+
+ False
+ |
+
+ name
+ |
+
+ str | None
+ |
+
+
+
+ Give the resulting count column a specific name; if |
+
+ None
+ |
+
+ normalize
+ |
+
+ bool
+ |
+
+
+
+ If true, gives relative frequencies of the unique values. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2], name="s")
+>>> s_pl = pl.Series(values=[1, 1, 2, 3, 2], name="s")
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.value_counts(sort=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ s count
+0 1 2
+1 2 2
+2 3 1
+
>>> func(s_pl)
+shape: (3, 2)
+┌─────┬───────┐
+│ s ┆ count │
+│ --- ┆ --- │
+│ i64 ┆ u32 │
+╞═════╪═══════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 2 │
+│ 3 ┆ 1 │
+└─────┴───────┘
+
zip_with(mask, other)
+
+Take values from self or other based on the given mask.
+Where mask evaluates true, take values from self. Where mask evaluates false, +take values from other.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ mask
+ |
+
+ Self
+ |
+
+
+
+ Boolean Series + |
+ + required + | +
+ other
+ |
+
+ Self
+ |
+
+
+
+ Series of same type. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s1_pl = pl.Series([1, 2, 3, 4, 5])
+>>> s2_pl = pl.Series([5, 4, 3, 2, 1])
+>>> mask_pl = pl.Series([True, False, True, False, True])
+>>> s1_pd = pd.Series([1, 2, 3, 4, 5])
+>>> s2_pd = pd.Series([5, 4, 3, 2, 1])
+>>> mask_pd = pd.Series([True, False, True, False, True])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s1_any, mask_any, s2_any):
+... return s1_any.zip_with(mask_any, s2_any)
+
We can then pass either pandas or Polars to func
:
>>> func(s1_pl, mask_pl, s2_pl)
+shape: (5,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 2
+ 5
+]
+>>> func(s1_pd, mask_pd, s2_pd)
+0 1
+1 4
+2 3
+3 2
+4 5
+dtype: int64
+
narwhals.Series.cat
get_categories()
+
+Get unique categories from column.
+ + +Examples:
+Let's create some series:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["apple", "mango", "mango"]
+>>> s_pd = pd.Series(data, dtype="category")
+>>> s_pl = pl.Series(data, dtype=pl.Categorical)
+
We define a dataframe-agnostic function to get unique categories +from column 'fruits':
+>>> @nw.narwhalify(series_only=True)
+... def func(s):
+... return s.cat.get_categories()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 apple
+1 mango
+dtype: object
+>>> func(s_pl)
+shape: (2,)
+Series: '' [str]
+[
+ "apple"
+ "mango"
+]
+
narwhals.Series.dt
convert_time_zone(time_zone)
+
+Convert time zone.
+If converting from a time-zone-naive column, then conversion happens +as if converting from UTC.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.convert_time_zone("Asia/Kathmandu")
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> func(s_pd)
+0 2024-01-01 05:45:00+05:45
+1 2024-01-02 05:45:00+05:45
+dtype: datetime64[ns, Asia/Kathmandu]
+>>> func(s_pl)
+shape: (2,)
+Series: '' [datetime[μs, Asia/Kathmandu]]
+[
+ 2024-01-01 05:45:00 +0545
+ 2024-01-02 05:45:00 +0545
+]
+>>> func(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2024-01-01 00:00:00.000000Z,
+ 2024-01-02 00:00:00.000000Z
+ ]
+]
+
date()
+
+Get the date in a datetime series.
+ + +Raises:
+Type | +Description | +
---|---|
+ NotImplementedError
+ |
+
+
+
+ If pandas default backend is being used. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]
+>>> s_pd = pd.Series(dates).convert_dtypes(dtype_backend="pyarrow")
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.date()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2012-01-07
+1 2023-03-10
+dtype: date32[day][pyarrow]
+
>>> func(s_pl)
+shape: (2,)
+Series: '' [date]
+[
+ 2012-01-07
+ 2023-03-10
+]
+
day()
+
+Extracts the day in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.day()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 5
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 1
+ 5
+]
+
hour()
+
+Extracts the hour in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.hour()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 5
+1 9
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 5
+ 9
+]
+
microsecond()
+
+Extracts the microseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+
>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.microsecond().alias("datetime")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 400000
+1 600000
+2 800000
+3 0
+4 200000
+Name: datetime, dtype: int...
+>>> func(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400000
+ 600000
+ 800000
+ 0
+ 200000
+]
+
millisecond()
+
+Extracts the milliseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+
>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.millisecond().alias("datetime")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 400
+1 600
+2 800
+3 0
+4 200
+Name: datetime, dtype: int...
+>>> func(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400
+ 600
+ 800
+ 0
+ 200
+]
+
minute()
+
+Extracts the minute in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.minute()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 3
+1 12
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 3
+ 12
+]
+
month()
+
+Gets the month in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.month()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 8
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 2
+ 8
+]
+
nanosecond()
+
+Extracts the nanosecond(s) in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [
+... datetime(2022, 1, 1, 5, 3, 10, 500000),
+... datetime(2022, 1, 5, 9, 12, 4, 60000),
+... ]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.nanosecond()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 500000000
+1 60000000
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 500000000
+ 60000000
+]
+
ordinal_day()
+
+Get ordinal day.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.ordinal_day()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 216
+dtype: int32
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i16]
+[
+ 1
+ 216
+]
+
replace_time_zone(time_zone)
+
+Replace time zone.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_zone
+ |
+
+ str | None
+ |
+
+
+
+ Target time zone. + |
+ + required + | +
Examples:
+>>> from datetime import datetime, timezone
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [
+... datetime(2024, 1, 1, tzinfo=timezone.utc),
+... datetime(2024, 1, 2, tzinfo=timezone.utc),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.replace_time_zone("Asia/Kathmandu")
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> func(s_pd)
+0 2024-01-01 00:00:00+05:45
+1 2024-01-02 00:00:00+05:45
+dtype: datetime64[ns, Asia/Kathmandu]
+>>> func(s_pl)
+shape: (2,)
+Series: '' [datetime[μs, Asia/Kathmandu]]
+[
+ 2024-01-01 00:00:00 +0545
+ 2024-01-02 00:00:00 +0545
+]
+>>> func(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 2023-12-31 18:15:00.000000Z,
+ 2024-01-01 18:15:00.000000Z
+ ]
+]
+
second()
+
+Extracts the second(s) in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.second()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 4
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 10
+ 4
+]
+
timestamp(time_unit='us')
+
+Return a timestamp in the given time unit.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ time_unit
+ |
+
+ Literal['ns', 'us', 'ms']
+ |
+
+
+
+ {'ns', 'us', 'ms'} +Time unit. + |
+
+ 'us'
+ |
+
Examples:
+>>> from datetime import date
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> data = [date(2001, 1, 1), None, date(2001, 1, 3)]
+>>> s_pd = pd.Series(data, dtype="datetime64[ns]")
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.timestamp("ms")
+
We can then pass pandas / PyArrow / Polars / any other supported library:
+>>> func(s_pd)
+0 9.783072e+11
+1 NaN
+2 9.784800e+11
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 978307200000
+ null
+ 978480000000
+]
+>>> func(s_pa)
+<pyarrow.lib.ChunkedArray object at ...>
+[
+ [
+ 978307200000,
+ null,
+ 978480000000
+ ]
+]
+
total_microseconds()
+
+Get total microseconds.
+ + +The function outputs the total microseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.total_microseconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 1200
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 1200
+]
+
total_milliseconds()
+
+Get total milliseconds.
+ + +The function outputs the total milliseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.total_milliseconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 20
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_minutes()
+
+Get total minutes.
+ + +The function outputs the total minutes in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.total_minutes()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 20
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +The function outputs the total nanoseconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> s_pd = pd.to_datetime(pd.Series(data))
+>>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns")
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.diff().dt.total_nanoseconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 1.0
+dtype: float64
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ null
+ 1
+]
+
total_seconds()
+
+Get total seconds.
+ + +The function outputs the total seconds in the int dtype by default,
+however, pandas may change the dtype to float when there are missing values,
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.total_seconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 20
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
to_string(format)
+
+Convert a Date/Time/Datetime series into a String series with the given format.
+ + +Unfortunately, different libraries interpret format directives a bit +differently.
+"%.f"
for fractional seconds,
+ whereas pandas and Python stdlib use ".%f"
."%S"
as "seconds, including fractional seconds"
+ whereas most other tools interpret it as "just seconds, as 2 digits".Therefore, we make the following adjustments:
+"%S.%f"
with "%S%.f"
."%S.%f"
with "%S"
.Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+"%Y-%m-%dT%H:%M:%S%.f"
for datetimes"%Y-%m-%d"
for datesthough note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.to_string("%Y/%m/%d")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2020/03/01
+1 2020/04/01
+2 2020/05/01
+dtype: object
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [str]
+[
+ "2020/03/01"
+ "2020/04/01"
+ "2020/05/01"
+]
+
year()
+
+Get the year in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.dt.year()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2012
+1 2023
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 2012
+ 2023
+]
+
narwhals.Series.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A Character sequence or valid regular expression pattern. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> pets = ["cat", "dog", "rabbit and parrot", "dove", None]
+>>> s_pd = pd.Series(pets)
+>>> s_pl = pl.Series(pets)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.contains("parrot|dove")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 None
+dtype: object
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ null
+]
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ suffix
+ |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.ends_with("ngo")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 None
+dtype: object
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ null
+]
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
n
input is negative, head
returns characters up to the n-th from the end of the string.
+ For example, if n = -3
, then all characters except the last three are returned.n
characters, the full string is returned.Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.head()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 Atata
+1 taata
+2 taata
+3 zukky
+dtype: object
+>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "Atata"
+ "taata"
+ "taata"
+ "zukky"
+]
+
len_chars()
+
+Return the length of each string as the number of characters.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["foo", "Café", "345", "東京", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.len_chars()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 3.0
+1 4.0
+2 3.0
+3 2.0
+4 NaN
+dtype: float64
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [u32]
+[
+ 3
+ 4
+ 3
+ 2
+ null
+]
+
replace(pattern, value, *, literal=False, n=1)
+
+Replace first matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string, and not as a regular expression. + |
+
+ False
+ |
+
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of matches to replace. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["123abc", "abc abc123"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... s = s.str.replace("abc", "")
+... return s.to_list()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+['123', ' abc123']
+
>>> func(s_pl)
+['123', ' abc123']
+
replace_all(pattern, value, *, literal=False)
+
+Replace all matching regex/literal substring with a new string value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ pattern
+ |
+
+ str
+ |
+
+
+
+ A valid regular expression pattern. + |
+ + required + | +
+ value
+ |
+
+ str
+ |
+
+
+
+ String that will replace the matched substring. + |
+ + required + | +
+ literal
+ |
+
+ bool
+ |
+
+
+
+ Treat pattern as a literal string, and not as a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["123abc", "abc abc123"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... s = s.str.replace_all("abc", "")
+... return s.to_list()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+['123', ' 123']
+
>>> func(s_pl)
+['123', ' 123']
+
slice(offset, length=None)
+
+Create subslices of the string values of a Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ offset
+ |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
+ length
+ |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["pear", None, "papaya", "dragonfruit"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.slice(4, length=3)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0
+1 None
+2 ya
+3 onf
+dtype: object
+
>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ ""
+ null
+ "ya"
+ "onf"
+]
+
Using negative indexes:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.slice(-3)
+
>>> func(s_pd)
+0 ear
+1 None
+2 aya
+3 uit
+dtype: object
+
>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "ear"
+ null
+ "aya"
+ "uit"
+]
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ prefix
+ |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.starts_with("app")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 True
+1 False
+2 None
+dtype: object
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ true
+ false
+ null
+]
+
strip_chars(characters=None)
+
+Remove leading and trailing characters.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ characters
+ |
+
+ str | None
+ |
+
+
+
+ The set of characters to be removed. All combinations of this set of characters will be stripped from the start and end of the string. If set to None (default), all leading and trailing whitespace is removed instead. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["apple", "\nmango"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... s = s.str.strip_chars()
+... return s.to_list()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+['apple', 'mango']
+
>>> func(s_pl)
+['apple', 'mango']
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ n
+ |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
Notes:
1. When the n input is negative, tail returns characters starting from the n-th from the beginning of the string. For example, if n = -3, then all characters except the first three are returned.
2. If the length of the string has fewer than n characters, the full string is returned.

Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.tail()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 atata
+1 taata
+2 atata
+3 kkyun
+dtype: object
+>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "atata"
+ "taata"
+ "atata"
+ "kkyun"
+]
+
to_datetime(format=None)
+
+Parse Series with strings to a Series with Datetime dtype.
+ + +pandas defaults to nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. The ability to +set the time unit in pandas, if the version permits, will arrive.
+As different backends auto-infer format in different ways, if format=None
+there is no guarantee that the result will be equal.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ format
+ |
+
+ str | None
+ |
+
+
+
+ Format to use for conversion. If set to None (default), the format is +inferred from the data. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import pyarrow as pa
+>>> import narwhals as nw
+>>> data = ["2020-01-01", "2020-01-02"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+>>> s_pa = pa.chunked_array([data])
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return s.str.to_datetime(format="%Y-%m-%d")
+
We can then pass any supported library such as pandas, Polars, or PyArrow:
+>>> func(s_pd)
+0 2020-01-01
+1 2020-01-02
+dtype: datetime64[ns]
+>>> func(s_pl)
+shape: (2,)
+Series: '' [datetime[μs]]
+[
+ 2020-01-01 00:00:00
+ 2020-01-02 00:00:00
+]
+>>> func(s_pa)
+<pyarrow.lib.ChunkedArray object at 0x...>
+[
+ [
+ 2020-01-01 00:00:00.000000,
+ 2020-01-02 00:00:00.000000
+ ]
+]
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["APPLE", "MANGO", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lower_col=nw.col("fruits").str.to_lowercase())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits lower_col
+0 APPLE apple
+1 MANGO mango
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ lower_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ APPLE ┆ apple │
+│ MANGO ┆ mango │
+│ null ┆ null │
+└────────┴───────────┘
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see: https://github.com/apache/arrow/issues/34599 +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(upper_col=nw.col("fruits").str.to_uppercase())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits upper_col
+0 apple APPLE
+1 mango MANGO
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ upper_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ apple ┆ APPLE │
+│ mango ┆ MANGO │
+│ null ┆ null │
+└────────┴───────────┘
+
narwhals.typing
Narwhals comes fully statically typed. In addition to nw.DataFrame
, nw.Expr
,
+nw.Series
, nw.LazyFrame
, we also provide the following type hints:
DataFrameT
A TypeVar
bound to nw.DataFrame
. Use this when you have a function which
+accepts a nw.DataFrame
and returns a nw.DataFrame
backed by the same backend, for example:
import narwhals as nw
+from narwhals.typing import DataFrameT
+
+
+@nw.narwhalify
+def func(df: DataFrameT) -> DataFrameT:
+ return df.with_columns(c=df["a"] + 1)
+
Frame
Either a nw.DataFrame
or nw.LazyFrame
. Use this if your function can work on
+either and your function doesn't care about its backend, for example:
import narwhals as nw
+from narwhals.typing import Frame
+
+
+@nw.narwhalify
+def func(df: Frame) -> list[str]:
+ return df.columns
+
FrameT
A TypeVar
bound to Frame
. Use this if your function accepts either nw.DataFrame
+or nw.LazyFrame
and returns an object backed by the same backend, for example:
import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def func(df: FrameT) -> FrameT:
+ return df.with_columns(c=nw.col("a") + 1)
+
IntoDataFrame
An object which can be converted to nw.DataFrame
(e.g. pd.DataFrame
, pl.DataFrame
).
+Use this if your function accepts a narwhalifiable object but doesn't care about its backend:
from __future__ import annotations
+
+import narwhals as nw
+from narwhals.typing import IntoDataFrame
+
+
+def func(df_native: IntoDataFrame) -> tuple[int, int]:
+ df = nw.from_native(df_native, eager_only=True)
+ return df.shape
+
IntoDataFrameT
A TypeVar
bound to IntoDataFrame
. Use this if your function accepts
+a function which can be converted to nw.DataFrame
and returns an object of the same
+class:
import narwhals as nw
+from narwhals.typing import IntoDataFrameT
+
+
+def func(df_native: IntoDataFrameT) -> IntoDataFrameT:
+ df = nw.from_native(df_native, eager_only=True)
+ return nw.to_native(df.with_columns(c=df["a"] + 1))
+
IntoExpr
Use this to mean "either a Narwhals expression, or something
+which can be converted into one". For example, exprs
in DataFrame.select
is
+typed to accept IntoExpr
, as it can either accept a nw.Expr
(e.g. df.select(nw.col('a'))
)
+or a string which will be interpreted as a nw.Expr
, e.g. df.select('a')
.
IntoFrame
An object which can be converted to nw.DataFrame
or nw.LazyFrame
+(e.g. pd.DataFrame
, pl.DataFrame
, pl.LazyFrame
). Use this if your function can accept
+an object which can be converted to either nw.DataFrame
or nw.LazyFrame
and it doesn't
+care about its backend:
import narwhals as nw
+from narwhals.typing import IntoFrame
+
+
+def func(df_native: IntoFrame) -> list[str]:
+ df = nw.from_native(df_native)
+ return df.columns
+
IntoFrameT
A TypeVar
bound to IntoFrame
. Use this if your function accepts an
+object which is convertible to nw.DataFrame
or nw.LazyFrame
and returns an object
+of the same type:
import narwhals as nw
+from narwhals.typing import IntoFrameT
+
+
+def func(df_native: IntoFrameT) -> IntoFrameT:
+ df = nw.from_native(df_native)
+ return nw.to_native(df.with_columns(c=nw.col("a") + 1))
+
nw.narwhalify
, or nw.from_native
?Although the former is more readable, the latter is better at preserving type hints.
+Here's an example: +
import polars as pl
+import narwhals as nw
+from narwhals.typing import IntoDataFrameT, DataFrameT
+
+df = pl.DataFrame({"a": [1, 2, 3]})
+
+
+def func(df: IntoDataFrameT) -> IntoDataFrameT:
+ df = nw.from_native(df, eager_only=True)
+ return nw.to_native(df.select(b=nw.col("a")))
+
+
+reveal_type(func(df))
+
+
+@nw.narwhalify(strict=True)
+def func_2(df: DataFrameT) -> DataFrameT:
+ return df.select(b=nw.col("a"))
+
+
+reveal_type(func_2(df))
+
Running mypy
on it gives:
+
$ mypy f.py
+f.py:11: note: Revealed type is "polars.dataframe.frame.DataFrame"
+f.py:17: note: Revealed type is "Any"
+Success: no issues found in 1 source file
+
In the first case, mypy can infer that df
is a polars.DataFrame
. In the second case, it can't.
If you want to make the most out of type hints and preserve them as much as possible, we recommend
+nw.from_native
and nw.to_native
- otherwise, nw.narwhalify
. Type hints will still be respected
+inside the function body if you type the arguments.