diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..8357e4daa --- /dev/null +++ b/404.html @@ -0,0 +1,1165 @@ + + + +
+ + + + + + + + + + + + + + +narwhals.DataFrame
Narwhals DataFrame, backed by a native dataframe.
+The native dataframe might be pandas.DataFrame, polars.DataFrame, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
.
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.columns
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+['foo', 'bar', 'ham']
+>>> func(df_pl)
+['foo', 'bar', 'ham']
+
schema: Schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... return df.schema
+
You can pass either pandas or Polars to func
:
>>> df_pd_schema = func(df_pd)
+>>> df_pd_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> df_pl_schema = func(df_pl)
+>>> df_pl_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
shape: tuple[int, int]
+
+
+ property
+
+
+Get the shape of the DataFrame.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3, 4, 5]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... return df.shape
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+(5, 1)
+>>> func(df_pl)
+(5, 1)
+
__getitem__(item)
+
+Extract column or slice of DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
item |
+
+ str | slice | Sequence[int] | tuple[Sequence[int], str | int]
+ |
+
+
+
+ how to slice dataframe: +
|
+ + required + | +
Notes:
+ In contrast with Polars, pandas allows non-string column names.
+ If you don't know whether the column name you're trying to extract
+ is definitely a string (e.g. df[df.columns[0]]
) then you should
+ use DataFrame.get_column
instead.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify(eager_only=True)
+... def func(df):
+... return df["a"]
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> func(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+
clone()
+
+Create a copy of this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we clone the DataFrame:
+>>> @nw.narwhalify
+... def func(df):
+... return df.clone()
+
>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+
>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... return df.collect_schema()
+
You can pass either pandas or Polars to func
:
>>> df_pd_schema = func(df_pd)
+>>> df_pd_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
>>> df_pl_schema = func(df_pl)
+>>> df_pl_schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns)
+
+Remove columns from the dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*columns |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("ham")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("foo", "ham")
+
>>> func(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
drop_nulls()
+
+Drop null values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.drop_nulls()
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a ba
+0 1.0 1.0
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+
filter(*predicates)
+
+Filter the rows in the DataFrame based on one or more predicate expressions.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
predicates |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Expression(s) that evaluates to a boolean Series. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter(nw.col("foo") > 1)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions, combined with and/or operators:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> @nw.narwhalify
+... def func(df):
+... dframe = df.filter(
+... nw.col("foo") <= 2,
+... ~nw.col("ham").is_in(["b", "c"]),
+... )
+... return dframe
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
get_column(name)
+
+Get a single column by name.
+ + +Although name
is typed as str
, pandas does allow non-string column
+names, and they will work when passed to this function if the
+narwhals.DataFrame
is backed by a pandas dataframe with non-string
+columns. This function can only be used to extract a column by name, so
+there is no risk of ambiguity.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify(eager_only=True)
+... def func(df):
+... name = df.columns[0]
+... return df.get_column(name)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 1
+1 2
+Name: a, dtype: int64
+>>> func(df_pl)
+shape: (2,)
+Series: 'a' [i64]
+[
+ 1
+ 2
+]
+
group_by(*keys)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts multiple column names as a list. + |
+
+ ()
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
GroupBy |
+ GroupBy[Self]
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
+>>> func(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the last abs(n). |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
is_duplicated()
+
+Get a mask of all duplicated rows in this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... duplicated = df.is_duplicated()
+... return nw.to_native(duplicated)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+
>>> func(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
is_empty()
+
+Check if the dataframe is empty.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that filters rows in which "foo" +values are greater than 10, and then checks if the result is empty or not:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... return df.filter(nw.col("foo") > 10).is_empty()
+
We can then pass either pandas or Polars to func
:
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
+>>> func(df_pd), func(df_pl)
+(True, True)
+
>>> df_pd = pd.DataFrame({"foo": [100, 2, 3], "bar": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"foo": [100, 2, 3], "bar": [4, 5, 6]})
+>>> func(df_pd), func(df_pl)
+(False, False)
+
is_unique()
+
+Get a mask of all unique rows in this DataFrame.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 1],
+... "b": ["x", "y", "z", "x"],
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... unique = df.is_unique()
+... return nw.to_native(unique)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> func(df_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+
item(row=None, column=None)
+
+Return the DataFrame as a scalar, or return the element at the given row/column.
+ + +If row/col are not provided, this is equivalent to df[0,0], with a check that the shape is (1,1). +With row/col, this is equivalent to df[row,col].
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that returns the item at the given row/column:
+>>> def func(df_any, row, column):
+... df = nw.from_native(df_any)
+... return df.item(row, column)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd, 1, 1), func(df_pd, 2, "b")
+(5, 6)
+
>>> func(df_pl, 1, 1), func(df_pl, 2, "b")
+(5, 6)
+
iter_rows(*, named=False, buffer_size=512)
+
+Return an iterator over the rows of the DataFrame as Python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
named |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
buffer_size |
+
+ int
+ |
+
+
+
+ Determines the number of rows that are buffered +internally while iterating over the data. +See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html + |
+
+ 512
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any, *, named):
+... df = nw.from_native(df_any)
+... return df.iter_rows(named=named)
+
We can then pass either pandas or Polars to func
:
>>> [row for row in func(df_pd, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in func(df_pd, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> [row for row in func(df_pl, named=False)]
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> [row for row in func(df_pl, named=True)]
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
join(other, *, how='inner', left_on=None, right_on=None)
+
+Join in SQL-like fashion.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
how |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
left_on |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+
+ None
+ |
+
right_on |
+
+ str | list[str] | None
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> df_pl = pl.DataFrame(data)
+>>> other_pl = pl.DataFrame(data_other)
+
Let's define a dataframe-agnostic function in which we join over "ham" column:
+>>> @nw.narwhalify
+... def join_on_ham(df, other):
+... return df.join(other, left_on="ham", right_on="ham")
+
We can now pass either pandas or Polars to the function:
+>>> join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> join_on_ham(df_pl, other_pl)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df_any):
+... return df_any.lazy()
+
Note that the pandas DataFrame stays eager, whereas the Polars DataFrame becomes a Polars LazyFrame:
+>>> func(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pl)
+<LazyFrame ...>
+
null_count()
+
+Create a new DataFrame that shows the null counts per column.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> df_pd = pd.DataFrame(
+... {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, None, 3],
+... "bar": [6, 7, None],
+... "ham": ["a", "b", "c"],
+... }
+... )
+
Let's define a dataframe-agnostic function that returns the null count of +each column:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... null_counts = df.null_count()
+... return nw.to_native(null_counts)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 1 0
+
>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ u32 ┆ u32 │
+╞═════╪═════╪═════╡
+│ 1 ┆ 1 ┆ 0 │
+└─────┴─────┴─────┘
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.pipe(
+... lambda _df: _df.select([x for x in _df.columns if len(x) == 1])
+... )
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
mapping |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.rename({"foo": "apple"})
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
rows(*, named=False)
+
+Returns all data in the DataFrame as a list of rows of python-native values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
named |
+
+ bool
+ |
+
+
+
+ By default, each row is returned as a tuple of values given +in the same order as the frame columns. Setting named=True will +return rows of dictionaries instead. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any, *, named):
+... df = nw.from_native(df_any)
+... return df.rows(named=named)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> func(df_pd, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+>>> func(df_pl, named=False)
+[(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+>>> func(df_pl, named=True)
+[{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+
select(*exprs, **named_exprs)
+
+Select columns from this DataFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("foo")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(["foo", "bar"])
+>>> func(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> func(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(threshold=nw.col("foo") * 2)
+>>> func(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> func(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False)
+
+Sort the dataframe by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
by |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column name(s) to sort by. + |
+ + required + | +
*more_by |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional + arguments. + |
+
+ ()
+ |
+
descending |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple + columns, can be specified per column by passing a + sequence of booleans. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders
+>>> @nw.narwhalify
+... def func(df):
+... return df.sort("c", "a", descending=[False, True])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> func(df_pl)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the first abs(n). |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+2 3 8 c
+3 4 9 d
+4 5 10 e
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 3 ┆ 8 ┆ c │
+│ 4 ┆ 9 ┆ d │
+│ 5 ┆ 10 ┆ e │
+└─────┴─────┴─────┘
+
to_dict(*, as_series=True)
+
+Convert DataFrame to a dictionary mapping column name to values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
as_series |
+
+ bool
+ |
+
+
+
+ If set to True, the values are Series; otherwise the values are plain Python lists. |
+
+ True
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "A": [1, 2, 3, 4, 5],
+... "fruits": ["banana", "banana", "apple", "apple", "banana"],
+... "B": [5, 4, 3, 2, 1],
+... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
+... "optional": [28, 300, None, 2, -30],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.to_dict(as_series=False)
+... return df
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], 'optional': [28.0, 300.0, nan, 2.0, -30.0]}
+>>> func(df_pl)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+
to_numpy()
+
+Convert this DataFrame to a NumPy ndarray.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.5, 7.0, 8.5], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.to_numpy()
+... return df
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+>>> func(df_pl)
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+
to_pandas()
+
+Convert this DataFrame to a pandas DataFrame.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.to_pandas()
+... return df
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pl)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
unique(subset)
+
+Drop duplicate rows from this dataframe.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
subset |
+
+ str | list[str]
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unique(["bar", "ham"])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 a b
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this DataFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ A new DataFrame with the columns added. + |
+
Creating a new DataFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns((nw.col("a") * 2).alias("a*2"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c a*2
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> func(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ a*2 │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.with_row_index()
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> func(df_pl)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+
write_parquet(file)
+
+Write dataframe to parquet file.
+ + +Examples:
+Construct pandas and Polars DataFrames:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+
We define a library agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df.write_parquet("foo.parquet")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+>>> func(df_pl)
+
narwhals.dependencies
get_pandas()
+
+Get pandas module (if already imported - else return None).
+ +get_polars()
+
+Get Polars module (if already imported - else return None).
+ +get_modin()
+
+Get modin.pandas module (if already imported - else return None).
+ +get_cudf()
+
+Get cudf module (if already imported - else return None).
+ +get_pyarrow()
+
+Get pyarrow module (if already imported - else return None).
+ +is_pandas_dataframe(df)
+
+Check whether df
is a pandas DataFrame without importing pandas.
narwhals.dtypes
Int64
+
+
+Int32
+
+
+Int16
+
+
+Int8
+
+
+UInt64
+
+
+UInt32
+
+
+UInt16
+
+
+UInt8
+
+
+Float64
+
+
+Float32
+
+
+Boolean
+
+
+Categorical
+
+
+Enum
+
+
+String
+
+
+Datetime
+
+
+Duration
+
+
+Object
+
+
+Unknown
+
+
+narwhals.Expr
abs()
+
+Return absolute value of each element.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, -2], "b": [-3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").abs())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
alias(name)
+
+Rename the expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
name |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df_any):
+... return df_any.select((nw.col("b") + 10).alias("c"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ c
+0 14
+1 15
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ c │
+│ --- │
+│ i64 │
+╞═════╡
+│ 14 │
+│ 15 │
+└─────┘
+
all()
+
+Return whether all values in the column are True
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").all())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 False True
+>>> func(df_pl)
+shape: (1, 2)
+┌───────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪══════╡
+│ false ┆ true │
+└───────┴──────┘
+
any()
+
+Return whether any of the values in the column are True.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
+>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").any())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 True True
+>>> func(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞══════╪══════╡
+│ true ┆ true │
+└──────┴──────┘
+
cast(dtype)
+
+Redefine an object's data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
dtype |
+
+ Any
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> from datetime import date
+>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df_any):
+... return df_any.select(
+... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar
+0 1.0 6
+1 2.0 7
+2 3.0 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ f32 ┆ u8 │
+╞═════╪═════╡
+│ 1.0 ┆ 6 │
+│ 2.0 ┆ 7 │
+│ 3.0 ┆ 8 │
+└─────┴─────┘
+
count()
+
+Returns the number of non-null elements in the column.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().count())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 3 2
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 3 ┆ 2 │
+└─────┴─────┘
+
cum_sum()
+
+Return cumulative sum.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").cum_sum())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 2
+1 2 6
+2 5 10
+3 10 16
+4 15 22
+>>> func(df_pl)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 6 │
+│ 5 ┆ 10 │
+│ 10 ┆ 16 │
+│ 15 ┆ 22 │
+└─────┴─────┘
+
diff()
+
+Returns the difference between each element and the previous one.
+ + +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(a_diff=nw.col("a").diff())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a_diff
+0 NaN
+1 0.0
+2 2.0
+3 2.0
+4 0.0
+>>> func(df_pl)
+shape: (5, 1)
+┌────────┐
+│ a_diff │
+│ --- │
+│ i64 │
+╞════════╡
+│ null │
+│ 0 │
+│ 2 │
+│ 2 │
+│ 0 │
+└────────┘
+
drop_nulls()
+
+Remove missing values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").drop_nulls())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2.0
+1 4.0
+3 3.0
+5 5.0
+>>> func(df_pl) # nan != null for polars
+shape: (5, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 2.0 │
+│ 4.0 │
+│ NaN │
+│ 3.0 │
+│ 5.0 │
+└─────┘
+
fill_null(value)
+
+Fill null values with given value.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+>>> df_pl = pl.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.col("a", "b").fill_null(0))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 2.0 2.0
+1 4.0 4.0
+2 0.0 0.0
+3 3.0 3.0
+4 5.0 5.0
+
>>> func(df_pl) # nan != null for polars
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 2.0 │
+│ 4 ┆ 4.0 │
+│ 0 ┆ NaN │
+│ 3 ┆ 3.0 │
+│ 5 ┆ 5.0 │
+└─────┴─────┘
+
filter(*predicates)
+
+Filters elements based on a condition, returning a new expression.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.col("a").filter(nw.col("a") > 4),
+... nw.col("b").filter(nw.col("b") < 13),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+3 5 10
+4 6 11
+5 7 12
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 5 ┆ 10 │
+│ 6 ┆ 11 │
+│ 7 ┆ 12 │
+└─────┴─────┘
+
head(n=10)
+
+Get the first n
rows.
Arguments + n : int + Number of rows to return.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").head(3))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 0
+1 1
+2 2
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 0 │
+│ 1 │
+│ 2 │
+└─────┘
+
is_between(lower_bound, upper_bound, closed='both')
+
+Check if this expression is between the given lower and upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
lower_bound |
+
+ Any
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
upper_bound |
+
+ Any
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
closed |
+
+ str
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").is_between(2, 4, "right"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 False
+1 False
+2 True
+3 True
+4 False
+>>> func(df_pl)
+shape: (5, 1)
+┌───────┐
+│ a │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ false │
+│ true │
+│ true │
+│ false │
+└───────┘
+
is_duplicated()
+
+Return a boolean mask indicating duplicated values.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_duplicated())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 True True
+1 False True
+2 False False
+3 True False
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ false ┆ true │
+│ false ┆ false │
+│ true ┆ false │
+└───────┴───────┘
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_first_distinct())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 True True
+1 True False
+2 True True
+3 False True
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ true ┆ true │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+
is_in(other)
+
+Check if elements of this expression are present in the other iterable.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Any
+ |
+
+
+
+ iterable + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(b=nw.col("a").is_in([1, 2]))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 True
+1 2 True
+2 9 False
+3 10 False
+
>>> func(df_pl)
+shape: (4, 2)
+┌─────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ bool │
+╞═════╪═══════╡
+│ 1 ┆ true │
+│ 2 ┆ true │
+│ 9 ┆ false │
+│ 10 ┆ false │
+└─────┴───────┘
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_last_distinct())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 False False
+1 True True
+2 True True
+3 True True
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ true │
+│ true ┆ true │
+│ true ┆ true │
+└───────┴───────┘
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+>>> df_pl = pl.DataFrame(
+... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+... )
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b a_is_null b_is_null
+0 2.0 2.0 False False
+1 4.0 4.0 False False
+2 NaN NaN True True
+3 3.0 3.0 False False
+4 5.0 5.0 False False
+
>>> func(df_pl) # nan != null for polars
+shape: (5, 4)
+┌──────┬─────┬───────────┬───────────┐
+│ a ┆ b ┆ a_is_null ┆ b_is_null │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ bool │
+╞══════╪═════╪═══════════╪═══════════╡
+│ 2 ┆ 2.0 ┆ false ┆ false │
+│ 4 ┆ 4.0 ┆ false ┆ false │
+│ null ┆ NaN ┆ true ┆ false │
+│ 3 ┆ 3.0 ┆ false ┆ false │
+│ 5 ┆ 5.0 ┆ false ┆ false │
+└──────┴─────┴───────────┴───────────┘
+
is_unique()
+
+Return a boolean mask indicating unique values.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().is_unique())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 False False
+1 True False
+2 True True
+3 False True
+>>> func(df_pl)
+shape: (4, 2)
+┌───────┬───────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪═══════╡
+│ false ┆ false │
+│ true ┆ false │
+│ true ┆ true │
+│ false ┆ true │
+└───────┴───────┘
+
len()
+
+Return the number of elements in the column.
+Null values count towards the total.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that computes the length for different values of the "b" column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(
+... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
+... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a1 a2
+0 2 1
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a1 ┆ a2 │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 2 ┆ 1 │
+└─────┴─────┘
+
max()
+
+Returns the maximum value(s) from the column(s).
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
+>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.max("a", "b"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 20 100
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 20 ┆ 100 │
+└─────┴─────┘
+
mean()
+
+Get mean value.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").mean())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 0.0 4.0
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 0.0 ┆ 4.0 │
+└─────┴─────┘
+
min()
+
+Returns the minimum value(s) from the column(s).
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.min("a", "b"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 3
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+└─────┴─────┘
+
null_count()
+
+Count null values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all().null_count())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 2
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+└─────┴─────┘
+
n_unique()
+
+Returns the count of unique values.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").n_unique())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 5 3
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 5 ┆ 3 │
+└─────┴─────┘
+
over(*keys)
+
+Compute expressions over the given groups.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of columns to compute window expression over.
+ Must be names of columns, as opposed to expressions -
+ so, this is a bit less flexible than Polars' Expr.over. |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a b a_min_per_group
+0 1 1 1
+1 2 1 1
+2 3 2 3
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────────────────┐
+│ a ┆ b ┆ a_min_per_group │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════════════════╡
+│ 1 ┆ 1 ┆ 1 │
+│ 2 ┆ 1 ┆ 1 │
+│ 3 ┆ 2 ┆ 3 │
+└─────┴─────┴─────────────────┘
+
quantile(quantile, interpolation)
+
+Get quantile value.
+ + +pandas and Polars may have implementation differences for a given interpolation method.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
quantile |
+ + | +
+
+
+ float +Quantile between 0.0 and 1.0. + |
+ + required + | +
interpolation |
+ + | +
+
+
+ {'nearest', 'higher', 'lower', 'midpoint', 'linear'} +Interpolation method. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 24.5 74.5
+
>>> func(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════╪══════╡
+│ 24.5 ┆ 74.5 │
+└──────┴──────┘
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
decimals |
+
+ int
+ |
+
+
+
+ Number of decimals to round by. + |
+
+ 0
+ |
+
For values exactly halfway between rounded decimal values pandas and Polars behave differently.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 to 4.0, etc.).
+Polars rounds away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1.12345, 2.56789, 3.901234]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").round(1))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 1.1
+1 2.6
+2 3.9
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 1.1 │
+│ 2.6 │
+│ 3.9 │
+└─────┘
+
sample(n=None, fraction=None, *, with_replacement=False)
+
+Sample randomly from this expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
fraction |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
with_replacement |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").sample(fraction=1.0, with_replacement=True))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+2 3
+0 1
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+│ 3 │
+│ 3 │
+└─────┘
+
shift(n)
+
+Shift values by n
positions.
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in an Int64 column, you could
+do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(a_shift=nw.col("a").shift(n=1))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a_shift
+0 NaN
+1 1.0
+2 1.0
+3 3.0
+4 5.0
+>>> func(df_pl)
+shape: (5, 1)
+┌─────────┐
+│ a_shift │
+│ --- │
+│ i64 │
+╞═════════╡
+│ null │
+│ 1 │
+│ 1 │
+│ 3 │
+│ 5 │
+└─────────┘
+
sort(*, descending=False)
+
+Sort this column. Place null values first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
descending |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
+>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})
+
Let's define dataframe-agnostic functions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").sort())
+
>>> def func_descend(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col("a").sort(descending=True))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+>>> func(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 1 │
+│ 2 │
+│ 5 │
+└──────┘
+
>>> func_descend(df_pd)
+ a
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+>>> func_descend(df_pl)
+shape: (4, 1)
+┌──────┐
+│ a │
+│ --- │
+│ i64 │
+╞══════╡
+│ null │
+│ 5 │
+│ 2 │
+│ 1 │
+└──────┘
+
std(*, ddof=1)
+
+Get standard deviation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
ddof |
+
+ int
+ |
+
+
+
+ “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, + where N represents the number of elements. By default ddof is 1. + |
+
+ 1
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").std(ddof=0))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 17.79513 1.265789
+>>> func(df_pl)
+shape: (1, 2)
+┌──────────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════════╪══════════╡
+│ 17.79513 ┆ 1.265789 │
+└──────────┴──────────┘
+
sum()
+
+Return the sum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
+>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").sum())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 15 150
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 15 ┆ 150 │
+└─────┴─────┘
+
tail(n=10)
+
+Get the last n
rows.
Arguments + n : int + Number of rows to return.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": list(range(10))}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").tail(3))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+7 7
+8 8
+9 9
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 7 │
+│ 8 │
+│ 9 │
+└─────┘
+
unique()
+
+Return unique values
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a", "b").unique())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 2
+1 3 4
+2 5 6
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 3 ┆ 4 │
+│ 5 ┆ 6 │
+└─────┴─────┘
+
narwhals.Expr.cat
get_categories()
+
+Get unique categories from column.
+ + +Examples:
+Let's create some dataframes:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", "mango"]}
+>>> df_pd = pd.DataFrame(data, dtype="category")
+>>> df_pl = pl.DataFrame(data, schema={"fruits": pl.Categorical})
+
We define a dataframe-agnostic function to get unique categories +from column 'fruits':
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("fruits").cat.get_categories())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits
+0 apple
+1 mango
+>>> func(df_pl)
+shape: (2, 1)
+┌────────┐
+│ fruits │
+│ --- │
+│ str │
+╞════════╡
+│ apple │
+│ mango │
+└────────┘
+
narwhals.Expr.dt
year()
+
+Extract year from underlying DateTime representation.
+Returns the year number in the calendar date.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.col("datetime").dt.year().alias("year"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime year
+0 1978-06-01 1978
+1 2024-12-13 2024
+2 2065-01-01 2065
+>>> func(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ year │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i32 │
+╞═════════════════════╪══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 │
+│ 2024-12-13 00:00:00 ┆ 2024 │
+│ 2065-01-01 00:00:00 ┆ 2065 │
+└─────────────────────┴──────┘
+
month()
+
+Extract month from underlying DateTime representation.
+Returns the month number starting from 1. The return value ranges from 1 to 12.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year"),
+... nw.col("datetime").dt.month().alias("month"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime year month
+0 1978-06-01 1978 6
+1 2024-12-13 2024 12
+2 2065-01-01 2065 1
+>>> func(df_pl)
+shape: (3, 3)
+┌─────────────────────┬──────┬───────┐
+│ datetime ┆ year ┆ month │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i32 ┆ i8 │
+╞═════════════════════╪══════╪═══════╡
+│ 1978-06-01 00:00:00 ┆ 1978 ┆ 6 │
+│ 2024-12-13 00:00:00 ┆ 2024 ┆ 12 │
+│ 2065-01-01 00:00:00 ┆ 2065 ┆ 1 │
+└─────────────────────┴──────┴───────┘
+
day()
+
+Extract day from underlying DateTime representation.
+Returns the day of the month starting from 1. The return value ranges from 1 to 31. (The last day of the month varies by month.)
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 6, 1),
+... datetime(2024, 12, 13),
+... datetime(2065, 1, 1),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.year().alias("year"),
+... nw.col("datetime").dt.month().alias("month"),
+... nw.col("datetime").dt.day().alias("day"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime year month day
+0 1978-06-01 1978 6 1
+1 2024-12-13 2024 12 13
+2 2065-01-01 2065 1 1
+>>> func(df_pl)
+shape: (3, 4)
+┌─────────────────────┬──────┬───────┬─────┐
+│ datetime ┆ year ┆ month ┆ day │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i32 ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪═══════╪═════╡
+│ 1978-06-01 00:00:00 ┆ 1978 ┆ 6 ┆ 1 │
+│ 2024-12-13 00:00:00 ┆ 2024 ┆ 12 ┆ 13 │
+│ 2065-01-01 00:00:00 ┆ 2065 ┆ 1 ┆ 1 │
+└─────────────────────┴──────┴───────┴─────┘
+
ordinal_day()
+
+Get ordinal day.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {"a": [datetime(2020, 1, 1), datetime(2020, 8, 3)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_ordinal_day=nw.col("a").dt.ordinal_day())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_ordinal_day
+0 2020-01-01 1
+1 2020-08-03 216
+>>> func(df_pl)
+shape: (2, 2)
+┌─────────────────────┬───────────────┐
+│ a ┆ a_ordinal_day │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i16 │
+╞═════════════════════╪═══════════════╡
+│ 2020-01-01 00:00:00 ┆ 1 │
+│ 2020-08-03 00:00:00 ┆ 216 │
+└─────────────────────┴───────────────┘
+
hour()
+
+Extract hour from underlying DateTime representation.
+Returns the hour number from 0 to 23.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1),
+... datetime(2024, 10, 13, 5),
+... datetime(2065, 1, 1, 10),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.col("datetime").dt.hour().alias("hour"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour
+0 1978-01-01 01:00:00 1
+1 2024-10-13 05:00:00 5
+2 2065-01-01 10:00:00 10
+>>> func(df_pl)
+shape: (3, 2)
+┌─────────────────────┬──────┐
+│ datetime ┆ hour │
+│ --- ┆ --- │
+│ datetime[μs] ┆ i8 │
+╞═════════════════════╪══════╡
+│ 1978-01-01 01:00:00 ┆ 1 │
+│ 2024-10-13 05:00:00 ┆ 5 │
+│ 2065-01-01 10:00:00 ┆ 10 │
+└─────────────────────┴──────┘
+
minute()
+
+Extract minutes from underlying DateTime representation.
+Returns the minute number from 0 to 59.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30),
+... datetime(2065, 1, 1, 10, 20),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute
+0 1978-01-01 01:01:00 1 1
+1 2024-10-13 05:30:00 5 30
+2 2065-01-01 10:20:00 10 20
+>>> func(df_pl)
+shape: (3, 3)
+┌─────────────────────┬──────┬────────┐
+│ datetime ┆ hour ┆ minute │
+│ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪════════╡
+│ 1978-01-01 01:01:00 ┆ 1 ┆ 1 │
+│ 2024-10-13 05:30:00 ┆ 5 ┆ 30 │
+│ 2065-01-01 10:20:00 ┆ 10 ┆ 20 │
+└─────────────────────┴──────┴────────┘
+
second()
+
+Extract seconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1),
+... datetime(2024, 10, 13, 5, 30, 14),
+... datetime(2065, 1, 1, 10, 20, 30),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second
+0 1978-01-01 01:01:01 1 1 1
+1 2024-10-13 05:30:14 5 30 14
+2 2065-01-01 10:20:30 10 20 30
+>>> func(df_pl)
+shape: (3, 4)
+┌─────────────────────┬──────┬────────┬────────┐
+│ datetime ┆ hour ┆ minute ┆ second │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 │
+╞═════════════════════╪══════╪════════╪════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 │
+│ 2024-10-13 05:30:14 ┆ 5 ┆ 30 ┆ 14 │
+│ 2065-01-01 10:20:30 ┆ 10 ┆ 20 ┆ 30 │
+└─────────────────────┴──────┴────────┴────────┘
+
millisecond()
+
+Extract milliseconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.millisecond().alias("millisecond"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second millisecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.505 5 30 14 505
+2 2065-01-01 10:20:30.067 10 20 30 67
+>>> func(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ millisecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 5 ┆ 30 ┆ 14 ┆ 505 │
+│ 2065-01-01 10:20:30.067 ┆ 10 ┆ 20 ┆ 30 ┆ 67 │
+└─────────────────────────┴──────┴────────┴────────┴─────────────┘
+
microsecond()
+
+Extract microseconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 505000),
+... datetime(2065, 1, 1, 10, 20, 30, 67000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.microsecond().alias("microsecond"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second microsecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.505 5 30 14 505000
+2 2065-01-01 10:20:30.067 10 20 30 67000
+>>> func(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬─────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ microsecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪═════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.505 ┆ 5 ┆ 30 ┆ 14 ┆ 505000 │
+│ 2065-01-01 10:20:30.067 ┆ 10 ┆ 20 ┆ 30 ┆ 67000 │
+└─────────────────────────┴──────┴────────┴────────┴─────────────┘
+
nanosecond()
+
+Extract nanoseconds from underlying DateTime representation.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = {
+... "datetime": [
+... datetime(1978, 1, 1, 1, 1, 1, 0),
+... datetime(2024, 10, 13, 5, 30, 14, 500000),
+... datetime(2065, 1, 1, 10, 20, 30, 60000),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... nw.col("datetime").dt.hour().alias("hour"),
+... nw.col("datetime").dt.minute().alias("minute"),
+... nw.col("datetime").dt.second().alias("second"),
+... nw.col("datetime").dt.nanosecond().alias("nanosecond"),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ datetime hour minute second nanosecond
+0 1978-01-01 01:01:01.000 1 1 1 0
+1 2024-10-13 05:30:14.500 5 30 14 500000000
+2 2065-01-01 10:20:30.060 10 20 30 60000000
+>>> func(df_pl)
+shape: (3, 5)
+┌─────────────────────────┬──────┬────────┬────────┬────────────┐
+│ datetime ┆ hour ┆ minute ┆ second ┆ nanosecond │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ datetime[μs] ┆ i8 ┆ i8 ┆ i8 ┆ i32 │
+╞═════════════════════════╪══════╪════════╪════════╪════════════╡
+│ 1978-01-01 01:01:01 ┆ 1 ┆ 1 ┆ 1 ┆ 0 │
+│ 2024-10-13 05:30:14.500 ┆ 5 ┆ 30 ┆ 14 ┆ 500000000 │
+│ 2065-01-01 10:20:30.060 ┆ 10 ┆ 20 ┆ 30 ┆ 60000000 │
+└─────────────────────────┴──────┴────────┴────────┴────────────┘
+
total_minutes()
+
+Get total minutes.
+ + +The function outputs the total minutes in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {"a": [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_total_minutes=nw.col("a").dt.total_minutes())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_minutes
+0 0 days 00:10:00 10
+1 0 days 00:20:40 20
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_minutes │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10m ┆ 10 │
+│ 20m 40s ┆ 20 │
+└──────────────┴─────────────────┘
+
total_seconds()
+
+Get total seconds.
+ + +The function outputs the total seconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(a_total_seconds=nw.col("a").dt.total_seconds())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_seconds
+0 0 days 00:00:10 10
+1 0 days 00:00:20.040000 20
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬─────────────────┐
+│ a ┆ a_total_seconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪═════════════════╡
+│ 10s ┆ 10 │
+│ 20s 40ms ┆ 20 │
+└──────────────┴─────────────────┘
+
total_milliseconds()
+
+Get total milliseconds.
+ + +The function outputs the total milliseconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {
+... "a": [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_total_milliseconds=nw.col("a").dt.total_milliseconds()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_milliseconds
+0 0 days 00:00:00.010000 10
+1 0 days 00:00:00.020040 20
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_milliseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10ms ┆ 10 │
+│ 20040µs ┆ 20 │
+└──────────────┴──────────────────────┘
+
total_microseconds()
+
+Get total microseconds.
+ + +The function outputs the total microseconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = {
+... "a": [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_total_microseconds=nw.col("a").dt.total_microseconds()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_total_microseconds
+0 0 days 00:00:00.000010 10
+1 0 days 00:00:00.001200 1200
+>>> func(df_pl)
+shape: (2, 2)
+┌──────────────┬──────────────────────┐
+│ a ┆ a_total_microseconds │
+│ --- ┆ --- │
+│ duration[μs] ┆ i64 │
+╞══════════════╪══════════════════════╡
+│ 10µs ┆ 10 │
+│ 1200µs ┆ 1200 │
+└──────────────┴──────────────────────┘
+
total_nanoseconds()
+
+Get total nanoseconds.
+ + +The function outputs the total nanoseconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
and cast
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> df_pd = pd.DataFrame({"a": pd.to_datetime(data)})
+>>> df_pl = pl.DataFrame({"a": data}).with_columns(
+... pl.col("a").str.to_datetime(time_unit="ns")
+... )
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... a_diff_total_nanoseconds=nw.col("a").diff().dt.total_nanoseconds()
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a a_diff_total_nanoseconds
+0 2024-01-01 00:00:00.000000001 NaN
+1 2024-01-01 00:00:00.000000002 1.0
+>>> func(df_pl)
+shape: (2, 2)
+┌───────────────────────────────┬──────────────────────────┐
+│ a ┆ a_diff_total_nanoseconds │
+│ --- ┆ --- │
+│ datetime[ns] ┆ i64 │
+╞═══════════════════════════════╪══════════════════════════╡
+│ 2024-01-01 00:00:00.000000001 ┆ null │
+│ 2024-01-01 00:00:00.000000002 ┆ 1 │
+└───────────────────────────────┴──────────────────────────┘
+
to_string(format)
+
+Convert a Date/Time/Datetime column into a String column with the given format.
+ + +Unfortunately, different libraries interpret format directives a bit +differently.
+- Chrono, the library used by Polars, uses "%.f" for fractional seconds,
+  whereas pandas and Python stdlib use ".%f".
+- PyArrow interprets "%S" as "seconds, including fractional seconds"
+  whereas most other tools interpret it as "just seconds, as 2 digits".
+
+Therefore, we make the following adjustments:
+- for pandas-like libraries, we replace "%S.%f" with "%S%.f".
+- for PyArrow, we replace "%S.%f" with "%S".
+
.Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+- "%Y-%m-%dT%H:%M:%S%.f" for datetimes
+- "%Y-%m-%d" for dates
+
+though note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> df_pd = pd.DataFrame({"a": data})
+>>> df_pl = pl.DataFrame({"a": data})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").dt.to_string("%Y/%m/%d %H:%M:%S"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2020/03/01 00:00:00
+1 2020/04/01 00:00:00
+2 2020/05/01 00:00:00
+
>>> func(df_pl)
+shape: (3, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ str │
+╞═════════════════════╡
+│ 2020/03/01 00:00:00 │
+│ 2020/04/01 00:00:00 │
+│ 2020/05/01 00:00:00 │
+└─────────────────────┘
+
narwhals.Expr.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
pattern |
+
+ str
+ |
+
+
+
+ A character sequence or valid regular expression pattern. + |
+ + required + | +
literal |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"pets": ["cat", "dog", "rabbit and parrot", "dove", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(
+... default_match=nw.col("pets").str.contains("parrot|Dove"),
+... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"),
+... literal_match=nw.col("pets").str.contains(
+... "parrot|Dove", literal=True
+... ),
+... )
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ pets default_match case_insensitive_match literal_match
+0 cat False False False
+1 dog False False False
+2 rabbit and parrot True True False
+3 dove False True False
+4 None None None None
+>>> func(df_pl)
+shape: (5, 4)
+┌───────────────────┬───────────────┬────────────────────────┬───────────────┐
+│ pets ┆ default_match ┆ case_insensitive_match ┆ literal_match │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ str ┆ bool ┆ bool ┆ bool │
+╞═══════════════════╪═══════════════╪════════════════════════╪═══════════════╡
+│ cat ┆ false ┆ false ┆ false │
+│ dog ┆ false ┆ false ┆ false │
+│ rabbit and parrot ┆ true ┆ true ┆ false │
+│ dove ┆ false ┆ true ┆ false │
+│ null ┆ null ┆ null ┆ null │
+└───────────────────┴───────────────┴────────────────────────┴───────────────┘
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
suffix |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(has_suffix=nw.col("fruits").str.ends_with("ngo"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits has_suffix
+0 apple False
+1 mango True
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_suffix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ false │
+│ mango ┆ true │
+│ null ┆ null │
+└────────┴────────────┘
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lyrics_head=nw.col("lyrics").str.head())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ lyrics lyrics_head
+0 Atatata Atata
+1 taata taata
+2 taatatata taata
+3 zukkyun zukky
+
>>> func(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_head │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ Atata │
+│ taata ┆ taata │
+│ taatatata ┆ taata │
+│ zukkyun ┆ zukky │
+└───────────┴─────────────┘
+
slice(offset, length=None)
+
+Create subslices of the string values of an expression.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
offset |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
length |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to None (default), the slice is taken to the end of the string. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"s": ["pear", None, "papaya", "dragonfruit"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(s_sliced=nw.col("s").str.slice(4, length=3))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ s s_sliced
+0 pear
+1 None None
+2 papaya ya
+3 dragonfruit onf
+
>>> func(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ │
+│ null ┆ null │
+│ papaya ┆ ya │
+│ dragonfruit ┆ onf │
+└─────────────┴──────────┘
+
Using negative indexes:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(s_sliced=nw.col("s").str.slice(-3))
+
>>> func(df_pd)
+ s s_sliced
+0 pear ear
+1 None None
+2 papaya aya
+3 dragonfruit uit
+
>>> func(df_pl)
+shape: (4, 2)
+┌─────────────┬──────────┐
+│ s ┆ s_sliced │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═════════════╪══════════╡
+│ pear ┆ ear │
+│ null ┆ null │
+│ papaya ┆ aya │
+│ dragonfruit ┆ uit │
+└─────────────┴──────────┘
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
prefix |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(has_prefix=nw.col("fruits").str.starts_with("app"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits has_prefix
+0 apple True
+1 mango False
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬────────────┐
+│ fruits ┆ has_prefix │
+│ --- ┆ --- │
+│ str ┆ bool │
+╞════════╪════════════╡
+│ apple ┆ true │
+│ mango ┆ false │
+│ null ┆ null │
+└────────┴────────────┘
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is not supported. + |
+
+ 5
+ |
+
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"lyrics": ["Atatata", "taata", "taatatata", "zukkyun"]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lyrics_tail=nw.col("lyrics").str.tail())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ lyrics lyrics_tail
+0 Atatata atata
+1 taata taata
+2 taatatata atata
+3 zukkyun kkyun
+
>>> func(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_tail │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ atata │
+│ taata ┆ taata │
+│ taatatata ┆ atata │
+│ zukkyun ┆ kkyun │
+└───────────┴─────────────┘
+
to_datetime(format)
+
+Convert to Datetime dtype.
+ + +pandas defaults to nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. The ability to +set the time unit in pandas, if the version permits, will arrive.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
format |
+
+ str
+ |
+
+
+
+ Format to parse strings with. Must be passed, as different + dataframe libraries have different ways of auto-inferring + formats. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": ["2020-01-01", "2020-01-02"]})
+>>> df_pl = pl.DataFrame({"a": ["2020-01-01", "2020-01-02"]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a").str.to_datetime(format="%Y-%m-%d"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2020-01-01
+1 2020-01-02
+>>> func(df_pl)
+shape: (2, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs] │
+╞═════════════════════╡
+│ 2020-01-01 00:00:00 │
+│ 2020-01-02 00:00:00 │
+└─────────────────────┘
+
to_lowercase()
+
+Transform string to lowercase variant.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["APPLE", "MANGO", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(lower_col=nw.col("fruits").str.to_lowercase())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits lower_col
+0 APPLE apple
+1 MANGO mango
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ lower_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ APPLE ┆ apple │
+│ MANGO ┆ mango │
+│ null ┆ null │
+└────────┴───────────┘
+
to_uppercase()
+
+Transform string to uppercase variant.
+ + +The PyArrow backend will convert 'ß' to 'ẞ' instead of 'SS'. +For more info see the related issue. +There may be other unicode-edge-case-related variations across implementations.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"fruits": ["apple", "mango", None]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(upper_col=nw.col("fruits").str.to_uppercase())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ fruits upper_col
+0 apple APPLE
+1 mango MANGO
+2 None None
+
>>> func(df_pl)
+shape: (3, 2)
+┌────────┬───────────┐
+│ fruits ┆ upper_col │
+│ --- ┆ --- │
+│ str ┆ str │
+╞════════╪═══════════╡
+│ apple ┆ APPLE │
+│ mango ┆ MANGO │
+│ null ┆ null │
+└────────┴───────────┘
+
narwhals.GroupBy
agg(*aggs, **named_aggs)
+
+Compute aggregations for each group of a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
aggs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Aggregations to compute for each group of the group by operation, +specified as positional arguments. + |
+
+ ()
+ |
+
named_aggs |
+
+ IntoExpr
+ |
+
+
+
+ Additional aggregations, specified as keyword arguments. + |
+
+ {}
+ |
+
Examples:
+Group by one column or by multiple columns and call agg
to compute
+the grouped sum of another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+
We define library agnostic functions:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.group_by("a").agg(nw.col("b").sum()).sort("a")
+... return nw.to_native(df)
+
>>> def func_mult_col(df_any):
+... df = nw.from_native(df_any)
+... df = df.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
and func_mult_col
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> func_mult_col(df_pd)
+ a b c
+0 a 1 8
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func_mult_col(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 8 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
Anything documented in the API reference is intended to work consistently among +supported backends.
+For example: +
import narwhals as nw
+
+df.with_columns(
+ a_mean = nw.col('a').mean(),
+ a_std = nw.col('a').std(),
+)
+
DataFrame.with_columns
, narwhals.col
, Expr.mean
, and Expr.std
are
+all documented in the API reference.
+However, +
import narwhals as nw
+
+df.with_columns(
+ a_ewm_mean = nw.col('a').ewm_mean(alpha=.7),
+)
+
Expr.ewm_mean
only appears in the Polars API reference, but not in the Narwhals
+one.
+In general, you should expect any fundamental dataframe operation to be supported - if +one that you need is not, please do open a feature request!
+ + + + + + + + + + + + + +narwhals.LazyFrame
Narwhals LazyFrame, backed by a native dataframe.
+The native dataframe might be pandas.DataFrame, polars.LazyFrame, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
.
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.columns
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+['foo', 'bar', 'ham']
+>>> func(lf_pl)
+['foo', 'bar', 'ham']
+
schema: Schema
+
+
+ property
+
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf.schema
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
clone()
+
+Create a copy of this LazyFrame.
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we copy the DataFrame:
+>>> @nw.narwhalify
+... def func(df):
+... return df.clone()
+
+>>> func(df_pd)
+ a b
+0 1 3
+1 2 4
+
+>>> func(df_pl).collect()
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
collect()
+
+Materialize this LazyFrame into a DataFrame.
+ + +Returns:
+Type | +Description | +
---|---|
+ DataFrame[Any]
+ |
+
+
+
+ DataFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import polars as pl
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "b", "c"],
+... "b": [1, 2, 3, 4, 5, 6],
+... "c": [6, 5, 4, 3, 2, 1],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf
+┌───────────────────────────────────────────────┐
+| Narwhals LazyFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> df = lf.group_by("a").agg(nw.all().sum()).collect()
+>>> nw.to_native(df).sort("a")
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 4 ┆ 10 │
+│ b ┆ 11 ┆ 10 │
+│ c ┆ 6 ┆ 1 │
+└─────┴─────┴─────┘
+
collect_schema()
+
+Get an ordered mapping of column names to their data type.
+ + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.from_native(lf_pl)
+>>> lf.collect_schema()
+Schema({'foo': Int64, 'bar': Float64, 'ham': String})
+
drop(*columns)
+
+Remove columns from the LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*columns |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the + dataframe. Accepts column selector input. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("ham")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar
+0 1 6.0
+1 2 7.0
+2 3 8.0
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> @nw.narwhalify
+... def func(df):
+... return df.drop("foo", "ham")
+
>>> func(df_pd)
+ bar
+0 6.0
+1 7.0
+2 8.0
+>>> func(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
drop_nulls()
+
+Drop null values.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.drop_nulls()
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a ba
+0 1.0 1.0
+>>> func(df_pl).collect()
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ ba │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 1.0 ┆ 1.0 │
+└─────┴─────┘
+
filter(*predicates)
+
+Filter the rows in the LazyFrame based on a predicate expression.
+The original order of the remaining rows is preserved.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*predicates |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Expression that evaluates to a boolean Series. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we filter on +one condition.
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter(nw.col("foo") > 1)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+1 2 7 b
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> @nw.narwhalify
+... def func(df):
+... dframe = df.filter(
+... nw.col("foo") == 1,
+... nw.col("ham") == "a",
+... )
+... return dframe
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+>>> func(df_pl)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Filter on an OR condition:
+>>> @nw.narwhalify
+... def func(df):
+... return df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+>>> func(df_pd)
+ foo bar ham
+0 1 6 a
+2 3 8 c
+>>> func(df_pl)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
group_by(*keys)
+
+Start a group by operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts expression input. Strings are +parsed as column names. + |
+
+ ()
+ |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of
+another column.
>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we group by one column
+and call agg
to compute the grouped sum of another column.
>>> @nw.narwhalify
+... def func(df):
+... return df.group_by("a").agg(nw.col("b").sum()).sort("a")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 a 2
+1 b 5
+2 c 3
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.group_by(["a", "b"]).agg(nw.max("c")).sort(["a", "b"])
+>>> func(df_pd)
+ a b c
+0 a 1 5
+1 b 2 4
+2 b 3 2
+3 c 3 1
+>>> func(df_pl)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n=5)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function that gets the first 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
join(other, *, how='inner', left_on=None, right_on=None)
+
+Add a join operation to the Logical Plan.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Self
+ |
+
+
+
+ Lazy DataFrame to join with. + |
+ + required + | +
how |
+
+ Literal['inner', 'left', 'cross', 'semi', 'anti']
+ |
+
+
+
+ Join strategy. +
|
+
+ 'inner'
+ |
+
left_on |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the left DataFrame. + |
+
+ None
+ |
+
right_on |
+
+ str | list[str] | None
+ |
+
+
+
+ Join column of the right DataFrame. + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined LazyFrame + |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+>>> data_other = {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+
>>> df_pd = pd.DataFrame(data)
+>>> other_pd = pd.DataFrame(data_other)
+
>>> df_pl = pl.LazyFrame(data)
+>>> other_pl = pl.LazyFrame(data_other)
+
Let's define a dataframe-agnostic function in which we join over the "ham" column:
+>>> @nw.narwhalify
+... def join_on_ham(df, other):
+... return df.join(other, left_on="ham", right_on="ham")
+
We can now pass either pandas or Polars to the function:
+>>> join_on_ham(df_pd, other_pd)
+ foo bar ham apple
+0 1 6.0 a x
+1 2 7.0 b y
+
>>> join_on_ham(df_pl, other_pl).collect()
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
lazy()
+
+Lazify the DataFrame (if possible).
+If a library does not support lazy execution, then this is a no-op.
+ + +Examples:
+Construct pandas and Polars objects:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.LazyFrame(df)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df_any):
+... return df_any.lazy()
+
Note that the pandas DataFrame stays eager, and the Polars LazyFrame stays lazy:
+>>> func(df_pd)
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+>>> func(df_pl)
+<LazyFrame ...>
+
pipe(function, *args, **kwargs)
+
+Pipe function call.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "ba": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.pipe(lambda _df: _df.select("a"))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a
+0 1
+1 2
+2 3
+>>> func(df_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
rename(mapping)
+
+Rename column names.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
mapping |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name, or a + function that takes the old name as input and returns the + new name. + |
+ + required + | +
If existing names are swapped (e.g. 'A' points to 'B' and 'B' + points to 'A'), polars will block projection and predicate + pushdowns at this node.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.rename({"foo": "apple"})
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ apple bar ham
+0 1 6 a
+1 2 7 b
+2 3 8 c
+>>> func(lf_pl).collect()
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
select(*exprs, **named_exprs)
+
+Select columns from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we pass the name of a +column to select that column.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("foo")
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo
+0 1
+1 2
+2 3
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+>>> func(lf_pl).collect()
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(["foo", "bar"])
+>>> func(df_pd)
+ foo bar
+0 1 6
+1 2 7
+2 3 8
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> func(df_pd)
+ foo bar
+0 1 7
+1 2 8
+2 3 9
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(threshold=nw.col("foo") * 2)
+>>> func(df_pd)
+ threshold
+0 2
+1 4
+2 6
+>>> func(df_pl)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+>>> func(lf_pl).collect()
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False)
+
+Sort the LazyFrame by the given columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
by |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to sort by. Accepts expression input. Strings are + parsed as column names. + |
+ + required + | +
*more_by |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional + arguments. + |
+
+ ()
+ |
+
descending |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple + columns, can be specified per column by passing a + sequence of booleans. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_lf = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function in which we sort by multiple +columns in different orders:
+>>> @nw.narwhalify
+... def func(df):
+... return df.sort("c", "a", descending=[False, True])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c
+0 1.0 6.0 a
+2 NaN 4.0 b
+1 2.0 5.0 c
+>>> func(df_lf).collect()
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
tail(n=5)
+
+Get the last n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+
+ 5
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function that gets the last 3 rows.
+>>> @nw.narwhalify
+... def func(df):
+... return df.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+3 4 10
+4 5 11
+5 6 12
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+>>> func(lf_pl).collect()
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+
unique(subset)
+
+Drop duplicate rows from this LazyFrame.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
subset |
+
+ str | list[str]
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows.
+ If set to |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ LazyFrame with unique rows. + |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+>>> df_pd = pd.DataFrame(data)
+>>> lf_pl = pl.LazyFrame(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.unique(["bar", "ham"])
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ foo bar ham
+0 1 a b
+>>> func(lf_pl).collect()
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this LazyFrame.
+Added columns will replace existing columns with the same name.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ A new LazyFrame with the columns added. + |
+
Creating a new LazyFrame using this method does not create a new copy of +existing data.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df = {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+>>> df_pd = pd.DataFrame(df)
+>>> df_pl = pl.DataFrame(df)
+>>> lf_pl = pl.LazyFrame(df)
+
Let's define a dataframe-agnostic function in which we pass an expression +to add it as a new column:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns((nw.col("a") * 2).alias("2a"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b c 2a
+0 1 0.5 True 2
+1 2 4.0 True 4
+2 3 10.0 False 6
+3 4 13.0 True 8
+>>> func(df_pl)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+>>> func(lf_pl).collect()
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
with_row_index(name='index')
+
+Insert column which enumerates rows.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.LazyFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.with_row_index()
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ index a b
+0 0 1 4
+1 1 2 5
+2 2 3 6
+>>> func(df_pl).collect()
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ index ┆ a ┆ b │
+│ --- ┆ --- ┆ --- │
+│ u32 ┆ i64 ┆ i64 │
+╞═══════╪═════╪═════╡
+│ 0 ┆ 1 ┆ 4 │
+│ 1 ┆ 2 ┆ 5 │
+│ 2 ┆ 3 ┆ 6 │
+└───────┴─────┴─────┘
+
Here are the top-level functions available in Narwhals.
+ + +all()
+
+Instantiate an expression representing all columns.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.all() * 2)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 2 8
+1 4 10
+2 6 12
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 2 ┆ 8 │
+│ 4 ┆ 10 │
+│ 6 ┆ 12 │
+└─────┴─────┘
+
all_horizontal(*exprs)
+
+Compute the bitwise AND horizontally across columns.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts expression input. + |
+
+ ()
+ |
+
pandas and Polars handle null values differently.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {
+... "a": [False, False, True, True, False, None],
+... "b": [False, True, True, None, None, None],
+... }
+>>> df_pl = pl.DataFrame(data)
+>>> df_pd = pd.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select("a", "b", all=nw.all_horizontal("a", "b"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b all
+0 False False False
+1 False True False
+2 True True True
+3 True None False
+4 False None False
+5 None None False
+
>>> func(df_pl)
+shape: (6, 3)
+┌───────┬───────┬───────┐
+│ a ┆ b ┆ all │
+│ --- ┆ --- ┆ --- │
+│ bool ┆ bool ┆ bool │
+╞═══════╪═══════╪═══════╡
+│ false ┆ false ┆ false │
+│ false ┆ true ┆ false │
+│ true ┆ true ┆ true │
+│ true ┆ null ┆ null │
+│ false ┆ null ┆ false │
+│ null ┆ null ┆ null │
+└───────┴───────┴───────┘
+
col(*names)
+
+Creates an expression that references one or more columns by their name(s).
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
names |
+
+ str | Iterable[str]
+ |
+
+
+
+ Name(s) of the columns to reference. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.col("a") * nw.col("b"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 3
+1 8
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+│ 8 │
+└─────┘
+
concat(items, *, how='vertical')
+
+from_native(native_object, *, strict=True, eager_only=None, eager_or_interchange_only=None, series_only=None, allow_series=None)
+
+Convert dataframe/series to Narwhals DataFrame, LazyFrame, or Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
native_object |
+
+ Any
+ |
+
+
+
+ Raw object from user. +Depending on the other arguments, input object can be: +
|
+ + required + | +
strict |
+
+ bool
+ |
+
+
+
+ Whether to raise if object can't be converted (default) or +to just leave it as-is. + |
+
+ True
+ |
+
eager_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects. + |
+
+ None
+ |
+
eager_or_interchange_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects or objects which +implement the Dataframe Interchange Protocol. + |
+
+ None
+ |
+
series_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow series. + |
+
+ None
+ |
+
allow_series |
+
+ bool | None
+ |
+
+
+
+ Whether to allow series (default is only dataframe / lazyframe). + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ narwhals.DataFrame or narwhals.LazyFrame or narwhals.Series + |
+
get_level(obj)
+
+Level of support Narwhals has for current object.
+This can be one of:
+df.schema
)get_native_namespace(obj)
+
+Get native namespace from object.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'pandas'...>
+>>> df = nw.from_native(pl.DataFrame({"a": [1, 2, 3]}))
+>>> nw.get_native_namespace(df)
+<module 'polars'...>
+
is_ordered_categorical(series)
+
+Return whether indices of categories are semantically meaningful.
+This is a convenience function for accessing what would otherwise be
+the is_ordered
property from the DataFrame Interchange Protocol,
+see https://data-apis.org/dataframe-protocol/latest/API.html.
dtype.ordering == "physical"
.dtype.cat.ordered == True
.dtype.type.ordered == True
.Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = ["x", "y"]
+>>> s_pd = pd.Series(data, dtype=pd.CategoricalDtype(ordered=True))
+>>> s_pl = pl.Series(data, dtype=pl.Categorical(ordering="physical"))
+
Let's define a library-agnostic function:
+>>> @nw.narwhalify
+... def func(s):
+... return nw.is_ordered_categorical(s)
+
Then, we can pass any supported library to func
:
>>> func(s_pd)
+True
+>>> func(s_pl)
+True
+
len()
+
+Return the number of rows.
+ + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.len())
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ len
+0 2
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ len │
+│ --- │
+│ u32 │
+╞═════╡
+│ 2 │
+└─────┘
+
maybe_align_index(lhs, rhs)
+
+Align lhs
to the Index of rhs, if they're both pandas-like.
This is only really intended for backwards-compatibility purposes,
+for example if your library already aligns indices for users.
+If you're designing a new library, we highly encourage you to not
+rely on the Index.
+For non-pandas-like inputs, this only checks that lhs
and rhs
+are the same length.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2]}, index=[3, 4])
+>>> s_pd = pd.Series([6, 7], index=[4, 3])
+>>> df = nw.from_native(df_pd)
+>>> s = nw.from_native(s_pd, series_only=True)
+>>> nw.to_native(nw.maybe_align_index(df, s))
+ a
+4 2
+3 1
+
maybe_set_index(df, column_names)
+
+Set columns column_names
to be the index of df
, if df
is pandas-like.
This is only really intended for backwards-compatibility purposes, +for example if your library already aligns indices for users. +If you're designing a new library, we highly encourage you to not +rely on the Index. +For non-pandas-like inputs, this is a no-op.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_set_index(df, "b"))
+ a
+b
+4 1
+5 2
+
maybe_convert_dtypes(df, *args, **kwargs)
+
+Convert columns to the best possible dtypes using dtypes supporting pd.NA
, if df is pandas-like.
For non-pandas-like inputs, this is a no-op.
+Also, args
and kwargs
just get passed down to the underlying library as-is.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> import numpy as np
+>>> df_pd = pd.DataFrame(
+... {
+... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+... "b": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
+... }
+... )
+>>> df = nw.from_native(df_pd)
+>>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes
+a Int32
+b boolean
+dtype: object
+
lit(value, dtype=None)
+
+Return an expression representing a literal value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
value |
+
+ Any
+ |
+
+
+
+ The value to use as literal. + |
+ + required + | +
dtype |
+
+ DType | None
+ |
+
+
+
+ The data type of the literal value. If not provided, the data type will be inferred. + |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2]})
+>>> df_pd = pd.DataFrame({"a": [1, 2]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.with_columns(nw.lit(3).alias("b"))
+
We can then pass either pandas or polars to func
:
>>> func(df_pd)
+ a b
+0 1 3
+1 2 3
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i32 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 3 │
+└─────┴─────┘
+
max(*columns)
+
+Return the maximum value.
+ + +Syntactic sugar for nw.col(columns).max()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
columns |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.max("a"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 2 │
+└─────┘
+
mean(*columns)
+
+Get the mean value.
+ + +Syntactic sugar for nw.col(columns).mean()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
columns |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 8, 3]})
+>>> df_pd = pd.DataFrame({"a": [1, 8, 3]})
+
We define a dataframe agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.mean("a"))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 4.0
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ f64 │
+╞═════╡
+│ 4.0 │
+└─────┘
+
min(*columns)
+
+Return the minimum value.
+ + +Syntactic sugar for nw.col(columns).min()
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
columns |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [5, 10]})
+>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [5, 10]})
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.min("b"))
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ b
+0 5
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ b │
+│ --- │
+│ i64 │
+╞═════╡
+│ 5 │
+└─────┘
+
narwhalify(func=None, *, strict=False, eager_only=False, eager_or_interchange_only=False, series_only=False, allow_series=True)
+
+Decorate function so it becomes dataframe-agnostic.
+narwhalify
will try to convert any dataframe/series-like object into the respective
+Narwhals DataFrame/Series, while leaving the other parameters as they are.
Similarly, if the output of the function is a narwhals DataFrame or Series, it will be +converted back to the original dataframe/series type, while if the output is another +type it will be left as is.
+By setting strict=True
, every input and every output will be required to be a
+dataframe/series-like object.
Instead of writing
+import narwhals as nw
+
+
+def func(df_any):
+ df = nw.from_native(df_any, strict=False)
+ df = df.group_by("a").agg(nw.col("b").sum())
+ return nw.to_native(df)
+
you can just write
+import narwhals as nw
+
+
+@nw.narwhalify
+def func(df):
+ return df.group_by("a").agg(nw.col("b").sum())
+
You can also pass in extra arguments, e.g.
+@nw.narwhalify(eager_only=True)
+
that will get passed down to nw.from_native
.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
func |
+
+ Callable[..., Any] | None
+ |
+
+
+
+ Function to wrap in a from_native-to_native block. + |
+
+ None
+ |
+
strict |
+
+ bool
+ |
+
+
+
+ Whether to raise if object can't be converted or to just leave it as-is +(default). + |
+
+ False
+ |
+
eager_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects. + |
+
+ False
+ |
+
eager_or_interchange_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects or objects which +implement the Dataframe Interchange Protocol. + |
+
+ False
+ |
+
series_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow series. + |
+
+ False
+ |
+
allow_series |
+
+ bool | None
+ |
+
+
+
+ Whether to allow series (default is only dataframe / lazyframe). + |
+
+ True
+ |
+
sum(*columns)
+
+Sum all values.
+ + +Syntactic sugar for nw.col(columns).sum()
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
columns |
+
+ str
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2]})
+>>> df_pd = pd.DataFrame({"a": [1, 2]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.sum("a"))
+
We can then pass either pandas or polars to func
:
>>> func(df_pd)
+ a
+0 3
+>>> func(df_pl)
+shape: (1, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 3 │
+└─────┘
+
sum_horizontal(*exprs)
+
+Sum all values horizontally across columns
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Name(s) of the columns to use in the aggregation function. Accepts expression input. + |
+
+ ()
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]})
+>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [5, 10, 15]})
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(df):
+... return df.select(nw.sum_horizontal("a", "b"))
+
We can then pass either pandas or polars to func
:
>>> func(df_pd)
+ a
+0 6
+1 12
+2 18
+>>> func(df_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 6 │
+│ 12 │
+│ 18 │
+└─────┘
+
show_versions()
+
+Print useful debugging information
+Examples:
+>>> from narwhals import show_versions
+>>> show_versions() # doctest:+SKIP
+
+
+ to_native(narwhals_object, *, strict=True)
+
+Convert Narwhals object to native one.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
narwhals_object |
+
+ DataFrame[IntoFrameT] | LazyFrame[IntoFrameT] | Series
+ |
+
+
+
+ Narwhals object. + |
+ + required + | +
strict |
+
+ bool
+ |
+
+
+
+ whether to raise on non-Narwhals input. + |
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ IntoFrameT | Any
+ |
+
+
+
+ Object of class that user started with. + |
+
narwhals.Schema
Ordered mapping of column names to their data type.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
schema |
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None
+ |
+
+
+
+ Mapping[str, DType] | Iterable[tuple[str, DType]] | None +The schema definition given by column names and their associated +instantiated Narwhals data type. Accepts a mapping or an iterable of tuples. + |
+
+ None
+ |
+
Examples:
+Define a schema by passing instantiated data types.
+>>> import narwhals as nw
+>>> schema = nw.Schema({"foo": nw.Int8(), "bar": nw.String()})
+>>> schema
+Schema({'foo': Int8, 'bar': String})
+
Access the data type associated with a specific column name.
+>>> schema["foo"]
+Int8
+
Access various schema properties using the names
, dtypes
, and len
methods.
>>> schema.names()
+['foo', 'bar']
+>>> schema.dtypes()
+[Int8, String]
+>>> schema.len()
+2
+
names()
+
+Get the column names of the schema.
+ +dtypes()
+
+Get the data types of the schema.
+ +len()
+
+Get the number of columns in the schema.
+ +narwhals.selectors
The following selectors are all supported. In addition, just like in Polars, the following +set operations are supported:
+&
|
-
~
boolean()
+
+Select boolean columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select boolean +dtypes:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(ncs.boolean())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ c
+0 False
+1 True
+>>> func(df_pl)
+shape: (2, 1)
+┌───────┐
+│ c │
+│ --- │
+│ bool │
+╞═══════╡
+│ false │
+│ true │
+└───────┘
+
by_dtype(*dtypes)
+
+Select columns based on their dtype.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
dtypes |
+
+ Any
+ |
+
+
+
+ one or more data types to select + |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select int64 and float64 +dtypes and multiply each value by 2:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2)
+... return nw.to_native(df)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
categorical()
+
+Select categorical columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data).astype({"b": "category"})
+>>> df_pl = pl.DataFrame(data, schema_overrides={"b": pl.Categorical})
+
Let's define a dataframe-agnostic function to select categorical +dtypes:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(ncs.categorical())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ cat │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
numeric()
+
+Select numeric columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select numeric +dtypes and multiply each value by 2:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(ncs.numeric() * 2)
+... return nw.to_native(df)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ a c
+0 2 8.2
+1 4 4.6
+>>> func(df_pl)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ c │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 2 ┆ 8.2 │
+│ 4 ┆ 4.6 │
+└─────┴─────┘
+
string()
+
+Select string columns.
+ + +Examples:
+>>> import narwhals as nw
+>>> import narwhals.selectors as ncs
+>>> import pandas as pd
+>>> import polars as pl
+>>>
+>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function to select string +dtypes:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(ncs.string())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars dataframes:
+>>> func(df_pd)
+ b
+0 x
+1 y
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ b │
+│ --- │
+│ str │
+╞═════╡
+│ x │
+│ y │
+└─────┘
+
narwhals.Series
Narwhals Series, backed by a native series.
+The native series might be pandas.Series, polars.Series, ...
+This class is not meant to be instantiated directly - instead, use
+narwhals.from_native
, making sure to pass allow_series=True
or
+series_only=True
.
dtype: Any
+
+
+ property
+
+
+Get the data type of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dtype
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+Int64
+>>> func(s_pl)
+Int64
+
name: str
+
+
+ property
+
+
+Get the name of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.name
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+'foo'
+>>> func(s_pl)
+'foo'
+
shape: tuple[int]
+
+
+ property
+
+
+Get the shape of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.shape
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+(3,)
+>>> func(s_pl)
+(3,)
+
abs()
+
+Calculate the absolute value of each element.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, -4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.abs()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 4
+2 3
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 3
+]
+
alias(name)
+
+Rename the Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
name |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="foo")
+>>> s_pl = pl.Series("foo", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.alias("bar")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 2
+2 3
+Name: bar, dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: 'bar' [i64]
+[
+ 1
+ 2
+ 3
+]
+
all()
+
+Return whether all values in the Series are True.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [True, False, True]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.all()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.False_
+>>> func(s_pl)
+False
+
any()
+
+Return whether any of the values in the Series are True.
+ + +Only works on Series of data type Boolean.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [False, True, False]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.any()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.True_
+>>> func(s_pl)
+True
+
cast(dtype)
+
+Cast between data types.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
dtype |
+
+ Any
+ |
+
+
+
+ Data type that the object will be cast into. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [True, False, True]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.cast(nw.Int64)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 0
+2 1
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 0
+ 1
+]
+
count()
+
+Returns the number of non-null elements in the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.count()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(3)
+>>> func(s_pl)
+3
+
cum_sum()
+
+Calculate the cumulative sum.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.cum_sum()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 6
+2 9
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 6
+ 9
+]
+
diff()
+
+Calculate the difference with the previous element, for each element.
+ + +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
s_any.diff().fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.diff()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 2.0
+2 -1.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ -1
+]
+
drop_nulls()
+
+Drop all null values.
+ + +drop_nans
+A null value is not the same as a NaN value.
+To drop NaN values, use drop_nans
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import numpy as np
+>>> import narwhals as nw
+>>> s_pd = pd.Series([2, 4, None, 3, 5])
+>>> s_pl = pl.Series("a", [2, 4, None, 3, 5])
+
Now define a dataframe-agnostic function that drops the null values:
>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.drop_nulls()
+
Then we can pass either Series (polars or pandas) to func
:
>>> func(s_pd)
+0 2.0
+1 4.0
+3 3.0
+4 5.0
+dtype: float64
+>>> func(s_pl)
+shape: (4,)
+Series: 'a' [i64]
+[
+ 2
+ 4
+ 3
+ 5
+]
+
fill_null(value)
+
+Fill null values using the specified value.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
value |
+
+ Any
+ |
+
+
+
+ Value used to fill null values. + |
+ + required + | +
pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, None]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.fill_null(5)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1.0
+1 2.0
+2 5.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 1
+ 2
+ 5
+]
+
filter(other)
+
+Filter elements in the Series based on a condition.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [4, 10, 15, 34, 50]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.filter(s_any > 10)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+2 15
+3 34
+4 50
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 15
+ 34
+ 50
+]
+
head(n=10)
+
+Get the first n
rows.
Arguments + n : int + Number of rows to return.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that returns the first 3 rows:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.head(3)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 0
+1 1
+2 2
+dtype: int64
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 0
+ 1
+ 2
+]
+
is_between(lower_bound, upper_bound, closed='both')
+
+Get a boolean mask of the values that are between the given lower/upper bounds.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
lower_bound |
+
+ Any
+ |
+
+
+
+ Lower bound value. + |
+ + required + | +
upper_bound |
+
+ Any
+ |
+
+
+
+ Upper bound value. + |
+ + required + | +
closed |
+
+ str
+ |
+
+
+
+ Define which sides of the interval are closed (inclusive). + |
+
+ 'both'
+ |
+
If the value of the lower_bound
is greater than that of the upper_bound
,
+then the values will be False, as no value can satisfy the condition.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s_pd = pd.Series([1, 2, 3, 4, 5])
+>>> s_pl = pl.Series([1, 2, 3, 4, 5])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_between(2, 4, "right")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ false
+]
+
is_duplicated()
+
+Get a mask of all duplicated rows in the Series.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 2, 3, 1])
+>>> s_pl = pl.Series([1, 2, 3, 1])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_duplicated()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 True
+1 False
+2 False
+3 True
+dtype: bool
+>>> func(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ true
+ false
+ false
+ true
+]
+
is_empty()
+
+Check if the series is empty.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that filters values +greater than 10, and then checks if the result is empty or not:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.filter(s_any > 10).is_empty()
+
We can then pass either pandas or Polars to func
:
>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+>>> func(s_pd), func(s_pl)
+(True, True)
+
>>> s_pd = pd.Series([100, 2, 3])
+>>> s_pl = pl.Series([100, 2, 3])
+>>> func(s_pd), func(s_pl)
+(False, False)
+
is_first_distinct()
+
+Return a boolean mask indicating the first occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2])
+>>> s_pl = pl.Series([1, 1, 2, 3, 2])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_first_distinct()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 True
+1 False
+2 True
+3 True
+4 False
+dtype: bool
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ true
+ false
+ true
+ true
+ false
+]
+
is_in(other)
+
+Check if the elements of this Series are in the other sequence.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Any
+ |
+
+
+
+ Sequence of primitive type. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s_pd = pd.Series([1, 2, 3])
+>>> s_pl = pl.Series([1, 2, 3])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_in([3, 2, 8])
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 True
+dtype: bool
+>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+]
+
is_last_distinct()
+
+Return a boolean mask indicating the last occurrence of each distinct value.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2])
+>>> s_pl = pl.Series([1, 1, 2, 3, 2])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_last_distinct()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 False
+3 True
+4 True
+dtype: bool
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ true
+ false
+ true
+ true
+]
+
is_null()
+
+Returns a boolean Series indicating which values are null.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, None]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_null()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 False
+2 True
+dtype: bool
+>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+]
+
is_sorted(*, descending=False)
+
+Check if the Series is sorted.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
descending |
+
+ bool
+ |
+
+
+
+ Check if the Series is sorted in descending order. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> unsorted_data = [1, 3, 2]
+>>> sorted_data = [3, 2, 1]
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any, descending=False):
+... return s_any.is_sorted(descending=descending)
+
We can then pass either pandas or Polars to func
:
>>> func(pl.Series(unsorted_data))
+False
+>>> func(pl.Series(sorted_data), descending=True)
+True
+>>> func(pd.Series(unsorted_data))
+False
+>>> func(pd.Series(sorted_data), descending=True)
+True
+
is_unique()
+
+Get a mask of all unique rows in the Series.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 2, 3, 1])
+>>> s_pl = pl.Series([1, 2, 3, 1])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.is_unique()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 True
+3 False
+dtype: bool
+
>>> func(s_pl)
+shape: (4,)
+Series: '' [bool]
+[
+ false
+ true
+ true
+ false
+]
+
item(index=None)
+
+Return the Series as a scalar, or return the element at the given index.
+If no index is provided, this is equivalent to s[0]
, with a check
+that the shape is (1,). With an index, this is equivalent to s[index]
.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
Let's define a dataframe-agnostic function that returns item at given index
+>>> @nw.narwhalify
+... def func(s_any, index=None):
+... return s_any.item(index)
+
We can then pass either pandas or Polars to func
:
>>> func(pl.Series("a", [1]), None), func(pd.Series([1]), None)
+(1, 1)
+
>>> func(pl.Series("a", [9, 8, 7]), -1), func(pl.Series([9, 8, 7]), -2)
+(7, 8)
+
len()
+
+Return the number of elements in the Series.
+Null values count towards the total.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1, 2, None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that computes the len of the series:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.len()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+3
+>>> func(s_pl)
+3
+
max()
+
+Get the maximum value in this Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.max()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(3)
+>>> func(s_pl)
+3
+
mean()
+
+Reduce this Series to the mean value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.mean()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.float64(2.0)
+>>> func(s_pl)
+2.0
+
min()
+
+Get the minimal value in this Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.min()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(1)
+>>> func(s_pl)
+1
+
null_count()
+
+Count the number of null values in the Series.
+ + +pandas and Polars handle null values differently. Polars distinguishes +between NaN and Null, whereas pandas doesn't.
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, None, 3])
+>>> s_pl = pl.Series([1, None, None])
+
Let's define a dataframe-agnostic function that returns the null count of +the series:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.null_count()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+1
+>>> func(s_pl)
+2
+
n_unique()
+
+Count the number of unique values.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.n_unique()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+3
+>>> func(s_pl)
+3
+
quantile(quantile, interpolation)
+
+Get quantile value of the series.
+ + +pandas and Polars may have implementation differences for a given interpolation method.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
quantile |
+ + | +
+
+
+ float +Quantile between 0.0 and 1.0. + |
+ + required + | +
interpolation |
+ + | +
+
+
+ {'nearest', 'higher', 'lower', 'midpoint', 'linear'} +Interpolation method. + |
+ + required + | +
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(50))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return [
+... s_any.quantile(quantile=q, interpolation="nearest")
+... for q in (0.1, 0.25, 0.5, 0.75, 0.9)
+... ]
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+[5, 12, 24, 37, 44]
+
>>> func(s_pl)
+[5.0, 12.0, 25.0, 37.0, 44.0]
+
round(decimals=0)
+
+Round underlying floating point data by decimals
digits.
Arguments + decimals: Number of decimals to round by.
+ + +For values exactly halfway between rounded decimal values pandas and Polars behave differently.
+pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and +4.5 round to 4.0, etc.).
+Polars rounds away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
+Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = [1.12345, 2.56789, 3.901234]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that rounds to the first decimal:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.round(1)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1.1
+1 2.6
+2 3.9
+dtype: float64
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [f64]
+[
+ 1.1
+ 2.6
+ 3.9
+]
+
sample(n=None, fraction=None, *, with_replacement=False)
+
+Sample randomly from this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int | None
+ |
+
+
+
+ Number of items to return. Cannot be used with fraction. + |
+
+ None
+ |
+
fraction |
+
+ float | None
+ |
+
+
+
+ Fraction of items to return. Cannot be used with n. + |
+
+ None
+ |
+
with_replacement |
+
+ bool
+ |
+
+
+
+ Allow values to be sampled more than once. + |
+
+ False
+ |
+
The sample
method returns a Series with a specified number of
+randomly selected items chosen from this Series.
+The results are not consistent across libraries.
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+
>>> s_pd = pd.Series([1, 2, 3, 4])
+>>> s_pl = pl.Series([1, 2, 3, 4])
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.sample(fraction=1.0, with_replacement=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ a
+2 3
+1 2
+3 4
+3 4
+>>> func(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 4
+]
+
shift(n)
+
+Shift values by n
positions.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of indices to shift forward. If a negative value is passed, +values are shifted in the opposite direction instead. + |
+ + required + | +
pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to shift
+and fill missing values with 0
in an Int64 column, you could
+do:
s_any.shift(1).fill_null(0).cast(nw.Int64)
+
+Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.shift(1)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 2.0
+2 4.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ 4
+]
+
sort(*, descending=False)
+
+Sort this Series. Place null values first.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
descending |
+
+ bool
+ |
+
+
+
+ Sort in descending order. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [5, None, 1, 2]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define library agnostic functions:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.sort()
+
>>> @nw.narwhalify
+... def func_descend(s_any):
+... return s_any.sort(descending=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+1 NaN
+2 1.0
+3 2.0
+0 5.0
+dtype: float64
+>>> func(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 1
+ 2
+ 5
+]
+>>> func_descend(s_pd)
+1 NaN
+0 5.0
+3 2.0
+2 1.0
+dtype: float64
+>>> func_descend(s_pl)
+shape: (4,)
+Series: '' [i64]
+[
+ null
+ 5
+ 2
+ 1
+]
+
std(*, ddof=1)
+
+Get the standard deviation of this Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
ddof |
+
+ int
+ |
+
+
+
+ “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, + where N represents the number of elements. + |
+
+ 1
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.std()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.float64(1.0)
+>>> func(s_pl)
+1.0
+
sum()
+
+Reduce this Series to the sum value.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.sum()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+np.int64(6)
+>>> func(s_pl)
+6
+
tail(n=10)
+
+Get the last n
rows.
Arguments + n : int + Number of rows to return.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = list(range(10))
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
Let's define a dataframe-agnostic function that returns the last 3 rows:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.tail(3)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+7 7
+8 8
+9 9
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 7
+ 8
+ 9
+]
+
to_frame()
+
+Convert to dataframe.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.to_frame()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ a
+0 1
+1 2
+2 3
+>>> func(s_pl)
+shape: (3, 1)
+┌─────┐
+│ a │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
to_list()
+
+Convert to list.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.to_list()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+[1, 2, 3]
+>>> func(s_pl)
+[1, 2, 3]
+
to_numpy()
+
+Convert to numpy.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.to_numpy()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+array([1, 2, 3]...)
+>>> func(s_pl)
+array([1, 2, 3]...)
+
to_pandas()
+
+Convert to pandas.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [1, 2, 3]
+>>> s_pd = pd.Series(s, name="a")
+>>> s_pl = pl.Series("a", s)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.to_pandas()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+>>> func(s_pl)
+0 1
+1 2
+2 3
+Name: a, dtype: int64
+
unique()
+
+Return the unique values of the Series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 4, 6]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.unique()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 4
+2 6
+dtype: int64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ 2
+ 4
+ 6
+]
+
value_counts(*, sort=False, parallel=False)
+
+Count the occurrences of unique values.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
sort |
+
+ bool
+ |
+
+
+
+ Sort the output by count in descending order. If set to False (default), +the order of the output is random. + |
+
+ False
+ |
+
parallel |
+
+ bool
+ |
+
+
+
+ Execute the computation in parallel. Unused for pandas-like APIs. + |
+
+ False
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s_pd = pd.Series([1, 1, 2, 3, 2], name="s")
+>>> s_pl = pl.Series(values=[1, 1, 2, 3, 2], name="s")
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.value_counts(sort=True)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+ s count
+0 1 2
+1 2 2
+2 3 1
+
>>> func(s_pl)
+shape: (3, 2)
+┌─────┬───────┐
+│ s ┆ count │
+│ --- ┆ --- │
+│ i64 ┆ u32 │
+╞═════╪═══════╡
+│ 1 ┆ 2 │
+│ 2 ┆ 2 │
+│ 3 ┆ 1 │
+└─────┴───────┘
+
zip_with(mask, other)
+
+Take values from self or other based on the given mask. Where mask evaluates true, take values from self. Where mask evaluates false, take values from other.
+ + +Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> s1_pl = pl.Series([1, 2, 3, 4, 5])
+>>> s2_pl = pl.Series([5, 4, 3, 2, 1])
+>>> mask_pl = pl.Series([True, False, True, False, True])
+>>> s1_pd = pd.Series([1, 2, 3, 4, 5])
+>>> s2_pd = pd.Series([5, 4, 3, 2, 1])
+>>> mask_pd = pd.Series([True, False, True, False, True])
+
Let's define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s1_any, mask_any, s2_any):
+... return s1_any.zip_with(mask_any, s2_any)
+
We can then pass either pandas or Polars to func
:
>>> func(s1_pl, mask_pl, s2_pl)
+shape: (5,)
+Series: '' [i64]
+[
+ 1
+ 4
+ 3
+ 2
+ 5
+]
+>>> func(s1_pd, mask_pd, s2_pd)
+0 1
+1 4
+2 3
+3 2
+4 5
+dtype: int64
+
narwhals.Series.dt
year()
+
+Get the year in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2012, 1, 7), datetime(2023, 3, 10)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.year()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2012
+1 2023
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 2012
+ 2023
+]
+
month()
+
+Gets the month in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2023, 2, 1), datetime(2023, 8, 3)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.month()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2
+1 8
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 2
+ 8
+]
+
day()
+
+Extracts the day in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1), datetime(2022, 1, 5)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.day()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 5
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 1
+ 5
+]
+
ordinal_day()
+
+Get ordinal day.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> data = [datetime(2020, 1, 1), datetime(2020, 8, 3)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.ordinal_day()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 1
+1 216
+dtype: int32
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i16]
+[
+ 1
+ 216
+]
+
hour()
+
+Extracts the hour in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.hour()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 5
+1 9
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 5
+ 9
+]
+
minute()
+
+Extracts the minute in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1, 5, 3), datetime(2022, 1, 5, 9, 12)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.minute()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 3
+1 12
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 3
+ 12
+]
+
second()
+
+Extracts the second(s) in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [datetime(2022, 1, 1, 5, 3, 10), datetime(2022, 1, 5, 9, 12, 4)]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.second()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 4
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i8]
+[
+ 10
+ 4
+]
+
millisecond()
+
+Extracts the milliseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+
>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.millisecond().alias("datetime")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 400
+1 600
+2 800
+3 0
+4 200
+Name: datetime, dtype: int...
+>>> func(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400
+ 600
+ 800
+ 0
+ 200
+]
+
microsecond()
+
+Extracts the microseconds in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [
+... datetime(2023, 5, 21, 12, 55, 10, 400000),
+... datetime(2023, 5, 21, 12, 55, 10, 600000),
+... datetime(2023, 5, 21, 12, 55, 10, 800000),
+... datetime(2023, 5, 21, 12, 55, 11, 0),
+... datetime(2023, 5, 21, 12, 55, 11, 200000),
+... ]
+
>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.microsecond().alias("datetime")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 400000
+1 600000
+2 800000
+3 0
+4 200000
+Name: datetime, dtype: int...
+>>> func(s_pl)
+shape: (5,)
+Series: 'datetime' [i32]
+[
+ 400000
+ 600000
+ 800000
+ 0
+ 200000
+]
+
nanosecond()
+
+Extracts the nanosecond(s) in a datetime series.
+ + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import datetime
+>>> import narwhals as nw
+>>> dates = [
+... datetime(2022, 1, 1, 5, 3, 10, 500000),
+... datetime(2022, 1, 5, 9, 12, 4, 60000),
+... ]
+>>> s_pd = pd.Series(dates)
+>>> s_pl = pl.Series(dates)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.nanosecond()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 500000000
+1 60000000
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i32]
+[
+ 500000000
+ 60000000
+]
+
total_minutes()
+
+Get total minutes.
 + + +The function outputs the total minutes in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [timedelta(minutes=10), timedelta(minutes=20, seconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.total_minutes()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 20
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_seconds()
+
+Get total seconds.
 + + +The function outputs the total seconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.total_seconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 20
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_milliseconds()
+
+Get total milliseconds.
 + + +The function outputs the total milliseconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [
+... timedelta(milliseconds=10),
+... timedelta(milliseconds=20, microseconds=40),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.total_milliseconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 20
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 20
+]
+
total_microseconds()
+
+Get total microseconds.
 + + +The function outputs the total microseconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = [
+... timedelta(microseconds=10),
+... timedelta(milliseconds=1, microseconds=200),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.total_microseconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 10
+1 1200
+dtype: int...
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ 10
+ 1200
+]
+
total_nanoseconds()
+
+Get total nanoseconds.
 + + +The function outputs the total nanoseconds in the int dtype by default.
+However, pandas may change the dtype to float when there are missing values;
+consider using fill_null()
in this case.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> from datetime import timedelta
+>>> import narwhals as nw
+>>> data = ["2024-01-01 00:00:00.000000001", "2024-01-01 00:00:00.000000002"]
+>>> s_pd = pd.to_datetime(pd.Series(data))
+>>> s_pl = pl.Series(data).str.to_datetime(time_unit="ns")
+
We define a library agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.diff().dt.total_nanoseconds()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 1.0
+dtype: float64
+>>> func(s_pl)
+shape: (2,)
+Series: '' [i64]
+[
+ null
+ 1
+]
+
to_string(format)
+
+Convert a Date/Time/Datetime series into a String series with the given format.
+ + +Unfortunately, different libraries interpret format directives a bit +differently.
+"%.f"
for fractional seconds,
+ whereas pandas and Python stdlib use ".%f"
."%S"
as "seconds, including fractional seconds"
+ whereas most other tools interpret it as "just seconds, as 2 digits".Therefore, we make the following adjustments:
+"%S.%f"
with "%S%.f"
."%S.%f"
with "%S"
.Workarounds like these don't make us happy, and we try to avoid them as +much as possible, but here we feel like it's the best compromise.
+If you just want to format a date/datetime Series as a local datetime +string, and have it work as consistently as possible across libraries, +we suggest using:
+"%Y-%m-%dT%H:%M:%S%.f"
for datetimes"%Y-%m-%d"
for datesthough note that, even then, different tools may return a different number +of trailing zeros. Nonetheless, this is probably consistent enough for +most applications.
+If you have an application where this is not enough, please open an issue +and let us know.
+Examples:
+>>> from datetime import datetime
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = [
+... datetime(2020, 3, 1),
+... datetime(2020, 4, 1),
+... datetime(2020, 5, 1),
+... ]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.dt.to_string("%Y/%m/%d")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 2020/03/01
+1 2020/04/01
+2 2020/05/01
+dtype: object
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [str]
+[
+ "2020/03/01"
+ "2020/04/01"
+ "2020/05/01"
+]
+
narwhals.Series.str
contains(pattern, *, literal=False)
+
+Check if string contains a substring that matches a pattern.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
pattern |
+
+ str
+ |
+
+
+
+ A character sequence or valid regular expression pattern. + |
+ + required + | +
literal |
+
+ bool
+ |
+
+
+
+ If True, treats the pattern as a literal string. + If False, assumes the pattern is a regular expression. + |
+
+ False
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> pets = ["cat", "dog", "rabbit and parrot", "dove", None]
+>>> s_pd = pd.Series(pets)
+>>> s_pl = pl.Series(pets)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.str.contains("parrot|dove")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 False
+2 True
+3 True
+4 None
+dtype: object
+
>>> func(s_pl)
+shape: (5,)
+Series: '' [bool]
+[
+ false
+ false
+ true
+ true
+ null
+]
+
ends_with(suffix)
+
+Check if string values end with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
suffix |
+
+ str
+ |
+
+
+
+ suffix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(series):
+... return series.str.ends_with("ngo")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 False
+1 True
+2 None
+dtype: object
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ false
+ true
+ null
+]
+
head(n=5)
+
+Take the first n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
When the n
input is negative, head
returns characters up to the n-th from the end of the string.
+ For example, if n = -3
, then all characters except the last three are returned.
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.str.head()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 Atata
+1 taata
+2 taata
+3 zukky
+dtype: object
+>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "Atata"
+ "taata"
+ "taata"
+ "zukky"
+]
+
slice(offset, length=None)
+
+Create subslices of the string values of a Series.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
offset |
+
+ int
+ |
+
+
+
+ Start index. Negative indexing is supported. + |
+ + required + | +
length |
+
+ int | None
+ |
+
+
+
+ Length of the slice. If set to |
+
+ None
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["pear", None, "papaya", "dragonfruit"]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.str.slice(4, length=3)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0
+1 None
+2 ya
+3 onf
+dtype: object
+
>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ ""
+ null
+ "ya"
+ "onf"
+]
+
Using negative indexes:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.str.slice(-3)
+
>>> func(s_pd)
+0 ear
+1 None
+2 aya
+3 uit
+dtype: object
+
>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "ear"
+ null
+ "aya"
+ "uit"
+]
+
starts_with(prefix)
+
+Check if string values start with a substring.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
prefix |
+
+ str
+ |
+
+
+
+ prefix substring + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = ["apple", "mango", None]
+>>> s_pd = pd.Series(data)
+>>> s_pl = pl.Series(data)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(series):
+... return series.str.starts_with("app")
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 True
+1 False
+2 None
+dtype: object
+
>>> func(s_pl)
+shape: (3,)
+Series: '' [bool]
+[
+ true
+ false
+ null
+]
+
tail(n=5)
+
+Take the last n elements of each string.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of elements to take. Negative indexing is supported (see note (1.)) + |
+
+ 5
+ |
+
When the n
input is negative, tail
returns characters starting from the n-th from the beginning of
+ the string. For example, if n = -3
, then all characters except the first three are returned.
If the string has fewer than n
characters, the full string is returned.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lyrics = ["Atatata", "taata", "taatatata", "zukkyun"]
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> @nw.narwhalify
+... def func(s_any):
+... return s_any.str.tail()
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 atata
+1 taata
+2 atata
+3 kkyun
+dtype: object
+>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "atata"
+ "taata"
+ "atata"
+ "kkyun"
+]
+
narwhals.typing
Narwhals comes fully statically typed. In addition to nw.DataFrame
, nw.Expr
,
+nw.Series
, nw.LazyFrame
, we also provide the following type hints:
DataFrameT
A TypeVar
bound to nw.DataFrame
. Use this when you have a function which
+accepts a nw.DataFrame
and returns a nw.DataFrame
backed by the same backend, for example:
import narwhals as nw
+from narwhals.typing import DataFrameT
+
+@nw.narwhalify
+def func(df: DataFrameT) -> DataFrameT:
+ return df.with_columns(c=df['a']+1)
+
Frame
Either a nw.DataFrame
or nw.LazyFrame
. Use this if your function can work on
+either and your function doesn't care about its backend, for example:
import narwhals as nw
+from narwhals.typing import Frame
+
+@nw.narwhalify
+def func(df: Frame) -> list[str]:
+ return df.columns
+
FrameT
A TypeVar
bound to Frame
. Use this if your function accepts either nw.DataFrame
+or nw.LazyFrame
and returns an object backed by the same backend, for example:
import narwhals as nw
+from narwhals.typing import FrameT
+
+@nw.narwhalify
+def func(df: FrameT) -> FrameT:
+ return df.with_columns(c=nw.col('a')+1)
+
IntoDataFrame
An object which can be converted to nw.DataFrame
(e.g. pd.DataFrame
, pl.DataFrame
).
+Use this if your function accepts a narwhalifiable object but doesn't care about its backend:
from __future__ import annotations
+
+import narwhals as nw
+from narwhals.typing import IntoDataFrame
+
+def func(df_native: IntoDataFrame) -> tuple[int, int]:
+ df = nw.from_native(df_native, eager_only=True)
+ return df.shape
+
IntoDataFrameT
A TypeVar
bound to IntoDataFrame
. Use this if your function accepts
+an object which can be converted to nw.DataFrame
and returns an object of the same
+class:
import narwhals as nw
+from narwhals.typing import IntoDataFrameT
+
+def func(df_native: IntoDataFrameT) -> IntoDataFrameT:
+ df = nw.from_native(df_native, eager_only=True)
+ return nw.to_native(df.with_columns(c=df['a']+1))
+
IntoExpr
Use this to mean "either a Narwhals expression, or something
+which can be converted into one". For example, exprs
in DataFrame.select
is
+typed to accept IntoExpr
, as it can either accept a nw.Expr
(e.g. df.select(nw.col('a'))
)
+or a string which will be interpreted as a nw.Expr
, e.g. df.select('a')
.
IntoFrame
An object which can be converted to nw.DataFrame
or nw.LazyFrame
+(e.g. pd.DataFrame
, pl.DataFrame
, pl.LazyFrame
). Use this if your function can accept
+an object which can be converted to either nw.DataFrame
or nw.LazyFrame
and it doesn't
+care about its backend:
import narwhals as nw
+from narwhals.typing import IntoFrame
+
+def func(df_native: IntoFrame) -> list[str]:
+ df = nw.from_native(df_native)
+ return df.columns
+
IntoFrameT
A TypeVar
bound to IntoFrame
. Use this if your function accepts an
+object which is convertible to nw.DataFrame
or nw.LazyFrame
and returns an object
+of the same type:
import narwhals as nw
+from narwhals.typing import IntoFrameT
+
+def func(df_native: IntoFrameT) -> IntoFrameT:
+ df = nw.from_native(df_native)
+ return nw.to_native(df.with_columns(c=nw.col('a')+1))
+
nw.narwhalify
, or nw.from_native
?Although the former is more readable, the latter is better at preserving type hints.
+Here's an example: +
import polars as pl
+import narwhals as nw
+from narwhals.typing import IntoDataFrameT, DataFrameT
+
+df = pl.DataFrame({'a': [1,2,3]})
+
+def func(df_any: IntoDataFrameT) -> IntoDataFrameT:
+ df = nw.from_native(df_any, eager_only=True)
+ return nw.to_native(df.select(b=nw.col('a')))
+
+reveal_type(func(df))
+
+@nw.narwhalify(strict=True)
+def func_2(df: DataFrameT) -> DataFrameT:
+ return df.select(b=nw.col('a'))
+
+reveal_type(func_2(df))
+
Running mypy
on it gives:
+
$ mypy f.py
+f.py:11: note: Revealed type is "polars.dataframe.frame.DataFrame"
+f.py:17: note: Revealed type is "Any"
+Success: no issues found in 1 source file
+
In the first case, mypy can infer that df
is a polars.DataFrame
. In the second case, it can't.
If you want to make the most out of type hints and preserve them as much as possible, we recommend
+nw.from_native
and nw.to_native
- otherwise, nw.narwhalify
. Type hints will still be respected
+inside the function body if you type the arguments.