Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs(python): use more ergonomic syntax in select/with_columns where possible #12101

Merged
merged 2 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/getting-started/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ print(
)
```

You can also specify the specific columns that you want to return. There are two ways to do this. The first option is to create a `list` of column names, as seen below.
You can also specify the columns that you want to return. There are two ways to do this. The first option is to pass the column names, as seen below.

{{code_block('getting-started/expressions','select2',['select'])}}

Expand All @@ -32,7 +32,7 @@ print(
)
```

The second option is to specify each column within a `list` in the `select` statement. This option is shown below.
The second option is to specify each column using `pl.col`. This option is shown below.

{{code_block('getting-started/expressions','select3',['select'])}}

Expand Down
20 changes: 9 additions & 11 deletions docs/src/python/getting-started/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@
# --8<-- [end:select]

# --8<-- [start:select2]
df.select(pl.col(["a", "b"]))
df.select(pl.col("a", "b"))
# --8<-- [end:select2]

# --8<-- [start:select3]
df.select([pl.col("a"), pl.col("b")]).limit(3)
df.select(pl.col("a"), pl.col("b")).limit(3)
# --8<-- [end:select3]

# --8<-- [start:exclude]
df.select([pl.exclude("a")])
df.select(pl.exclude("a"))
# --8<-- [end:exclude]

# --8<-- [start:filter]
Expand All @@ -49,7 +49,7 @@
# --8<-- [end:filter2]

# --8<-- [start:with_columns]
df.with_columns([pl.col("b").sum().alias("e"), (pl.col("b") + 42).alias("b+42")])
df.with_columns(pl.col("b").sum().alias("e"), (pl.col("b") + 42).alias("b+42"))
# --8<-- [end:with_columns]

# --8<-- [start:dataframe2]
Expand All @@ -67,24 +67,22 @@

# --8<-- [start:group_by2]
df2.group_by("y", maintain_order=True).agg(
[
pl.col("*").count().alias("count"),
pl.col("*").sum().alias("sum"),
]
pl.col("*").count().alias("count"),
pl.col("*").sum().alias("sum"),
)
# --8<-- [end:group_by2]

# --8<-- [start:combine]
df_x = df.with_columns((pl.col("a") * pl.col("b")).alias("a * b")).select(
[pl.all().exclude(["c", "d"])]
pl.all().exclude(["c", "d"])
)

print(df_x)
# --8<-- [end:combine]

# --8<-- [start:combine2]
df_y = df.with_columns([(pl.col("a") * pl.col("b")).alias("a * b")]).select(
[pl.all().exclude("d")]
df_y = df.with_columns((pl.col("a") * pl.col("b")).alias("a * b")).select(
pl.all().exclude("d")
)

print(df_y)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def agg(
... "c": [5, 4, 3, 2, 1],
... }
... )
>>> df.group_by("a").agg([pl.col("b"), pl.col("c")]) # doctest: +IGNORE_RESULT
>>> df.group_by("a").agg(pl.col("b"), pl.col("c")) # doctest: +IGNORE_RESULT
shape: (3, 3)
┌─────┬───────────┬───────────┐
│ a ┆ b ┆ c │
Expand Down
8 changes: 3 additions & 5 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3867,10 +3867,8 @@ def filter(self, predicate: Expr) -> Self:
... }
... )
>>> df.group_by("group_col").agg(
... [
... pl.col("b").filter(pl.col("b") < 2).sum().alias("lt"),
... pl.col("b").filter(pl.col("b") >= 2).sum().alias("gte"),
... ]
... lt=pl.col("b").filter(pl.col("b") < 2).sum(),
... gte=pl.col("b").filter(pl.col("b") >= 2).sum(),
... ).sort("group_col")
shape: (2, 3)
┌───────────┬─────┬─────┐
Expand Down Expand Up @@ -5151,7 +5149,7 @@ def is_in(self, other: Expr | Collection[Any] | Series) -> Self:
>>> df = pl.DataFrame(
... {"sets": [[1, 2, 3], [1, 2], [9, 10]], "optional_members": [1, 2, 3]}
... )
>>> df.select([pl.col("optional_members").is_in("sets").alias("contains")])
>>> df.select(pl.col("optional_members").is_in("sets").alias("contains"))
shape: (3, 1)
┌──────────┐
│ contains │
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/io/pyarrow_dataset/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def scan_pyarrow_dataset(
>>> (
... pl.scan_pyarrow_dataset(dset)
... .filter("bools")
... .select(["bools", "floats", "date"])
... .select("bools", "floats", "date")
... .collect()
... ) # doctest: +SKIP
shape: (1, 3)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def columns(self) -> list[str]:
... "bar": [6, 7, 8],
... "ham": ["a", "b", "c"],
... }
... ).select(["foo", "bar"])
... ).select("foo", "bar")
>>> lf.columns
['foo', 'bar']

Expand Down