Skip to content

Commit

Permalink
docs(python): use more ergonomic syntax in select/with_columns where possible (#12101)
Browse files Browse the repository at this point in the history

Co-authored-by: Stijn de Gooijer <[email protected]>
  • Loading branch information
MarcoGorelli and stinodego authored Oct 30, 2023
1 parent dffd125 commit b881b8a
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 21 deletions.
4 changes: 2 additions & 2 deletions docs/getting-started/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ print(
)
```

- You can also specify the specific columns that you want to return. There are two ways to do this. The first option is to create a `list` of column names, as seen below.
+ You can also specify the specific columns that you want to return. There are two ways to do this. The first option is to pass the column names, as seen below.

{{code_block('getting-started/expressions','select2',['select'])}}

Expand All @@ -32,7 +32,7 @@ print(
)
```

- The second option is to specify each column within a `list` in the `select` statement. This option is shown below.
+ The second option is to specify each column using `pl.col`. This option is shown below.

{{code_block('getting-started/expressions','select3',['select'])}}

Expand Down
20 changes: 9 additions & 11 deletions docs/src/python/getting-started/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@
# --8<-- [end:select]

# --8<-- [start:select2]
- df.select(pl.col(["a", "b"]))
+ df.select(pl.col("a", "b"))
# --8<-- [end:select2]

# --8<-- [start:select3]
- df.select([pl.col("a"), pl.col("b")]).limit(3)
+ df.select(pl.col("a"), pl.col("b")).limit(3)
# --8<-- [end:select3]

# --8<-- [start:exclude]
- df.select([pl.exclude("a")])
+ df.select(pl.exclude("a"))
# --8<-- [end:exclude]

# --8<-- [start:filter]
Expand All @@ -49,7 +49,7 @@
# --8<-- [end:filter2]

# --8<-- [start:with_columns]
- df.with_columns([pl.col("b").sum().alias("e"), (pl.col("b") + 42).alias("b+42")])
+ df.with_columns(pl.col("b").sum().alias("e"), (pl.col("b") + 42).alias("b+42"))
# --8<-- [end:with_columns]

# --8<-- [start:dataframe2]
Expand All @@ -67,24 +67,22 @@

# --8<-- [start:group_by2]
df2.group_by("y", maintain_order=True).agg(
- [
- pl.col("*").count().alias("count"),
- pl.col("*").sum().alias("sum"),
- ]
+ pl.col("*").count().alias("count"),
+ pl.col("*").sum().alias("sum"),
)
# --8<-- [end:group_by2]

# --8<-- [start:combine]
df_x = df.with_columns((pl.col("a") * pl.col("b")).alias("a * b")).select(
- [pl.all().exclude(["c", "d"])]
+ pl.all().exclude(["c", "d"])
)

print(df_x)
# --8<-- [end:combine]

# --8<-- [start:combine2]
- df_y = df.with_columns([(pl.col("a") * pl.col("b")).alias("a * b")]).select(
- [pl.all().exclude("d")]
+ df_y = df.with_columns((pl.col("a") * pl.col("b")).alias("a * b")).select(
+ pl.all().exclude("d")
)

print(df_y)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def agg(
... "c": [5, 4, 3, 2, 1],
... }
... )
- >>> df.group_by("a").agg([pl.col("b"), pl.col("c")]) # doctest: +IGNORE_RESULT
+ >>> df.group_by("a").agg(pl.col("b"), pl.col("c")) # doctest: +IGNORE_RESULT
shape: (3, 3)
┌─────┬───────────┬───────────┐
│ a ┆ b ┆ c │
Expand Down
8 changes: 3 additions & 5 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3867,10 +3867,8 @@ def filter(self, predicate: Expr) -> Self:
... }
... )
>>> df.group_by("group_col").agg(
- ... [
- ... pl.col("b").filter(pl.col("b") < 2).sum().alias("lt"),
- ... pl.col("b").filter(pl.col("b") >= 2).sum().alias("gte"),
- ... ]
+ ... lt=pl.col("b").filter(pl.col("b") < 2).sum(),
+ ... gte=pl.col("b").filter(pl.col("b") >= 2).sum(),
... ).sort("group_col")
shape: (2, 3)
┌───────────┬─────┬─────┐
Expand Down Expand Up @@ -5151,7 +5149,7 @@ def is_in(self, other: Expr | Collection[Any] | Series) -> Self:
>>> df = pl.DataFrame(
... {"sets": [[1, 2, 3], [1, 2], [9, 10]], "optional_members": [1, 2, 3]}
... )
- >>> df.select([pl.col("optional_members").is_in("sets").alias("contains")])
+ >>> df.select(pl.col("optional_members").is_in("sets").alias("contains"))
shape: (3, 1)
┌──────────┐
│ contains │
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/io/pyarrow_dataset/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def scan_pyarrow_dataset(
>>> (
... pl.scan_pyarrow_dataset(dset)
... .filter("bools")
- ... .select(["bools", "floats", "date"])
+ ... .select("bools", "floats", "date")
... .collect()
... ) # doctest: +SKIP
shape: (1, 3)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def columns(self) -> list[str]:
... "bar": [6, 7, 8],
... "ham": ["a", "b", "c"],
... }
- ... ).select(["foo", "bar"])
+ ... ).select("foo", "bar")
>>> lf.columns
['foo', 'bar']
Expand Down

0 comments on commit b881b8a

Please sign in to comment.