Skip to content

Commit

Permalink
docs(rust): Add API links for Rust user guide examples (#11294)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Sep 25, 2023
1 parent 37ec9be commit 97ccc52
Show file tree
Hide file tree
Showing 25 changed files with 214 additions and 108 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ on:
pull_request:
paths:
- py-polars/**
- docs/src/python/**
- crates/**
- .github/workflows/test-python.yml
push:
branches:
- main
paths:
- crates/**
- docs/src/python/**
- py-polars/**
- .github/workflows/test-python.yml

Expand Down
217 changes: 160 additions & 57 deletions docs/_build/API_REFERENCE_LINKS.yml

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/getting-started/series-dataframes.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Although it is more common to work directly on a `DataFrame` object, `Series` im

There are a number of methods related to string operations in the `StringNamespace`. These only work on `Series` with the Datatype `Utf8`.

{{code_block('getting-started/series-dataframes','string',['replace'])}}
{{code_block('getting-started/series-dataframes','string',['str.replace'])}}

```python exec="on" result="text" session="getting-started/series"
--8<-- "python/getting-started/series-dataframes.py:string"
Expand All @@ -41,7 +41,7 @@ There are a number of methods related to string operations in the `StringNamespa

Similar to strings, there is a separate namespace for datetime related operations in the `DateLikeNameSpace`. These only work on `Series` with DataTypes related to dates.

{{code_block('getting-started/series-dataframes','dt',['day'])}}
{{code_block('getting-started/series-dataframes','dt',['Series.dt.day'])}}

```python exec="on" result="text" session="getting-started/series"
--8<-- "python/getting-started/series-dataframes.py:dt"
Expand Down
3 changes: 1 addition & 2 deletions docs/src/python/getting-started/series-dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
start = date(2001, 1, 1)
stop = date(2001, 1, 9)
s = pl.date_range(start, stop, interval="2d", eager=True)
s.dt.day()
print(s)
print(s.dt.day())
# --8<-- [end:dt]

# --8<-- [start:dataframe]
Expand Down
2 changes: 1 addition & 1 deletion docs/src/python/user-guide/expressions/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
}

dataset = pl.read_csv(url, dtypes=dtypes).with_columns(
pl.col("birthday").str.strptime(pl.Date, strict=False)
pl.col("birthday").str.to_date(strict=False)
)
# --8<-- [end:dataframe]

Expand Down
4 changes: 2 additions & 2 deletions docs/src/python/user-guide/expressions/casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@
)

out = df.select(
pl.col("date").dt.strftime("%Y-%m-%d"),
pl.col("string").str.strptime(pl.Datetime, "%Y-%m-%d"),
pl.col("date").dt.to_string("%Y-%m-%d"),
pl.col("string").str.to_datetime("%Y-%m-%d"),
)
print(out)
# --8<-- [end:dates2]
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# --8<-- [end:range]

# --8<-- [start:negative]
ts = pl.Series(["-1300-05-23", "-1400-03-02"]).str.strptime(pl.Date)
ts = pl.Series(["-1300-05-23", "-1400-03-02"]).str.to_date()

negative_dates_df = pl.DataFrame({"ts": ts, "values": [3, 4]})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# --8<-- [start:cast]
df = pl.read_csv("docs/data/apple_stock.csv", try_parse_dates=False)

df = df.with_columns(pl.col("Date").str.strptime(pl.Date, format="%Y-%m-%d"))
df = df.with_columns(pl.col("Date").str.to_date("%Y-%m-%d"))
print(df)
# --8<-- [end:cast]

Expand All @@ -36,7 +36,7 @@
]
mixed_parsed = (
pl.Series(data)
.str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z")
.str.to_datetime("%Y-%m-%dT%H:%M:%S%z")
.dt.convert_time_zone("Europe/Brussels")
)
print(mixed_parsed)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# --8<-- [start:example]
ts = ["2021-03-27 03:00", "2021-03-28 03:00"]
tz_naive = pl.Series("tz_naive", ts).str.strptime(pl.Datetime)
tz_naive = pl.Series("tz_naive", ts).str.to_datetime()
tz_aware = tz_naive.dt.replace_time_zone("UTC").rename("tz_aware")
time_zones_df = pl.DataFrame([tz_naive, tz_aware])
print(time_zones_df)
Expand Down
2 changes: 1 addition & 1 deletion docs/src/rust/getting-started/series-dataframes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// --8<-- [end:string]

// --8<-- [start:dt]
// This operation is not directly available on the Series object yet, only on the DataFrame
// This operation is not directly available on the Series object yet, only as an Expression
// --8<-- [end:dt]

// --8<-- [start:dataframe]
Expand Down
7 changes: 4 additions & 3 deletions docs/src/rust/user-guide/expressions/casting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.clone()
.lazy()
.select([
col("date").dt().strftime("%Y-%m-%d"),
col("string").str().strptime(
DataType::Datetime(TimeUnit::Microseconds, None),
col("date").dt().to_string("%Y-%m-%d"),
col("string").str().to_datetime(
TimeUnit::Microseconds,
None,
StrptimeOptions::default(),
lit("raise"),
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_column(
col("ts")
.str()
.strptime(DataType::Date, StrptimeOptions::default(), lit("raise")),
.to_date(StrptimeOptions::default(), lit("raise")),
)
.collect()?;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.lazy()
.with_columns([col("Date")
.str()
.strptime(DataType::Date, StrptimeOptions::default(), lit("raise"))])
.to_date(StrptimeOptions::default(), lit("raise"))])
.collect()?;
println!("{}", &df);
// --8<-- [end:cast]
Expand Down Expand Up @@ -57,8 +57,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
];
let q = col("date")
.str()
.strptime(
DataType::Datetime(TimeUnit::Microseconds, None),
.to_datetime(
TimeUnit::Microseconds,
None,
StrptimeOptions {
format: Some("%Y-%m-%dT%H:%M:%S%z".to_string()),
..Default::default()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let tz_naive = Series::new("tz_naive", &ts);
let time_zones_df = DataFrame::new(vec![tz_naive])?
.lazy()
.select([col("tz_naive").str().strptime(
DataType::Datetime(TimeUnit::Milliseconds, None),
.select([col("tz_naive").str().to_datetime(
TimeUnit::Milliseconds,
None,
StrptimeOptions::default(),
lit("raise"),
)])
Expand Down
4 changes: 2 additions & 2 deletions docs/user-guide/expressions/casting.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ Temporal data types such as `Date` or `Datetime` are represented as the number o
--8<-- "python/user-guide/expressions/casting.py:dates"
```

To perform casting operations between strings and `Dates`/`Datetimes`, `strftime` and `strptime` are utilized. Polars adopts the [chrono format syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) for when formatting. It's worth noting that `strptime` features additional options that support timezone functionality. Refer to the API documentation for further information.
To convert between strings and `Dates`/`Datetimes`, `dt.to_string` and `str.to_datetime` are utilized. Polars adopts the [chrono format syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) for formatting. It's worth noting that `str.to_datetime` features additional options that support timezone functionality. Refer to the API documentation for further information.

{{code_block('user-guide/expressions/casting','dates2',['strftime','strptime'])}}
{{code_block('user-guide/expressions/casting','dates2',['dt.to_string','str.to_datetime'])}}

```python exec="on" result="text" session="user-guide/cast"
--8<-- "python/user-guide/expressions/casting.py:dates2"
Expand Down
12 changes: 6 additions & 6 deletions docs/user-guide/expressions/column-selections.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Often, we don't just want to include all columns, but include all _while_ exclud

Specifying multiple strings allows expressions to _expand_ to all matching columns:

{{code_block('user-guide/expressions/column-selections','expansion_by_names',['dt_to_string'])}}
{{code_block('user-guide/expressions/column-selections','expansion_by_names',['dt.to_string'])}}

```python exec="on" result="text" session="user-guide/column-selections"
--8<-- "python/user-guide/expressions/column-selections.py:expansion_by_names"
Expand All @@ -47,7 +47,7 @@ Specifying multiple strings allows expressions to _expand_ to all matching colum

Multiple column selection is also possible with regular expressions. Make sure to wrap the regex in `^` and `$` to let `pl.col` know that a regex selection is expected:

{{code_block('user-guide/expressions/column-selections','expansion_by_regex',[''])}}
{{code_block('user-guide/expressions/column-selections','expansion_by_regex',[])}}

```python exec="on" result="text" session="user-guide/column-selections"
--8<-- "python/user-guide/expressions/column-selections.py:expansion_by_regex"
Expand Down Expand Up @@ -81,15 +81,15 @@ To select just the integer and string columns, we can do:

These _selectors_ also allow for set based selection operations. For instance, to select the **numeric** columns **except** the **first** column that indicates row numbers:

{{code_block('user-guide/expressions/column-selections','selectors_diff',['cs_first', 'cs_numeric'])}}
{{code_block('user-guide/expressions/column-selections','selectors_diff',['cs.first', 'cs.numeric'])}}

```python exec="on" result="text" session="user-guide/column-selections"
--8<-- "python/user-guide/expressions/column-selections.py:selectors_diff"
```

We can also select the row number by name **and** any **non**-numeric columns:

{{code_block('user-guide/expressions/column-selections','selectors_union',['cs_by_name', 'cs_numeric'])}}
{{code_block('user-guide/expressions/column-selections','selectors_union',['cs.by_name', 'cs.numeric'])}}

```python exec="on" result="text" session="user-guide/column-selections"
--8<-- "python/user-guide/expressions/column-selections.py:selectors_union"
Expand All @@ -99,7 +99,7 @@ We can also select the row number by name **and** any **non**-numeric columns:

_Selectors_ can also be matched by substring and regex patterns:

{{code_block('user-guide/expressions/column-selections','selectors_by_name',['cs_contains', 'cs_matches'])}}
{{code_block('user-guide/expressions/column-selections','selectors_by_name',['cs.contains', 'cs.matches'])}}

```python exec="on" result="text" session="user-guide/column-selections"
--8<-- "python/user-guide/expressions/column-selections.py:selectors_by_name"
Expand All @@ -109,7 +109,7 @@ _Selectors_ can also be matched by substring and regex patterns:

What if we want to apply a specific operation on the selected columns (i.e. get back to representing them as **expressions** to operate upon)? We can simply convert them using `as_expr` and then proceed as normal:

{{code_block('user-guide/expressions/column-selections','selectors_to_expr',['cs_temporal'])}}
{{code_block('user-guide/expressions/column-selections','selectors_to_expr',['cs.temporal'])}}

```python exec="on" result="text" session="user-guide/column-selections"
--8<-- "python/user-guide/expressions/column-selections.py:selectors_to_expr"
Expand Down
6 changes: 3 additions & 3 deletions docs/user-guide/expressions/lists.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ However, in Polars, we often do not need to do this to operate on the `List` ele

Polars provides several standard operations on `List` columns. If we want the first three measurements, we can do a `head(3)`. The last three can be obtained via a `tail(3)`, or alternately, via `slice` (negative indexing is supported). We can also identify the number of observations via `lengths`. Let's see them in action:

{{code_block('user-guide/expressions/lists','list_ops',['Expr.List'])}}
{{code_block('user-guide/expressions/lists','list_ops',['Expr.list'])}}

```python exec="on" result="text" session="user-guide/lists"
--8<-- "python/user-guide/expressions/lists.py:list_ops"
Expand All @@ -60,7 +60,7 @@ If we need to identify the stations that are giving the most number of errors fr

The third step requires a casting (or alternately, a regex pattern search) operation to be performed on each element of the list. We can do this by first referencing the elements in the `pl.element()` context, and then calling a suitable Polars expression on them. Let's see how:

{{code_block('user-guide/expressions/lists','count_errors',['Expr.List', 'element'])}}
{{code_block('user-guide/expressions/lists','count_errors',['Expr.list', 'element'])}}

```python exec="on" result="text" session="user-guide/lists"
--8<-- "python/user-guide/expressions/lists.py:count_errors"
Expand Down Expand Up @@ -110,7 +110,7 @@ We can define `Array` columns in this manner:

Basic operations are available on it:

{{code_block('user-guide/expressions/lists','array_ops',['arr'])}}
{{code_block('user-guide/expressions/lists','array_ops',['Series.arr'])}}

```python exec="on" result="text" session="user-guide/lists"
--8<-- "python/user-guide/expressions/lists.py:array_ops"
Expand Down
10 changes: 5 additions & 5 deletions docs/user-guide/expressions/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ String processing functions are available in the `str` namespace.

The `str` namespace can be accessed through the `.str` attribute of a column with `Utf8` data type. In the following example, we create a column named `animal` and compute the length of each element in the column in terms of the number of bytes and the number of characters. If you are working with ASCII text, then the results of these two computations will be the same, and using `lengths` is recommended since it is faster.

{{code_block('user-guide/expressions/strings','df',['lengths','n_chars'])}}
{{code_block('user-guide/expressions/strings','df',['str.lengths','str.n_chars'])}}

```python exec="on" result="text" session="user-guide/strings"
--8<-- "python/user-guide/expressions/strings.py:setup"
Expand All @@ -23,7 +23,7 @@ The `str` namespace can be accessed through the `.str` attribute of a column wit

To check for the presence of a pattern within a string, we can use the `contains` method. The `contains` method accepts either a regular substring or a regex pattern, depending on the value of the `literal` parameter. If the pattern we're searching for is a simple substring located either at the beginning or end of the string, we can alternatively use the `starts_with` and `ends_with` functions.

{{code_block('user-guide/expressions/strings','existence',['str.contains', 'starts_with','ends_with'])}}
{{code_block('user-guide/expressions/strings','existence',['str.contains', 'str.starts_with','str.ends_with'])}}

```python exec="on" result="text" session="user-guide/strings"
--8<-- "python/user-guide/expressions/strings.py:existence"
Expand All @@ -33,15 +33,15 @@ To check for the presence of a pattern within a string, we can use the contains

The `extract` method allows us to extract a pattern from a specified string. This method takes a regex pattern containing one or more capture groups, which are defined by parentheses `()` in the pattern. The group index indicates which capture group to output.

{{code_block('user-guide/expressions/strings','extract',['extract'])}}
{{code_block('user-guide/expressions/strings','extract',['str.extract'])}}

```python exec="on" result="text" session="user-guide/strings"
--8<-- "python/user-guide/expressions/strings.py:extract"
```

To extract all occurrences of a pattern within a string, we can use the `extract_all` method. In the example below, we extract all numbers from a string using the regex pattern `(\d+)`, which matches one or more digits. The resulting output of the `extract_all` method is a list containing all instances of the matched pattern within the string.

{{code_block('user-guide/expressions/strings','extract_all',['extract_all'])}}
{{code_block('user-guide/expressions/strings','extract_all',['str.extract_all'])}}

```python exec="on" result="text" session="user-guide/strings"
--8<-- "python/user-guide/expressions/strings.py:extract_all"
Expand All @@ -51,7 +51,7 @@ To extract all occurrences of a pattern within a string, we can use the `extract

We have discussed two methods for pattern matching and extraction thus far, and now we will explore how to replace a pattern within a string. Similar to `extract` and `extract_all`, Polars provides the `replace` and `replace_all` methods for this purpose. In the example below we replace one match of `abc` at the end of a word (`\b`) by `ABC` and we replace all occurrences of `a` with `-`.

{{code_block('user-guide/expressions/strings','replace',['replace','replace_all'])}}
{{code_block('user-guide/expressions/strings','replace',['str.replace','str.replace_all'])}}

```python exec="on" result="text" session="user-guide/strings"
--8<-- "python/user-guide/expressions/strings.py:replace"
Expand Down
4 changes: 2 additions & 2 deletions docs/user-guide/expressions/structs.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Polars will interpret a `dict` sent to the `Series` constructor as a `Struct`:

Let's say that we needed to obtain just the `movie` value in the `Series` that we created above. We can use the `field` method to do so:

{{code_block('user-guide/expressions/structs','series_struct_extract',['field'])}}
{{code_block('user-guide/expressions/structs','series_struct_extract',['struct.field'])}}

```python exec="on" result="text" session="user-guide/structs"
--8<-- "python/user-guide/expressions/structs.py:series_struct_extract"
Expand All @@ -62,7 +62,7 @@ Let's say that we needed to obtain just the `movie` value in the `Series` that w

What if we need to rename individual `field`s of a `Struct` column? We first convert the `rating_Series` object to a `DataFrame` so that we can view the changes easily, and then use the `rename_fields` method:

{{code_block('user-guide/expressions/structs','series_struct_rename',['rename_fields'])}}
{{code_block('user-guide/expressions/structs','series_struct_rename',['struct.rename_fields'])}}

```python exec="on" result="text" session="user-guide/structs"
--8<-- "python/user-guide/expressions/structs.py:series_struct_rename"
Expand Down
2 changes: 1 addition & 1 deletion docs/user-guide/io/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ located on an AWS bucket.

Load a `.parquet` file using:

{{code_block('user-guide/io/aws','bucket',['from_arrow'])}}
{{code_block('user-guide/io/aws','bucket',[])}}
2 changes: 1 addition & 1 deletion docs/user-guide/transformations/time-series/filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Say you are working with an archeologist and are dealing in negative dates.
Polars can parse and store them just fine, but the Python `datetime` library
does not. So for filtering, you should use attributes in the `.dt` namespace:

{{code_block('user-guide/transformations/time-series/filter','negative',['strptime'])}}
{{code_block('user-guide/transformations/time-series/filter','negative',['str.to_date'])}}

```python exec="on" result="text" session="user-guide/transformations/ts/filter"
--8<-- "python/user-guide/transformations/time-series/filter.py:negative"
Expand Down
10 changes: 5 additions & 5 deletions docs/user-guide/transformations/time-series/parsing.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,21 @@ On the other hand binary formats such as parquet have a schema that is respected

## Casting strings to dates

You can also cast a column of datetimes encoded as strings to a datetime type. You do this by calling the string `str.strptime` method and passing the format of the date string:
You can also cast a column of datetimes encoded as strings to a datetime type. You do this by calling the string `str.to_date` method and passing the format of the date string:

{{code_block('user-guide/transformations/time-series/parsing','cast',['read_csv','strptime'])}}
{{code_block('user-guide/transformations/time-series/parsing','cast',['read_csv','str.to_date'])}}

```python exec="on" result="text" session="user-guide/transformations/ts/parsing"
--8<-- "python/user-guide/transformations/time-series/parsing.py:cast"
```

[The strptime date formats can be found here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
[The format string specification can be found here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)

## Extracting date features from a date column

You can extract date features such as the year or day from a date column using the `.dt` namespace on a date column:

{{code_block('user-guide/transformations/time-series/parsing','extract',['year'])}}
{{code_block('user-guide/transformations/time-series/parsing','extract',['dt.year'])}}

```python exec="on" result="text" session="user-guide/transformations/ts/parsing"
--8<-- "python/user-guide/transformations/time-series/parsing.py:extract"
Expand All @@ -51,7 +51,7 @@ You can extract data features such as the year or day from a date column using t
If you have mixed offsets (say, due to crossing daylight saving time),
then you can use `utc=True` and then convert to your time zone:

{{code_block('user-guide/transformations/time-series/parsing','mixed',['strptime','convert_time_zone'])}}
{{code_block('user-guide/transformations/time-series/parsing','mixed',['str.to_datetime','dt.convert_time_zone'])}}

```python exec="on" result="text" session="user-guide/transformations/ts/parsing"
--8<-- "python/user-guide/transformations/time-series/parsing.py:mixed"
Expand Down
Loading

0 comments on commit 97ccc52

Please sign in to comment.