docs(rust): Add API links for Rust user guide examples (#11294)

pola-rs · Sep 25, 2023 · 97ccc52 · 97ccc52
1 parent 37ec9be
commit 97ccc52
Show file tree

Hide file tree

Showing 25 changed files with 214 additions and 108 deletions.
diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml
@@ -4,13 +4,15 @@ on:
   pull_request:
     paths:
       - py-polars/**
+      - docs/src/python/**
       - crates/**
       - .github/workflows/test-python.yml
   push:
     branches:
       - main
     paths:
       - crates/**
+      - docs/src/python/**
       - py-polars/**
       - .github/workflows/test-python.yml
 

diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml
diff --git a/docs/getting-started/series-dataframes.md b/docs/getting-started/series-dataframes.md
@@ -31,7 +31,7 @@ Although it is more common to work directly on a `DataFrame` object, `Series` im
 
 There are a number of methods related to string operations in the `StringNamespace`. These only work on `Series` with the Datatype `Utf8`.
 
-{{code_block('getting-started/series-dataframes','string',['replace'])}}
+{{code_block('getting-started/series-dataframes','string',['str.replace'])}}
 
 ```python exec="on" result="text" session="getting-started/series"
 --8<-- "python/getting-started/series-dataframes.py:string"
@@ -41,7 +41,7 @@ There are a number of methods related to string operations in the `StringNamespa
 
 Similar to strings, there is a separate namespace for datetime related operations in the `DateLikeNameSpace`. These only work on `Series` with DataTypes related to dates.
 
-{{code_block('getting-started/series-dataframes','dt',['day'])}}
+{{code_block('getting-started/series-dataframes','dt',['Series.dt.day'])}}
 
 ```python exec="on" result="text" session="getting-started/series"
 --8<-- "python/getting-started/series-dataframes.py:dt"

diff --git a/docs/src/python/getting-started/series-dataframes.py b/docs/src/python/getting-started/series-dataframes.py
@@ -23,8 +23,7 @@
 start = date(2001, 1, 1)
 stop = date(2001, 1, 9)
 s = pl.date_range(start, stop, interval="2d", eager=True)
-s.dt.day()
-print(s)
+print(s.dt.day())
 # --8<-- [end:dt]
 
 # --8<-- [start:dataframe]

diff --git a/docs/src/python/user-guide/expressions/aggregation.py b/docs/src/python/user-guide/expressions/aggregation.py
@@ -16,7 +16,7 @@
 }
 
 dataset = pl.read_csv(url, dtypes=dtypes).with_columns(
-    pl.col("birthday").str.strptime(pl.Date, strict=False)
+    pl.col("birthday").str.to_date(strict=False)
 )
 # --8<-- [end:dataframe]
 

diff --git a/docs/src/python/user-guide/expressions/casting.py b/docs/src/python/user-guide/expressions/casting.py
@@ -122,8 +122,8 @@
 )
 
 out = df.select(
-    pl.col("date").dt.strftime("%Y-%m-%d"),
-    pl.col("string").str.strptime(pl.Datetime, "%Y-%m-%d"),
+    pl.col("date").dt.to_string("%Y-%m-%d"),
+    pl.col("string").str.to_datetime("%Y-%m-%d"),
 )
 print(out)
 # --8<-- [end:dates2]
diff --git a/docs/src/python/user-guide/transformations/time-series/filter.py b/docs/src/python/user-guide/transformations/time-series/filter.py
@@ -21,7 +21,7 @@
 # --8<-- [end:range]
 
 # --8<-- [start:negative]
-ts = pl.Series(["-1300-05-23", "-1400-03-02"]).str.strptime(pl.Date)
+ts = pl.Series(["-1300-05-23", "-1400-03-02"]).str.to_date()
 
 negative_dates_df = pl.DataFrame({"ts": ts, "values": [3, 4]})
 

diff --git a/docs/src/python/user-guide/transformations/time-series/parsing.py b/docs/src/python/user-guide/transformations/time-series/parsing.py
@@ -12,7 +12,7 @@
 # --8<-- [start:cast]
 df = pl.read_csv("docs/data/apple_stock.csv", try_parse_dates=False)
 
-df = df.with_columns(pl.col("Date").str.strptime(pl.Date, format="%Y-%m-%d"))
+df = df.with_columns(pl.col("Date").str.to_date("%Y-%m-%d"))
 print(df)
 # --8<-- [end:cast]
 
@@ -36,7 +36,7 @@
 ]
 mixed_parsed = (
     pl.Series(data)
-    .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z")
+    .str.to_datetime("%Y-%m-%dT%H:%M:%S%z")
     .dt.convert_time_zone("Europe/Brussels")
 )
 print(mixed_parsed)

diff --git a/docs/src/python/user-guide/transformations/time-series/timezones.py b/docs/src/python/user-guide/transformations/time-series/timezones.py
@@ -5,7 +5,7 @@
 
 # --8<-- [start:example]
 ts = ["2021-03-27 03:00", "2021-03-28 03:00"]
-tz_naive = pl.Series("tz_naive", ts).str.strptime(pl.Datetime)
+tz_naive = pl.Series("tz_naive", ts).str.to_datetime()
 tz_aware = tz_naive.dt.replace_time_zone("UTC").rename("tz_aware")
 time_zones_df = pl.DataFrame([tz_naive, tz_aware])
 print(time_zones_df)

diff --git a/docs/src/rust/getting-started/series-dataframes.rs b/docs/src/rust/getting-started/series-dataframes.rs
@@ -18,7 +18,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     // --8<-- [end:string]
 
     // --8<-- [start:dt]
-    // This operation is not directly available on the Series object yet, only on the DataFrame
+    // This operation is not directly available on the Series object yet, only as an Expression
     // --8<-- [end:dt]
 
     // --8<-- [start:dataframe]

diff --git a/docs/src/rust/user-guide/expressions/casting.rs b/docs/src/rust/user-guide/expressions/casting.rs
@@ -187,9 +187,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .clone()
         .lazy()
         .select([
-            col("date").dt().strftime("%Y-%m-%d"),
-            col("string").str().strptime(
-                DataType::Datetime(TimeUnit::Microseconds, None),
+            col("date").dt().to_string("%Y-%m-%d"),
+            col("string").str().to_datetime(
+                TimeUnit::Microseconds,
+                None,
                 StrptimeOptions::default(),
                 lit("raise"),
             ),

diff --git a/docs/src/rust/user-guide/transformations/time-series/filter.rs b/docs/src/rust/user-guide/transformations/time-series/filter.rs
@@ -45,7 +45,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     .with_column(
         col("ts")
             .str()
-            .strptime(DataType::Date, StrptimeOptions::default(), lit("raise")),
+            .to_date(StrptimeOptions::default(), lit("raise")),
     )
     .collect()?;
 

diff --git a/docs/src/rust/user-guide/transformations/time-series/parsing.rs b/docs/src/rust/user-guide/transformations/time-series/parsing.rs
@@ -25,7 +25,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .lazy()
         .with_columns([col("Date")
             .str()
-            .strptime(DataType::Date, StrptimeOptions::default(), lit("raise"))])
+            .to_date(StrptimeOptions::default(), lit("raise"))])
         .collect()?;
     println!("{}", &df);
     // --8<-- [end:cast]
@@ -57,8 +57,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     ];
     let q = col("date")
         .str()
-        .strptime(
-            DataType::Datetime(TimeUnit::Microseconds, None),
+        .to_datetime(
+            TimeUnit::Microseconds,
+            None,
             StrptimeOptions {
                 format: Some("%Y-%m-%dT%H:%M:%S%z".to_string()),
                 ..Default::default()

diff --git a/docs/src/rust/user-guide/transformations/time-series/timezones.rs b/docs/src/rust/user-guide/transformations/time-series/timezones.rs
@@ -8,8 +8,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let tz_naive = Series::new("tz_naive", &ts);
     let time_zones_df = DataFrame::new(vec![tz_naive])?
         .lazy()
-        .select([col("tz_naive").str().strptime(
-            DataType::Datetime(TimeUnit::Milliseconds, None),
+        .select([col("tz_naive").str().to_datetime(
+            TimeUnit::Milliseconds,
+            None,
             StrptimeOptions::default(),
             lit("raise"),
         )])

diff --git a/docs/user-guide/expressions/casting.md b/docs/user-guide/expressions/casting.md
@@ -91,9 +91,9 @@ Temporal data types such as `Date` or `Datetime` are represented as the number o
 --8<-- "python/user-guide/expressions/casting.py:dates"
 ```
 
-To perform casting operations between strings and `Dates`/`Datetimes`, `strftime` and `strptime` are utilized. Polars adopts the [chrono format syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) for when formatting. It's worth noting that `strptime` features additional options that support timezone functionality. Refer to the API documentation for further information.
+To convert between strings and `Dates`/`Datetimes`, `dt.to_string` and `str.to_datetime` are utilized. Polars adopts the [chrono format syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) for formatting. It's worth noting that `str.to_datetime` features additional options that support timezone functionality. Refer to the API documentation for further information.
 
-{{code_block('user-guide/expressions/casting','dates2',['strftime','strptime'])}}
+{{code_block('user-guide/expressions/casting','dates2',['dt.to_string','str.to_datetime'])}}
 
 ```python exec="on" result="text" session="user-guide/cast"
 --8<-- "python/user-guide/expressions/casting.py:dates2"

diff --git a/docs/user-guide/expressions/column-selections.md b/docs/user-guide/expressions/column-selections.md
@@ -37,7 +37,7 @@ Often, we don't just want to include all columns, but include all _while_ exclud
 
 Specifying multiple strings allows expressions to _expand_ to all matching columns:
 
-{{code_block('user-guide/expressions/column-selections','expansion_by_names',['dt_to_string'])}}
+{{code_block('user-guide/expressions/column-selections','expansion_by_names',['dt.to_string'])}}
 
 ```python exec="on" result="text" session="user-guide/column-selections"
 --8<-- "python/user-guide/expressions/column-selections.py:expansion_by_names"
@@ -47,7 +47,7 @@ Specifying multiple strings allows expressions to _expand_ to all matching colum
 
 Multiple column selection is possible by regular expressions also, by making sure to wrap the regex by `^` and `$` to let `pl.col` know that a regex selection is expected:
 
-{{code_block('user-guide/expressions/column-selections','expansion_by_regex',[''])}}
+{{code_block('user-guide/expressions/column-selections','expansion_by_regex',[])}}
 
 ```python exec="on" result="text" session="user-guide/column-selections"
 --8<-- "python/user-guide/expressions/column-selections.py:expansion_by_regex"
@@ -81,15 +81,15 @@ To select just the integer and string columns, we can do:
 
 These _selectors_ also allow for set based selection operations. For instance, to select the **numeric** columns **except** the **first** column that indicates row numbers:
 
-{{code_block('user-guide/expressions/column-selections','selectors_diff',['cs_first', 'cs_numeric'])}}
+{{code_block('user-guide/expressions/column-selections','selectors_diff',['cs.first', 'cs.numeric'])}}
 
 ```python exec="on" result="text" session="user-guide/column-selections"
 --8<-- "python/user-guide/expressions/column-selections.py:selectors_diff"
 ```
 
 We can also select the row number by name **and** any **non**-numeric columns:
 
-{{code_block('user-guide/expressions/column-selections','selectors_union',['cs_by_name', 'cs_numeric'])}}
+{{code_block('user-guide/expressions/column-selections','selectors_union',['cs.by_name', 'cs.numeric'])}}
 
 ```python exec="on" result="text" session="user-guide/column-selections"
 --8<-- "python/user-guide/expressions/column-selections.py:selectors_union"
@@ -99,7 +99,7 @@ We can also select the row number by name **and** any **non**-numeric columns:
 
 _Selectors_ can also be matched by substring and regex patterns:
 
-{{code_block('user-guide/expressions/column-selections','selectors_by_name',['cs_contains', 'cs_matches'])}}
+{{code_block('user-guide/expressions/column-selections','selectors_by_name',['cs.contains', 'cs.matches'])}}
 
 ```python exec="on" result="text" session="user-guide/column-selections"
 --8<-- "python/user-guide/expressions/column-selections.py:selectors_by_name"
@@ -109,7 +109,7 @@ _Selectors_ can also be matched by substring and regex patterns:
 
 What if we want to apply a specific operation on the selected columns (i.e. get back to representing them as **expressions** to operate upon)? We can simply convert them using `as_expr` and then proceed as normal:
 
-{{code_block('user-guide/expressions/column-selections','selectors_to_expr',['cs_temporal'])}}
+{{code_block('user-guide/expressions/column-selections','selectors_to_expr',['cs.temporal'])}}
 
 ```python exec="on" result="text" session="user-guide/column-selections"
 --8<-- "python/user-guide/expressions/column-selections.py:selectors_to_expr"

diff --git a/docs/user-guide/expressions/lists.md b/docs/user-guide/expressions/lists.md
@@ -39,7 +39,7 @@ However, in Polars, we often do not need to do this to operate on the `List` ele
 
 Polars provides several standard operations on `List` columns. If we want the first three measurements, we can do a `head(3)`. The last three can be obtained via a `tail(3)`, or alternately, via `slice` (negative indexing is supported). We can also identify the number of observations via `lengths`. Let's see them in action:
 
-{{code_block('user-guide/expressions/lists','list_ops',['Expr.List'])}}
+{{code_block('user-guide/expressions/lists','list_ops',['Expr.list'])}}
 
 ```python exec="on" result="text" session="user-guide/lists"
 --8<-- "python/user-guide/expressions/lists.py:list_ops"
@@ -60,7 +60,7 @@ If we need to identify the stations that are giving the most number of errors fr
 
 The third step requires a casting (or alternately, a regex pattern search) operation to be performed on each element of the list. We can do this by first referencing the elements in the `pl.element()` context, and then calling a suitable Polars expression on them. Let's see how:
 
-{{code_block('user-guide/expressions/lists','count_errors',['Expr.List', 'element'])}}
+{{code_block('user-guide/expressions/lists','count_errors',['Expr.list', 'element'])}}
 
 ```python exec="on" result="text" session="user-guide/lists"
 --8<-- "python/user-guide/expressions/lists.py:count_errors"
@@ -110,7 +110,7 @@ We can define `Array` columns in this manner:
 
 Basic operations are available on it:
 
-{{code_block('user-guide/expressions/lists','array_ops',['arr'])}}
+{{code_block('user-guide/expressions/lists','array_ops',['Series.arr'])}}
 
 ```python exec="on" result="text" session="user-guide/lists"
 --8<-- "python/user-guide/expressions/lists.py:array_ops"

diff --git a/docs/user-guide/expressions/strings.md b/docs/user-guide/expressions/strings.md
@@ -8,7 +8,7 @@ String processing functions are available in the `str` namespace.
 
 The `str` namespace can be accessed through the `.str` attribute of a column with `Utf8` data type. In the following example, we create a column named `animal` and compute the length of each element in the column in terms of the number of bytes and the number of characters. If you are working with ASCII text, then the results of these two computations will be the same, and using `lengths` is recommended since it is faster.
 
-{{code_block('user-guide/expressions/strings','df',['lengths','n_chars'])}}
+{{code_block('user-guide/expressions/strings','df',['str.lengths','str.n_chars'])}}
 
 ```python exec="on" result="text" session="user-guide/strings"
 --8<-- "python/user-guide/expressions/strings.py:setup"
@@ -23,7 +23,7 @@ The `str` namespace can be accessed through the `.str` attribute of a column wit
 
 To check for the presence of a pattern within a string, we can use the contains method. The `contains` method accepts either a regular substring or a regex pattern, depending on the value of the `literal` parameter. If the pattern we're searching for is a simple substring located either at the beginning or end of the string, we can alternatively use the `starts_with` and `ends_with` functions.
 
-{{code_block('user-guide/expressions/strings','existence',['str.contains', 'starts_with','ends_with'])}}
+{{code_block('user-guide/expressions/strings','existence',['str.contains', 'str.starts_with','str.ends_with'])}}
 
 ```python exec="on" result="text" session="user-guide/strings"
 --8<-- "python/user-guide/expressions/strings.py:existence"
@@ -33,15 +33,15 @@ To check for the presence of a pattern within a string, we can use the contains
 
 The `extract` method allows us to extract a pattern from a specified string. This method takes a regex pattern containing one or more capture groups, which are defined by parentheses `()` in the pattern. The group index indicates which capture group to output.
 
-{{code_block('user-guide/expressions/strings','extract',['extract'])}}
+{{code_block('user-guide/expressions/strings','extract',['str.extract'])}}
 
 ```python exec="on" result="text" session="user-guide/strings"
 --8<-- "python/user-guide/expressions/strings.py:extract"
 ```
 
 To extract all occurrences of a pattern within a string, we can use the `extract_all` method. In the example below, we extract all numbers from a string using the regex pattern `(\d+)`, which matches one or more digits. The resulting output of the `extract_all` method is a list containing all instances of the matched pattern within the string.
 
-{{code_block('user-guide/expressions/strings','extract_all',['extract_all'])}}
+{{code_block('user-guide/expressions/strings','extract_all',['str.extract_all'])}}
 
 ```python exec="on" result="text" session="user-guide/strings"
 --8<-- "python/user-guide/expressions/strings.py:extract_all"
@@ -51,7 +51,7 @@ To extract all occurrences of a pattern within a string, we can use the `extract
 
 We have discussed two methods for pattern matching and extraction thus far, and now we will explore how to replace a pattern within a string. Similar to `extract` and `extract_all`, Polars provides the `replace` and `replace_all` methods for this purpose. In the example below we replace one match of `abc` at the end of a word (`\b`) by `ABC` and we replace all occurrences of `a` with `-`.
 
-{{code_block('user-guide/expressions/strings','replace',['replace','replace_all'])}}
+{{code_block('user-guide/expressions/strings','replace',['str.replace','str.replace_all'])}}
 
 ```python exec="on" result="text" session="user-guide/strings"
 --8<-- "python/user-guide/expressions/strings.py:replace"

diff --git a/docs/user-guide/expressions/structs.md b/docs/user-guide/expressions/structs.md
@@ -52,7 +52,7 @@ Polars will interpret a `dict` sent to the `Series` constructor as a `Struct`:
 
 Let's say that we needed to obtain just the `movie` value in the `Series` that we created above. We can use the `field` method to do so:
 
-{{code_block('user-guide/expressions/structs','series_struct_extract',['field'])}}
+{{code_block('user-guide/expressions/structs','series_struct_extract',['struct.field'])}}
 
 ```python exec="on" result="text" session="user-guide/structs"
 --8<-- "python/user-guide/expressions/structs.py:series_struct_extract"
@@ -62,7 +62,7 @@ Let's say that we needed to obtain just the `movie` value in the `Series` that w
 
 What if we need to rename individual `field`s of a `Struct` column? We first convert the `rating_Series` object to a `DataFrame` so that we can view the changes easily, and then use the `rename_fields` method:
 
-{{code_block('user-guide/expressions/structs','series_struct_rename',['rename_fields'])}}
+{{code_block('user-guide/expressions/structs','series_struct_rename',['struct.rename_fields'])}}
 
 ```python exec="on" result="text" session="user-guide/structs"
 --8<-- "python/user-guide/expressions/structs.py:series_struct_rename"

diff --git a/docs/user-guide/io/aws.md b/docs/user-guide/io/aws.md
@@ -17,4 +17,4 @@ located on an AWS bucket.
 
 Load a `.parquet` file using:
 
-{{code_block('user-guide/io/aws','bucket',['from_arrow'])}}
+{{code_block('user-guide/io/aws','bucket',[])}}
diff --git a/docs/user-guide/transformations/time-series/filter.md b/docs/user-guide/transformations/time-series/filter.md
@@ -41,7 +41,7 @@ Say you are working with an archeologist and are dealing in negative dates.
 Polars can parse and store them just fine, but the Python `datetime` library
 does not. So for filtering, you should use attributes in the `.dt` namespace:
 
-{{code_block('user-guide/transformations/time-series/filter','negative',['strptime'])}}
+{{code_block('user-guide/transformations/time-series/filter','negative',['str.to_date'])}}
 
 ```python exec="on" result="text" session="user-guide/transformations/ts/filter"
 --8<-- "python/user-guide/transformations/time-series/filter.py:negative"

diff --git a/docs/user-guide/transformations/time-series/parsing.md b/docs/user-guide/transformations/time-series/parsing.md
@@ -26,21 +26,21 @@ On the other hand binary formats such as parquet have a schema that is respected
 
 ## Casting strings to dates
 
-You can also cast a column of datetimes encoded as strings to a datetime type. You do this by calling the string `str.strptime` method and passing the format of the date string:
+You can also cast a column of datetimes encoded as strings to a datetime type. You do this by calling the string `str.to_date` method and passing the format of the date string:
 
-{{code_block('user-guide/transformations/time-series/parsing','cast',['read_csv','strptime'])}}
+{{code_block('user-guide/transformations/time-series/parsing','cast',['read_csv','str.to_date'])}}
 
 ```python exec="on" result="text" session="user-guide/transformations/ts/parsing"
 --8<-- "python/user-guide/transformations/time-series/parsing.py:cast"
 ```
 
-[The strptime date formats can be found here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
+[The format string specification can be found here](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
 
 ## Extracting date features from a date column
 
 You can extract date features such as the year or day from a date column using the `.dt` namespace on a date column:
 
-{{code_block('user-guide/transformations/time-series/parsing','extract',['year'])}}
+{{code_block('user-guide/transformations/time-series/parsing','extract',['dt.year'])}}
 
 ```python exec="on" result="text" session="user-guide/transformations/ts/parsing"
 --8<-- "python/user-guide/transformations/time-series/parsing.py:extract"
@@ -51,7 +51,7 @@ You can extract data features such as the year or day from a date column using t
 If you have mixed offsets (say, due to crossing daylight saving time),
 then you can use `utc=True` and then convert to your time zone:
 
-{{code_block('user-guide/transformations/time-series/parsing','mixed',['strptime','convert_time_zone'])}}
+{{code_block('user-guide/transformations/time-series/parsing','mixed',['str.to_datetime','dt.convert_time_zone'])}}
 
 ```python exec="on" result="text" session="user-guide/transformations/ts/parsing"
 --8<-- "python/user-guide/transformations/time-series/parsing.py:mixed"