diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 02386075157d..96a8823b1d84 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -4,6 +4,7 @@ on: pull_request: paths: - py-polars/** + - docs/src/python/** - crates/** - .github/workflows/test-python.yml push: @@ -11,6 +12,7 @@ on: - main paths: - crates/** + - docs/src/python/** - py-polars/** - .github/workflows/test-python.yml diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml index 4e028d99a8b2..d68415e84f5f 100644 --- a/docs/_build/API_REFERENCE_LINKS.yml +++ b/docs/_build/API_REFERENCE_LINKS.yml @@ -18,13 +18,9 @@ python: max: https://pola-rs.github.io/polars/py-polars/html/reference/series/api/polars.Series.max.html value_counts: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.value_counts.html unnest: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.unnest.html - field: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.struct.field.html struct: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.struct.html - rename_fields: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.struct.rename_fields.html is_duplicated: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.is_duplicated.html - replace: https://pola-rs.github.io/polars/py-polars/html/reference/series/api/polars.Series.str.replace.html sample: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.sample.html - day: https://pola-rs.github.io/polars/py-polars/html/reference/series/api/polars.Series.dt.day.html head: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.head.html tail: 
https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.tail.html describe: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.describe.html @@ -35,7 +31,7 @@ python: fold: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.fold.html concat_str: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.concat_str.html str.split: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.split.html - Expr.List: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/list.html + Expr.list: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/list.html element: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.element.html all: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.all.html exclude: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.exclude.html @@ -58,34 +54,6 @@ python: apply: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.apply.html over: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.over.html implode: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.implode.html - dt_to_string: - link: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.to_string.html - name: dt.to_string - selectors: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html - cs_numeric: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.numeric - name: cs.numeric - cs_by_name: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.by_name - name: cs.by_name - cs_first: - link: 
https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.first - name: cs.first - cs_temporal: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.temporal - name: cs.temporal - cs_contains: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.contains - name: cs.contains - cs_matches: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.matches - name: cs.matches - is_selector: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.is_selector - name: is_selector - selector_column_names: - link: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.selector_column_names - name: selector_column_names DataFrame.explode: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.explode.html read_database_connectorx: name: read_database @@ -131,39 +99,54 @@ python: pivot: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.pivot.html melt: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.melt.html is_between: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.is_between.html - strftime: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.strftime.html - strptime: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.strptime.html - year: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.year.html - convert_time_zone: - name: convert_time_zone - link: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.convert_time_zone.html - feature_flags: ['timezone'] - replace_time_zone: - name: replace_time_zone - link: 
https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.replace_time_zone.html - feature_flags: ['timezone'] + date_range: https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.date_range.html upsample: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.upsample.html group_by_dynamic: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.group_by_dynamic.html - explode: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.explode.html cast: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.cast.html np.log: name: log link: https://numpy.org/doc/stable/reference/generated/numpy.log.html feature_flags: ['numpy'] - lengths: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.lengths.html - n_chars: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.n_chars.html - str.contains: - name: str.contains - link: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.contains.html - starts_with: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.starts_with.html - ends_with: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.ends_with.html - extract: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.extract.html - extract_all: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.extract_all.html - replace: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.replace.html - replace_all: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.replace_all.html Array: https://pola-rs.github.io/polars/py-polars/html/reference/api/polars.Array.html - arr: 
https://pola-rs.github.io/polars/py-polars/html/reference/series/array.html + Series.arr: https://pola-rs.github.io/polars/py-polars/html/reference/series/array.html + Series.dt.day: https://pola-rs.github.io/polars/py-polars/html/reference/series/api/polars.Series.dt.day.html + + selectors: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html + cs.numeric: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.numeric + cs.by_name: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.by_name + cs.first: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.first + cs.temporal: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.temporal + cs.contains: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.contains + cs.matches: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.matches + is_selector: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.is_selector + selector_column_names: https://pola-rs.github.io/polars/py-polars/html/reference/selectors.html#polars.selectors.selector_column_names + + dt.convert_time_zone: + name: dt.convert_time_zone + link: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.convert_time_zone.html + feature_flags: ['timezone'] + dt.replace_time_zone: + name: dt.replace_time_zone + link: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.replace_time_zone.html + feature_flags: ['timezone'] + dt.to_string: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.to_string.html + dt.year: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.dt.year.html + + str.starts_with: 
https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.starts_with.html + str.ends_with: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.ends_with.html + str.extract: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.extract.html + str.extract_all: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.extract_all.html + str.contains: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.contains.html + str.replace: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.replace.html + str.replace_all: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.replace_all.html + str.to_datetime: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.to_datetime.html + str.to_date: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.to_date.html + str.n_chars: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.n_chars.html + str.lengths: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.str.lengths.html + + struct.field: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.struct.field.html + struct.rename_fields: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.struct.rename_fields.html rust: DataFrame: https://pola-rs.github.io/polars/docs/rust/dev/polars/frame/struct.DataFrame.html @@ -176,9 +159,26 @@ rust: filter: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/frame/struct.LazyFrame.html#method.filter with_columns: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/frame/struct.LazyFrame.html#method.with_columns group_by: 
https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/frame/struct.LazyFrame.html#method.group_by + group_by_dynamic: + name: group_by_dynamic + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/frame/struct.LazyFrame.html#method.group_by_dynamic + feature_flags: [dynamic_group_by] join: https://pola-rs.github.io/polars/docs/rust/dev/polars_core/frame/hash_join/index.html hstack: https://pola-rs.github.io/polars/docs/rust/dev/polars_core/frame/struct.DataFrame.html#method.hstack + concat: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/functions/fn.concat.html SQLContext: https://pola-rs.github.io/polars/py-polars/html/reference/sql.html + + operators: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Operator.html + + Array: https://pola-rs.github.io/polars/docs/rust/dev/polars/datatypes/enum.DataType.html#variant.Array + + DataFrame.explode: https://pola-rs.github.io/polars/docs/rust/dev/polars/frame/struct.DataFrame.html#method.explode + pivot: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/frame/pivot/fn.pivot.html + melt: https://pola-rs.github.io/polars/docs/rust/dev/polars/frame/struct.DataFrame.html#method.melt + upsample: https://pola-rs.github.io/polars/docs/rust/dev/polars/frame/struct.DataFrame.html#method.upsample + join_asof: https://pola-rs.github.io/polars/docs/rust/dev/polars/prelude/trait.AsofJoin.html#method.join_asof + unnest: https://pola-rs.github.io/polars/docs/rust/dev/polars/frame/struct.DataFrame.html#method.unnest + read_csv: name: CsvReader link: https://pola-rs.github.io/polars/docs/rust/dev/polars_io/csv/struct.CsvReader.html @@ -243,7 +243,12 @@ rust: name: collect link: https://pola-rs.github.io/polars/docs/rust/dev/polars/prelude/struct.LazyFrame.html#method.collect feature_flags: ['streaming'] + col: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/fn.col.html + element: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/fn.col.html + all: 
https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/functions/fn.all.html + when: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/fn.when.html + sort: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.sort arr.eval: name: arr @@ -262,3 +267,101 @@ rust: map: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.map apply: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.apply over: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.over + + alias: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.alias + approx_n_unique: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.approx_n_unique + cast: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.cast + exclude: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.exclude + fill_nan: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.fill_nan + fill_null: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.fill_null + n_unique: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.n_unique + null_count: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.null_count + interpolate: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.interpolate + is_between: https://github.com/pola-rs/polars/issues/11285 + is_duplicated: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.is_duplicated + is_null: https://pola-rs.github.io/polars/docs/rust/dev/polars/prelude/enum.Expr.html#method.is_null + value_counts: + name: value_counts + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.value_counts + 
feature_flags: [dtype-struct] + + Expr.list: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/struct.ListNameSpace.html + Series.arr: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/struct.ArrayNameSpace.html + + date_range: + name: date_range + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/functions/fn.date_range.html + feature_flags: [range, dtype-date] + + selectors: https://github.com/pola-rs/polars/issues/10594 + cs.numeric: https://github.com/pola-rs/polars/issues/10594 + cs.by_name: https://github.com/pola-rs/polars/issues/10594 + cs.first: https://github.com/pola-rs/polars/issues/10594 + cs.temporal: https://github.com/pola-rs/polars/issues/10594 + cs.contains: https://github.com/pola-rs/polars/issues/10594 + cs.matches: https://github.com/pola-rs/polars/issues/10594 + is_selector: https://github.com/pola-rs/polars/issues/10594 + selector_column_names: https://github.com/pola-rs/polars/issues/10594 + + dt.convert_time_zone: + name: dt.convert_time_zone + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/dt/struct.DateLikeNameSpace.html#method.convert_time_zone + feature_flags: [timezones] + dt.replace_time_zone: + name: dt.replace_time_zone + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/dt/struct.DateLikeNameSpace.html#method.replace_time_zone + feature_flags: [timezones] + dt.to_string: + name: dt.to_string + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/dt/struct.DateLikeNameSpace.html#method.to_string + feature_flags: [temporal] + dt.year: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/dt/struct.DateLikeNameSpace.html#method.year + Series.dt.day: + name: dt.day + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/dt/struct.DateLikeNameSpace.html#method.day + feature_flags: [temporal] + + list.eval: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/trait.ListNameSpaceExtension.html#method.eval 
+ + str.contains: + name: str.contains + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.contains + feature_flags: [regex] + str.extract: + name: str.extract + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.extract + str.extract_all: + name: str.extract_all + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.extract_all + str.replace: + name: str.replace + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.replace + feature_flags: [regex] + str.replace_all: + name: str.replace_all + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.replace_all + feature_flags: [regex] + str.starts_with: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.starts_with + str.ends_with: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.ends_with + str.split: + name: str.split + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.split + str.to_date: + name: str.to_date + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.to_date + feature_flags: [dtype-date] + str.to_datetime: + name: str.to_datetime + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.to_datetime + feature_flags: [dtype-datetime] + str.n_chars: + name: str.n_chars + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.n_chars + str.lengths: + name: str.lengths + link: 
https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/string/struct.StringNameSpace.html#method.lengths + + struct.rename_fields: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/struct.StructNameSpace.html#method.rename_fields + struct.field: + name: struct.field_by_name + link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/struct.StructNameSpace.html#method.field_by_name diff --git a/docs/getting-started/series-dataframes.md b/docs/getting-started/series-dataframes.md index 07e05c194b93..d0a6e957fc2c 100644 --- a/docs/getting-started/series-dataframes.md +++ b/docs/getting-started/series-dataframes.md @@ -31,7 +31,7 @@ Although it is more common to work directly on a `DataFrame` object, `Series` im There are a number of methods related to string operations in the `StringNamespace`. These only work on `Series` with the Datatype `Utf8`. -{{code_block('getting-started/series-dataframes','string',['replace'])}} +{{code_block('getting-started/series-dataframes','string',['str.replace'])}} ```python exec="on" result="text" session="getting-started/series" --8<-- "python/getting-started/series-dataframes.py:string" @@ -41,7 +41,7 @@ There are a number of methods related to string operations in the `StringNamespa Similar to strings, there is a separate namespace for datetime related operations in the `DateLikeNameSpace`. These only work on `Series`with DataTypes related to dates. 
-{{code_block('getting-started/series-dataframes','dt',['day'])}} +{{code_block('getting-started/series-dataframes','dt',['Series.dt.day'])}} ```python exec="on" result="text" session="getting-started/series" --8<-- "python/getting-started/series-dataframes.py:dt" diff --git a/docs/src/python/getting-started/series-dataframes.py b/docs/src/python/getting-started/series-dataframes.py index 6f2fdf265c22..3171da06adbc 100644 --- a/docs/src/python/getting-started/series-dataframes.py +++ b/docs/src/python/getting-started/series-dataframes.py @@ -23,8 +23,7 @@ start = date(2001, 1, 1) stop = date(2001, 1, 9) s = pl.date_range(start, stop, interval="2d", eager=True) -s.dt.day() -print(s) +print(s.dt.day()) # --8<-- [end:dt] # --8<-- [start:dataframe] diff --git a/docs/src/python/user-guide/expressions/aggregation.py b/docs/src/python/user-guide/expressions/aggregation.py index 55a986164fbd..79120d79547f 100644 --- a/docs/src/python/user-guide/expressions/aggregation.py +++ b/docs/src/python/user-guide/expressions/aggregation.py @@ -16,7 +16,7 @@ } dataset = pl.read_csv(url, dtypes=dtypes).with_columns( - pl.col("birthday").str.strptime(pl.Date, strict=False) + pl.col("birthday").str.to_date(strict=False) ) # --8<-- [end:dataframe] diff --git a/docs/src/python/user-guide/expressions/casting.py b/docs/src/python/user-guide/expressions/casting.py index 7a57ac13656f..5f248937743e 100644 --- a/docs/src/python/user-guide/expressions/casting.py +++ b/docs/src/python/user-guide/expressions/casting.py @@ -122,8 +122,8 @@ ) out = df.select( - pl.col("date").dt.strftime("%Y-%m-%d"), - pl.col("string").str.strptime(pl.Datetime, "%Y-%m-%d"), + pl.col("date").dt.to_string("%Y-%m-%d"), + pl.col("string").str.to_datetime("%Y-%m-%d"), ) print(out) # --8<-- [end:dates2] diff --git a/docs/src/python/user-guide/transformations/time-series/filter.py b/docs/src/python/user-guide/transformations/time-series/filter.py index 6a2a28e44f8c..e720c9ae8ef5 100644 --- 
a/docs/src/python/user-guide/transformations/time-series/filter.py +++ b/docs/src/python/user-guide/transformations/time-series/filter.py @@ -21,7 +21,7 @@ # --8<-- [end:range] # --8<-- [start:negative] -ts = pl.Series(["-1300-05-23", "-1400-03-02"]).str.strptime(pl.Date) +ts = pl.Series(["-1300-05-23", "-1400-03-02"]).str.to_date() negative_dates_df = pl.DataFrame({"ts": ts, "values": [3, 4]}) diff --git a/docs/src/python/user-guide/transformations/time-series/parsing.py b/docs/src/python/user-guide/transformations/time-series/parsing.py index 0e49df5495a0..0a7a05842cd1 100644 --- a/docs/src/python/user-guide/transformations/time-series/parsing.py +++ b/docs/src/python/user-guide/transformations/time-series/parsing.py @@ -12,7 +12,7 @@ # --8<-- [start:cast] df = pl.read_csv("docs/data/apple_stock.csv", try_parse_dates=False) -df = df.with_columns(pl.col("Date").str.strptime(pl.Date, format="%Y-%m-%d")) +df = df.with_columns(pl.col("Date").str.to_date("%Y-%m-%d")) print(df) # --8<-- [end:cast] @@ -36,7 +36,7 @@ ] mixed_parsed = ( pl.Series(data) - .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S%z") + .str.to_datetime("%Y-%m-%dT%H:%M:%S%z") .dt.convert_time_zone("Europe/Brussels") ) print(mixed_parsed) diff --git a/docs/src/python/user-guide/transformations/time-series/timezones.py b/docs/src/python/user-guide/transformations/time-series/timezones.py index 13234a9d8e30..0f5470b08e30 100644 --- a/docs/src/python/user-guide/transformations/time-series/timezones.py +++ b/docs/src/python/user-guide/transformations/time-series/timezones.py @@ -5,7 +5,7 @@ # --8<-- [start:example] ts = ["2021-03-27 03:00", "2021-03-28 03:00"] -tz_naive = pl.Series("tz_naive", ts).str.strptime(pl.Datetime) +tz_naive = pl.Series("tz_naive", ts).str.to_datetime() tz_aware = tz_naive.dt.replace_time_zone("UTC").rename("tz_aware") time_zones_df = pl.DataFrame([tz_naive, tz_aware]) print(time_zones_df) diff --git a/docs/src/rust/getting-started/series-dataframes.rs 
b/docs/src/rust/getting-started/series-dataframes.rs index 09b45d705bac..f156784e2bbc 100644 --- a/docs/src/rust/getting-started/series-dataframes.rs +++ b/docs/src/rust/getting-started/series-dataframes.rs @@ -18,7 +18,7 @@ fn main() -> Result<(), Box> { // --8<-- [end:string] // --8<-- [start:dt] - // This operation is not directly available on the Series object yet, only on the DataFrame + // This operation is not directly available on the Series object yet, only as an Expression // --8<-- [end:dt] // --8<-- [start:dataframe] diff --git a/docs/src/rust/user-guide/expressions/casting.rs b/docs/src/rust/user-guide/expressions/casting.rs index eae6bd520c65..2c4938897b8a 100644 --- a/docs/src/rust/user-guide/expressions/casting.rs +++ b/docs/src/rust/user-guide/expressions/casting.rs @@ -187,9 +187,10 @@ fn main() -> Result<(), Box> { .clone() .lazy() .select([ - col("date").dt().strftime("%Y-%m-%d"), - col("string").str().strptime( - DataType::Datetime(TimeUnit::Microseconds, None), + col("date").dt().to_string("%Y-%m-%d"), + col("string").str().to_datetime( + TimeUnit::Microseconds, + None, StrptimeOptions::default(), lit("raise"), ), diff --git a/docs/src/rust/user-guide/transformations/time-series/filter.rs b/docs/src/rust/user-guide/transformations/time-series/filter.rs index 95cca184a068..da00effb30d0 100644 --- a/docs/src/rust/user-guide/transformations/time-series/filter.rs +++ b/docs/src/rust/user-guide/transformations/time-series/filter.rs @@ -45,7 +45,7 @@ fn main() -> Result<(), Box> { .with_column( col("ts") .str() - .strptime(DataType::Date, StrptimeOptions::default(), lit("raise")), + .to_date(StrptimeOptions::default(), lit("raise")), ) .collect()?; diff --git a/docs/src/rust/user-guide/transformations/time-series/parsing.rs b/docs/src/rust/user-guide/transformations/time-series/parsing.rs index 1ab4bddf8ba5..0f22761d371c 100644 --- a/docs/src/rust/user-guide/transformations/time-series/parsing.rs +++ 
b/docs/src/rust/user-guide/transformations/time-series/parsing.rs @@ -25,7 +25,7 @@ fn main() -> Result<(), Box> { .lazy() .with_columns([col("Date") .str() - .strptime(DataType::Date, StrptimeOptions::default(), lit("raise"))]) + .to_date(StrptimeOptions::default(), lit("raise"))]) .collect()?; println!("{}", &df); // --8<-- [end:cast] @@ -57,8 +57,9 @@ fn main() -> Result<(), Box> { ]; let q = col("date") .str() - .strptime( - DataType::Datetime(TimeUnit::Microseconds, None), + .to_datetime( + TimeUnit::Microseconds, + None, StrptimeOptions { format: Some("%Y-%m-%dT%H:%M:%S%z".to_string()), ..Default::default() diff --git a/docs/src/rust/user-guide/transformations/time-series/timezones.rs b/docs/src/rust/user-guide/transformations/time-series/timezones.rs index 6bc846f199be..09865a428586 100644 --- a/docs/src/rust/user-guide/transformations/time-series/timezones.rs +++ b/docs/src/rust/user-guide/transformations/time-series/timezones.rs @@ -8,8 +8,9 @@ fn main() -> Result<(), Box> { let tz_naive = Series::new("tz_naive", &ts); let time_zones_df = DataFrame::new(vec![tz_naive])? .lazy() - .select([col("tz_naive").str().strptime( - DataType::Datetime(TimeUnit::Milliseconds, None), + .select([col("tz_naive").str().to_datetime( + TimeUnit::Milliseconds, + None, StrptimeOptions::default(), lit("raise"), )]) diff --git a/docs/user-guide/expressions/casting.md b/docs/user-guide/expressions/casting.md index cb06699fa2ed..88b9d3fcbbd6 100644 --- a/docs/user-guide/expressions/casting.md +++ b/docs/user-guide/expressions/casting.md @@ -91,9 +91,9 @@ Temporal data types such as `Date` or `Datetime` are represented as the number o --8<-- "python/user-guide/expressions/casting.py:dates" ``` -To perform casting operations between strings and `Dates`/`Datetimes`, `strftime` and `strptime` are utilized. Polars adopts the [chrono format syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) for when formatting. 
It's worth noting that `strptime` features additional options that support timezone functionality. Refer to the API documentation for further information. +To convert between strings and `Dates`/`Datetimes`, `dt.to_string` and `str.to_datetime` are utilized. Polars adopts the [chrono format syntax](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) for formatting. It's worth noting that `str.to_datetime` features additional options that support timezone functionality. Refer to the API documentation for further information. -{{code_block('user-guide/expressions/casting','dates2',['strftime','strptime'])}} +{{code_block('user-guide/expressions/casting','dates2',['dt.to_string','str.to_datetime'])}} ```python exec="on" result="text" session="user-guide/cast" --8<-- "python/user-guide/expressions/casting.py:dates2" diff --git a/docs/user-guide/expressions/column-selections.md b/docs/user-guide/expressions/column-selections.md index 0f6b1a82f018..9c9579411ba4 100644 --- a/docs/user-guide/expressions/column-selections.md +++ b/docs/user-guide/expressions/column-selections.md @@ -37,7 +37,7 @@ Often, we don't just want to include all columns, but include all _while_ exclud Specifying multiple strings allows expressions to _expand_ to all matching columns: -{{code_block('user-guide/expressions/column-selections','expansion_by_names',['dt_to_string'])}} +{{code_block('user-guide/expressions/column-selections','expansion_by_names',['dt.to_string'])}} ```python exec="on" result="text" session="user-guide/column-selections" --8<-- "python/user-guide/expressions/column-selections.py:expansion_by_names" @@ -47,7 +47,7 @@ Specifying multiple strings allows expressions to _expand_ to all matching colum Multiple column selection is possible by regular expressions also, by making sure to wrap the regex by `^` and `$` to let `pl.col` know that a regex selection is expected: -{{code_block('user-guide/expressions/column-selections','expansion_by_regex',[''])}} 
+{{code_block('user-guide/expressions/column-selections','expansion_by_regex',[])}} ```python exec="on" result="text" session="user-guide/column-selections" --8<-- "python/user-guide/expressions/column-selections.py:expansion_by_regex" @@ -81,7 +81,7 @@ To select just the integer and string columns, we can do: These _selectors_ also allow for set based selection operations. For instance, to select the **numeric** columns **except** the **first** column that indicates row numbers: -{{code_block('user-guide/expressions/column-selections','selectors_diff',['cs_first', 'cs_numeric'])}} +{{code_block('user-guide/expressions/column-selections','selectors_diff',['cs.first', 'cs.numeric'])}} ```python exec="on" result="text" session="user-guide/column-selections" --8<-- "python/user-guide/expressions/column-selections.py:selectors_diff" @@ -89,7 +89,7 @@ These _selectors_ also allow for set based selection operations. For instance, t We can also select the row number by name **and** any **non**-numeric columns: -{{code_block('user-guide/expressions/column-selections','selectors_union',['cs_by_name', 'cs_numeric'])}} +{{code_block('user-guide/expressions/column-selections','selectors_union',['cs.by_name', 'cs.numeric'])}} ```python exec="on" result="text" session="user-guide/column-selections" --8<-- "python/user-guide/expressions/column-selections.py:selectors_union" @@ -99,7 +99,7 @@ We can also select the row number by name **and** any **non**-numeric columns: _Selectors_ can also be matched by substring and regex patterns: -{{code_block('user-guide/expressions/column-selections','selectors_by_name',['cs_contains', 'cs_matches'])}} +{{code_block('user-guide/expressions/column-selections','selectors_by_name',['cs.contains', 'cs.matches'])}} ```python exec="on" result="text" session="user-guide/column-selections" --8<-- "python/user-guide/expressions/column-selections.py:selectors_by_name" @@ -109,7 +109,7 @@ _Selectors_ can also be matched by substring and regex patterns: 
What if we want to apply a specific operation on the selected columns (i.e. get back to representing them as **expressions** to operate upon)? We can simply convert them using `as_expr` and then proceed as normal: -{{code_block('user-guide/expressions/column-selections','selectors_to_expr',['cs_temporal'])}} +{{code_block('user-guide/expressions/column-selections','selectors_to_expr',['cs.temporal'])}} ```python exec="on" result="text" session="user-guide/column-selections" --8<-- "python/user-guide/expressions/column-selections.py:selectors_to_expr" diff --git a/docs/user-guide/expressions/lists.md b/docs/user-guide/expressions/lists.md index b7c508f11b90..467c663aafd5 100644 --- a/docs/user-guide/expressions/lists.md +++ b/docs/user-guide/expressions/lists.md @@ -39,7 +39,7 @@ However, in Polars, we often do not need to do this to operate on the `List` ele Polars provides several standard operations on `List` columns. If we want the first three measurements, we can do a `head(3)`. The last three can be obtained via a `tail(3)`, or alternately, via `slice` (negative indexing is supported). We can also identify the number of observations via `lengths`. Let's see them in action: -{{code_block('user-guide/expressions/lists','list_ops',['Expr.List'])}} +{{code_block('user-guide/expressions/lists','list_ops',['Expr.list'])}} ```python exec="on" result="text" session="user-guide/lists" --8<-- "python/user-guide/expressions/lists.py:list_ops" @@ -60,7 +60,7 @@ If we need to identify the stations that are giving the most number of errors fr The third step requires a casting (or alternately, a regex pattern search) operation to be perform on each element of the list. We can do this using by applying the operation on each element by first referencing them in the `pl.element()` context, and then calling a suitable Polars expression on them. 
Let's see how: -{{code_block('user-guide/expressions/lists','count_errors',['Expr.List', 'element'])}} +{{code_block('user-guide/expressions/lists','count_errors',['Expr.list', 'element'])}} ```python exec="on" result="text" session="user-guide/lists" --8<-- "python/user-guide/expressions/lists.py:count_errors" @@ -110,7 +110,7 @@ We can define `Array` columns in this manner: Basic operations are available on it: -{{code_block('user-guide/expressions/lists','array_ops',['arr'])}} +{{code_block('user-guide/expressions/lists','array_ops',['Series.arr'])}} ```python exec="on" result="text" session="user-guide/lists" --8<-- "python/user-guide/expressions/lists.py:array_ops" diff --git a/docs/user-guide/expressions/strings.md b/docs/user-guide/expressions/strings.md index ccb06de30f20..9f00f7b1268e 100644 --- a/docs/user-guide/expressions/strings.md +++ b/docs/user-guide/expressions/strings.md @@ -8,7 +8,7 @@ String processing functions are available in the `str` namespace. The `str` namespace can be accessed through the `.str` attribute of a column with `Utf8` data type. In the following example, we create a column named `animal` and compute the length of each element in the column in terms of the number of bytes and the number of characters. If you are working with ASCII text, then the results of these two computations will be the same, and using `lengths` is recommended since it is faster. -{{code_block('user-guide/expressions/strings','df',['lengths','n_chars'])}} +{{code_block('user-guide/expressions/strings','df',['str.lengths','str.n_chars'])}} ```python exec="on" result="text" session="user-guide/strings" --8<-- "python/user-guide/expressions/strings.py:setup" @@ -23,7 +23,7 @@ The `str` namespace can be accessed through the `.str` attribute of a column wit To check for the presence of a pattern within a string, we can use the contains method. 
The `contains` method accepts either a regular substring or a regex pattern, depending on the value of the `literal` parameter. If the pattern we're searching for is a simple substring located either at the beginning or end of the string, we can alternatively use the `starts_with` and `ends_with` functions. -{{code_block('user-guide/expressions/strings','existence',['str.contains', 'starts_with','ends_with'])}} +{{code_block('user-guide/expressions/strings','existence',['str.contains', 'str.starts_with','str.ends_with'])}} ```python exec="on" result="text" session="user-guide/strings" --8<-- "python/user-guide/expressions/strings.py:existence" @@ -33,7 +33,7 @@ To check for the presence of a pattern within a string, we can use the contains The `extract` method allows us to extract a pattern from a specified string. This method takes a regex pattern containing one or more capture groups, which are defined by parentheses `()` in the pattern. The group index indicates which capture group to output. -{{code_block('user-guide/expressions/strings','extract',['extract'])}} +{{code_block('user-guide/expressions/strings','extract',['str.extract'])}} ```python exec="on" result="text" session="user-guide/strings" --8<-- "python/user-guide/expressions/strings.py:extract" @@ -41,7 +41,7 @@ The `extract` method allows us to extract a pattern from a specified string. Thi To extract all occurrences of a pattern within a string, we can use the `extract_all` method. In the example below, we extract all numbers from a string using the regex pattern `(\d+)`, which matches one or more digits. The resulting output of the `extract_all` method is a list containing all instances of the matched pattern within the string. 
-{{code_block('user-guide/expressions/strings','extract_all',['extract_all'])}} +{{code_block('user-guide/expressions/strings','extract_all',['str.extract_all'])}} ```python exec="on" result="text" session="user-guide/strings" --8<-- "python/user-guide/expressions/strings.py:extract_all" @@ -51,7 +51,7 @@ To extract all occurrences of a pattern within a string, we can use the `extract We have discussed two methods for pattern matching and extraction thus far, and now we will explore how to replace a pattern within a string. Similar to `extract` and `extract_all`, Polars provides the `replace` and `replace_all` methods for this purpose. In the example below we replace one match of `abc` at the end of a word (`\b`) by `ABC` and we replace all occurrence of `a` with `-`. -{{code_block('user-guide/expressions/strings','replace',['replace','replace_all'])}} +{{code_block('user-guide/expressions/strings','replace',['str.replace','str.replace_all'])}} ```python exec="on" result="text" session="user-guide/strings" --8<-- "python/user-guide/expressions/strings.py:replace" diff --git a/docs/user-guide/expressions/structs.md b/docs/user-guide/expressions/structs.md index 9973e61d4c68..ee0012fe4db4 100644 --- a/docs/user-guide/expressions/structs.md +++ b/docs/user-guide/expressions/structs.md @@ -52,7 +52,7 @@ Polars will interpret a `dict` sent to the `Series` constructor as a `Struct`: Let's say that we needed to obtain just the `movie` value in the `Series` that we created above. 
We can use the `field` method to do so: -{{code_block('user-guide/expressions/structs','series_struct_extract',['field'])}} +{{code_block('user-guide/expressions/structs','series_struct_extract',['struct.field'])}} ```python exec="on" result="text" session="user-guide/structs" --8<-- "python/user-guide/expressions/structs.py:series_struct_extract" @@ -62,7 +62,7 @@ Let's say that we needed to obtain just the `movie` value in the `Series` that w What if we need to rename individual `field`s of a `Struct` column? We first convert the `rating_Series` object to a `DataFrame` so that we can view the changes easily, and then use the `rename_fields` method: -{{code_block('user-guide/expressions/structs','series_struct_rename',['rename_fields'])}} +{{code_block('user-guide/expressions/structs','series_struct_rename',['struct.rename_fields'])}} ```python exec="on" result="text" session="user-guide/structs" --8<-- "python/user-guide/expressions/structs.py:series_struct_rename" diff --git a/docs/user-guide/io/aws.md b/docs/user-guide/io/aws.md index e19efc74b580..27c9cfeaf453 100644 --- a/docs/user-guide/io/aws.md +++ b/docs/user-guide/io/aws.md @@ -17,4 +17,4 @@ located on an AWS bucket. Load a `.parquet` file using: -{{code_block('user-guide/io/aws','bucket',['from_arrow'])}} +{{code_block('user-guide/io/aws','bucket',[])}} diff --git a/docs/user-guide/transformations/time-series/filter.md b/docs/user-guide/transformations/time-series/filter.md index 326969c34e11..1f57d8866fbd 100644 --- a/docs/user-guide/transformations/time-series/filter.md +++ b/docs/user-guide/transformations/time-series/filter.md @@ -41,7 +41,7 @@ Say you are working with an archeologist and are dealing in negative dates. Polars can parse and store them just fine, but the Python `datetime` library does not. 
So for filtering, you should use attributes in the `.dt` namespace: -{{code_block('user-guide/transformations/time-series/filter','negative',['strptime'])}} +{{code_block('user-guide/transformations/time-series/filter','negative',['str.to_date'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/filter" --8<-- "python/user-guide/transformations/time-series/filter.py:negative" diff --git a/docs/user-guide/transformations/time-series/parsing.md b/docs/user-guide/transformations/time-series/parsing.md index a31095d07434..62bdb0a44b8f 100644 --- a/docs/user-guide/transformations/time-series/parsing.md +++ b/docs/user-guide/transformations/time-series/parsing.md @@ -26,21 +26,21 @@ On the other hand binary formats such as parquet have a schema that is respected ## Casting strings to dates -You can also cast a column of datetimes encoded as strings to a datetime type. You do this by calling the string `str.strptime` method and passing the format of the date string: +You can also cast a column of datetimes encoded as strings to a datetime type. You do this by calling the string `str.to_date` method and passing the format of the date string: -{{code_block('user-guide/transformations/time-series/parsing','cast',['read_csv','strptime'])}} +{{code_block('user-guide/transformations/time-series/parsing','cast',['read_csv','str.to_date'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/parsing" --8<-- "python/user-guide/transformations/time-series/parsing.py:cast" ``` -[The strptime date formats can be found here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). +[The format string specification can be found here](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
## Extracting date features from a date column You can extract data features such as the year or day from a date column using the `.dt` namespace on a date column: -{{code_block('user-guide/transformations/time-series/parsing','extract',['year'])}} +{{code_block('user-guide/transformations/time-series/parsing','extract',['dt.year'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/parsing" --8<-- "python/user-guide/transformations/time-series/parsing.py:extract" @@ -51,7 +51,7 @@ You can extract data features such as the year or day from a date column using t If you have mixed offsets (say, due to crossing daylight saving time), then you can use `utc=True` and then convert to your time zone: -{{code_block('user-guide/transformations/time-series/parsing','mixed',['strptime','convert_time_zone'])}} +{{code_block('user-guide/transformations/time-series/parsing','mixed',['str.to_datetime','dt.convert_time_zone'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/parsing" --8<-- "python/user-guide/transformations/time-series/parsing.py:mixed" diff --git a/docs/user-guide/transformations/time-series/rolling.md b/docs/user-guide/transformations/time-series/rolling.md index a88373caada2..f3e009f99b76 100644 --- a/docs/user-guide/transformations/time-series/rolling.md +++ b/docs/user-guide/transformations/time-series/rolling.md @@ -103,7 +103,7 @@ Below we show an example where we use **group_by_dynamic** to compute: - the number of days until the end of the month - the number of days in a month -{{code_block('user-guide/transformations/time-series/rolling','group_by_dyn',['group_by_dynamic','explode','date_range'])}} +{{code_block('user-guide/transformations/time-series/rolling','group_by_dyn',['group_by_dynamic','DataFrame.explode','date_range'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/rolling" --8<-- "python/user-guide/transformations/time-series/rolling.py:group_by_dyn" diff --git 
a/docs/user-guide/transformations/time-series/timezones.md b/docs/user-guide/transformations/time-series/timezones.md index 48f6870e8b20..a12b97c68dd9 100644 --- a/docs/user-guide/transformations/time-series/timezones.md +++ b/docs/user-guide/transformations/time-series/timezones.md @@ -32,14 +32,14 @@ The main methods for setting and converting between time zones are: Let's look at some examples of common operations: -{{code_block('user-guide/transformations/time-series/timezones','example',['strptime','replace_time_zone'])}} +{{code_block('user-guide/transformations/time-series/timezones','example',['str.to_datetime','dt.replace_time_zone'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/timezones" --8<-- "python/user-guide/transformations/time-series/timezones.py:setup" --8<-- "python/user-guide/transformations/time-series/timezones.py:example" ``` -{{code_block('user-guide/transformations/time-series/timezones','example2',['convert_time_zone','replace_time_zone'])}} +{{code_block('user-guide/transformations/time-series/timezones','example2',['dt.convert_time_zone','dt.replace_time_zone'])}} ```python exec="on" result="text" session="user-guide/transformations/ts/timezones" --8<-- "python/user-guide/transformations/time-series/timezones.py:example2" diff --git a/mkdocs.yml b/mkdocs.yml index 65e961b13225..425033e2eb19 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -132,8 +132,7 @@ extra: property: G-LKNVFWD3T5 # Preview controls -# TODO: Fix warnings and turn on strict mode -strict: false +strict: true # Formatting options markdown_extensions: