From 862c7dd00cdd688cdc359d986fa55e07d20ce39c Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Fri, 19 Apr 2024 18:32:56 +1000 Subject: [PATCH 01/46] add make_clean_names function that can be applied to polars --- environment-dev.yml | 1 + janitor/functions/__init__.py | 2 + janitor/functions/clean_names.py | 125 ++-------- janitor/functions/polars/__init__.py | 0 janitor/functions/utils.py | 273 +++++++++++++++++++++ tests/functions/polars/test_clean_names.py | 123 ++++++++++ 6 files changed, 414 insertions(+), 110 deletions(-) create mode 100644 janitor/functions/polars/__init__.py create mode 100644 tests/functions/polars/test_clean_names.py diff --git a/environment-dev.yml b/environment-dev.yml index 1f8e48ece..322deec86 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -34,6 +34,7 @@ dependencies: - pipreqs - pip-tools - pre-commit + - pypolars - pyspark>=3.2.0 - pytest - pytest-cov diff --git a/janitor/functions/__init__.py b/janitor/functions/__init__.py index 35681b9d9..ef1a69458 100644 --- a/janitor/functions/__init__.py +++ b/janitor/functions/__init__.py @@ -81,6 +81,7 @@ col, get_columns, get_index_labels, + make_clean_names, patterns, unionize_dataframe_categories, ) @@ -129,6 +130,7 @@ "join_apply", "label_encode", "limit_column_characters", + "make_clean_names", "min_max_scale", "move", "pivot_longer", diff --git a/janitor/functions/clean_names.py b/janitor/functions/clean_names.py index 71735a7fc..db439d30f 100644 --- a/janitor/functions/clean_names.py +++ b/janitor/functions/clean_names.py @@ -1,14 +1,15 @@ -"""Functions for cleaning columns names.""" +"""Functions for cleaning columns/index names and/or column values.""" -import unicodedata -from typing import Hashable, Optional, Union +from typing import Optional, Union import pandas as pd import pandas_flavor as pf from pandas.api.types import is_scalar -from janitor.errors import JanitorError -from janitor.functions.utils import _is_str_or_cat, get_index_labels +from janitor.functions.utils import ( + get_index_labels, + make_clean_names, +) from janitor.utils import deprecated_alias @@ -116,14 +117,15 @@ def clean_names( column_names = [column_names] df = df.copy() for column_name in column_names: - df[column_name] = _clean_names_single_object( - obj=df[column_name], + df[column_name] = make_clean_names( + col=df[column_name], enforce_string=enforce_string, case_type=case_type, remove_special=remove_special, strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, + df_type="pandas", ) return df @@ -136,128 +138,31 @@ def clean_names( for number in range(target_axis.nlevels) ] target_axis = [ - _clean_names_single_object( - obj=obj, + make_clean_names( + col=obj, enforce_string=enforce_string, case_type=case_type, remove_special=remove_special, strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, + df_type="pandas", ) for obj in target_axis ] else: - target_axis = _clean_names_single_object( - obj=target_axis, + target_axis = make_clean_names( + col=target_axis, enforce_string=enforce_string, case_type=case_type, remove_special=remove_special, strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, + df_type="pandas", ) # Store the original column names, if enabled by user if preserve_original_labels: df.__dict__["original_labels"] = getattr(df, axis) setattr(df, axis, target_axis) return df - - -def _clean_names_single_object( - obj: Union[pd.Index, pd.Series], - 
enforce_string, - case_type, - remove_special, - strip_accents, - strip_underscores, - truncate_limit, -): - """ - Apply _clean_names on a single pandas object. - """ - if enforce_string and not (_is_str_or_cat(obj)): - obj = obj.astype(str) - obj = _change_case(obj, case_type) - obj = _normalize_1(obj) - if remove_special: - obj = obj.map(_remove_special) - if strip_accents: - obj = obj.map(_strip_accents) - obj = obj.str.replace(pat="_+", repl="_", regex=True) - obj = _strip_underscores_func(obj, strip_underscores=strip_underscores) - if truncate_limit: - obj = obj.str[:truncate_limit] - return obj - - -def _change_case(col: Union[pd.Index, pd.Series], case_type: str) -> str: - """Change case of labels in pandas object.""" - case_types = {"preserve", "upper", "lower", "snake"} - case_type = case_type.lower() - if case_type not in case_types: - raise JanitorError(f"case_type must be one of: {case_types}") - if case_type == "preserve": - return col - if case_type == "upper": - return col.str.upper() - if case_type == "lower": - return col.str.lower() - # Implementation taken from: https://gist.github.com/jaytaylor/3660565 - # by @jtaylor - return ( - col.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) - .str.replace(pat=r"([a-z0-9])([A-Z])", repl=r"\1_\2", regex=True) - .str.lower() - ) - - -def _remove_special(label: Hashable) -> str: - """Remove special characters from label.""" - return "".join( - [item for item in str(label) if item.isalnum() or "_" in item] - ) - - -def _normalize_1(col: Union[pd.Index, pd.Series]) -> str: - """Perform normalization of labels in pandas object.""" - FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] - for search, replace in FIXES: - col = col.str.replace(pat=search, repl=replace, regex=True) - return col - - -def _strip_accents(label: Hashable) -> str: - """Remove accents from a label. - - Inspired from [StackOverflow][so]. 
- - [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin - """ # noqa: E501 - - return "".join( - [ - letter - for letter in unicodedata.normalize("NFD", str(label)) - if not unicodedata.combining(letter) - ] - ) - - -def _strip_underscores_func( - col: Union[pd.Index, pd.Series], strip_underscores: Union[str, bool] = None -) -> pd.DataFrame: - """Strip underscores from a pandas object.""" - underscore_options = {None, "left", "right", "both", "l", "r", True} - if strip_underscores not in underscore_options: - raise JanitorError( - f"strip_underscores must be one of: {underscore_options}" - ) - - if strip_underscores in ["left", "l"]: - return col.str.lstrip("_") - if strip_underscores in ["right", "r"]: - return col.str.rstrip("_") - if strip_underscores in {True, "both"}: - return col.str.strip("_") - return col diff --git a/janitor/functions/polars/__init__.py b/janitor/functions/polars/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 8aa4d346b..01e192853 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -5,6 +5,7 @@ import fnmatch import inspect import re +import unicodedata import warnings from collections.abc import Callable as dispatch_callable from dataclasses import dataclass @@ -24,6 +25,7 @@ import numpy as np import pandas as pd +import polars as pl from multipledispatch import dispatch from pandas.api.types import ( is_bool_dtype, @@ -36,6 +38,7 @@ from pandas.core.common import is_bool_indexer from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy +from janitor.errors import JanitorError from janitor.utils import _expand_grid, check, check_column, find_stack_level warnings.simplefilter("always", DeprecationWarning) @@ -1133,3 +1136,273 @@ def __eq__(self, other): """ self.join_args = (self.cols, other.cols, "==") return self + + +def _change_case( + col: Union[pd.Index, pd.Series, pl.Expr, list, str], + case_type: str, + df_type: str, +) -> str: + """Change case of labels in col.""" + case_types = {"preserve", "upper", "lower", "snake"} + case_type = case_type.lower() + if case_type not in case_types: + raise JanitorError(f"df_type must be one of: {case_types}") + + if df_type == "pandas": + if case_type == "preserve": + return col + if case_type == "upper": + return col.str.upper() + if case_type == "lower": + return col.str.lower() + # Implementation taken from: https://gist.github.com/jaytaylor/3660565 + # by @jtaylor + return ( + col.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) + .str.replace(pat=r"([a-z0-9])([A-Z])", repl=r"\1_\2", regex=True) + .str.lower() + ) + if df_type == "polars": + if case_type == "preserve": + return col + if case_type == "upper": + return col.str.to_uppercase() + if case_type == "lower": + return col.str.to_lowercase() + # Implementation taken from: https://gist.github.com/jaytaylor/3660565 + # by @jtaylor + return ( + col.str.replace_all( + pattern=r"(.)([A-Z][a-z]+)", value=r"${1}_${2}", literal=False + ) + .str.replace_all( + pattern=r"([a-z0-9])([A-Z])", value=r"${1}_${2}", literal=False + ) + .str.to_lowercase() + ) + if df_type == "str": + if case_type == "preserve": + return col + if case_type == "upper": + return col.upper() + if case_type == "lower": + return col.lower() + # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 + # by @jtaylor + col = re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=col) 
+ col = re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=col) + return col.lower() + + if case_type == "preserve": + return col + if case_type == "upper": + return [label.upper() for label in col] + if case_type == "lower": + return [label.lower() for label in col] + # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 + # by @jtaylor + col = [ + re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=label) + for label in col + ] + col = [ + re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=label) + for label in col + ] + col = [label.lower() for label in col] + return col + + +def _normalize_1( + col: Union[pd.Index, pd.Series, pl.Expr, list, str], df_type: str +) -> str: + """Perform normalization of labels in col.""" + FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] + if df_type == "pandas": + for search, replace in FIXES: + col = col.str.replace(pat=search, repl=replace, regex=True) + elif df_type == "polars": + for search, replace in FIXES: + col = col.str.replace_all( + pattern=search, value=replace, literal=False + ) + elif df_type == "str": + for search, replace in FIXES: + col = re.sub(pattern=search, repl=replace, string=col) + else: + for search, replace in FIXES: + col = [ + re.sub(pattern=search, repl=replace, string=label) + for label in col + ] + return col + + +def _remove_special( + df_type: str, + col: Union[pd.Index, pd.Series, pl.Expr, list, str] = None, +) -> str: + """Remove special characters from col.""" + if df_type == "pandas": + return col.str.replace( + pat="[^A-Za-z_\\d]", repl="", regex=True + ).str.strip() + if df_type == "polars": + return col.str.replace_all( + pattern="[^A-Za-z_\\d]", value="", literal=False + ).str.strip_chars() + elif df_type == "str": + col = [item for item in col if item.isalnum() or (item == "_")] + return "".join(col) + out = [] + for label in col: + word = [item for item in label if item.isalnum() or (item == "_")] + word = "".join(word) + out.append(word) + return out + + +def _strip_accents( + col: Union[pd.Index, pd.Series, pl.Expr, list, str], + df_type: str, +) -> str: + """Remove accents from a label. + + Inspired from [StackOverflow][so]. 
+ + [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin + """ # noqa: E501 + if df_type == "pandas": + return col.map( + lambda f: "".join( + [ + letter + for letter in unicodedata.normalize("NFD", str(f)) + if not unicodedata.combining(letter) + ] + ) + ) + if df_type == "polars": + return col.map_elements( + lambda word: [ + letter + for letter in unicodedata.normalize("NFD", word) + if not unicodedata.combining(letter) + ], + return_dtype=pl.List(pl.Utf8), + ).list.join("") + if df_type == "str": + col = [ + letter + for letter in unicodedata.normalize("NFD", col) + if not unicodedata.combining(letter) + ] + return "".join(col) + out = [] + for label in col: + word = [ + letter + for letter in unicodedata.normalize("NFD", label) + if not unicodedata.combining(letter) + ] + word = "".join(word) + out.append(word) + return out + + +def _strip_underscores_func( + col: Union[pd.Index, pd.Series, pl.Expr, list, str], + df_type: str, + strip_underscores: Union[str, bool] = None, +) -> pd.DataFrame: + """Strip underscores.""" + underscore_options = {None, "left", "right", "both", "l", "r", True} + if strip_underscores not in underscore_options: + raise JanitorError( + f"strip_underscores must be one of: {underscore_options}" + ) + if df_type == "pandas": + if strip_underscores in {"left", "l"}: + return col.str.lstrip("_") + if strip_underscores in {"right", "r"}: + return col.str.rstrip("_") + if strip_underscores in {True, "both"}: + return col.str.strip("_") + return col + + if df_type == "polars": + if strip_underscores in {"left", "l"}: + return col.str.strip_chars_start("_") + if strip_underscores in {"right", "r"}: + return col.str.strip_chars_end("_") + if strip_underscores in {True, "both"}: + return col.str.strip_chars("_") + return col + + if df_type == "str": + if strip_underscores in {"left", "l"}: + return col.lstrip("_") + if strip_underscores in {"right", "r"}: + return col.rstrip("_") + if strip_underscores in {True, "both"}: + return col.strip("_") + return col + + if strip_underscores in {"left", "l"}: + return [label.lstrip("_") for label in col] + if strip_underscores in {"right", "r"}: + return [label.rstrip("_") for label in col] + if strip_underscores in {True, "both"}: + return [label.strip("_") for label in col] + return col + + +def make_clean_names( + col: Union[pd.Index, pd.Series, pl.Expr, list, str], + strip_underscores: Optional[Union[str, bool]] = None, + case_type: str = "lower", + remove_special: bool = False, + strip_accents: bool = False, + enforce_string: bool = False, + truncate_limit: int = None, + df_type: str = "pandas", +) -> Union[pd.Index, pd.Series, pl.Expr, list]: + """ + Generic function to clean an object. 
+ """ + if enforce_string and (df_type == "pandas"): + if not (_is_str_or_cat(col)): + col = col.astype(str) + elif enforce_string and (df_type == "python"): + col = [str(label) for label in col] + elif enforce_string and (df_type == "str"): + col = str(col) + elif enforce_string and (df_type == "polars"): + col = col.cast(pl.Utf8) + col = _change_case(col, case_type, df_type=df_type) + col = _normalize_1(col, df_type=df_type) + if remove_special: + col = _remove_special(df_type=df_type, col=col) + if strip_accents: + col = _strip_accents(col=col, df_type=df_type) + if df_type == "pandas": + col = col.str.replace(pat="_+", repl="_", regex=True) + elif df_type == "polars": + col = col.str.replace(pattern="_+", value="_", literal=False) + elif df_type == "str": + col = re.sub(pattern="_+", repl="_", string=col) + else: + col = [re.sub(pattern="_+", repl="_", string=label) for label in col] + col = _strip_underscores_func( + col, strip_underscores=strip_underscores, df_type=df_type + ) + if truncate_limit and (df_type == "pandas"): + col = col.str[:truncate_limit] + elif truncate_limit and (df_type == "polars"): + col = col.str.slice(offset=0, length=truncate_limit) + elif truncate_limit and (df_type == "str"): + col = col[:truncate_limit] + elif truncate_limit: + col = [label[:truncate_limit] for label in col] + return col diff --git a/tests/functions/polars/test_clean_names.py b/tests/functions/polars/test_clean_names.py new file mode 100644 index 000000000..51d6f1ff4 --- /dev/null +++ b/tests/functions/polars/test_clean_names.py @@ -0,0 +1,123 @@ +import polars as pl +import pytest + +from janitor import make_clean_names + + +@pytest.mark.functions +def test_clean_names_method_chain(dataframe): + """Tests clean_names default args in a method chain.""" + df = pl.from_pandas(dataframe) + df = df.rename(lambda col: make_clean_names(col, df_type="str")) + expected_columns = [ + "a", + "bell_chart", + "decorated_elephant", + "animals@#$%^", + "cities", + ] + assert df.columns == expected_columns + + +@pytest.mark.functions +def test_clean_names_special_characters(dataframe): + """Tests clean_names `remove_special` parameter.""" + df = pl.from_pandas(dataframe) + df = df.rename( + lambda col: make_clean_names(col, df_type="str", remove_special=True) + ) + expected_columns = [ + "a", + "bell_chart", + "decorated_elephant", + "animals", + "cities", + ] + assert df.columns == expected_columns + + +@pytest.mark.functions +def test_clean_names_uppercase(dataframe): + """Tests clean_names `case_type` parameter = upper.""" + df = pl.from_pandas(dataframe) + df = df.rename( + lambda col: make_clean_names( + col, df_type="str", remove_special=True, case_type="upper" + ) + ) + expected_columns = [ + "A", + "BELL_CHART", + "DECORATED_ELEPHANT", + "ANIMALS", + "CITIES", + ] + assert df.columns == expected_columns + + +@pytest.mark.functions +def test_clean_names_strip_accents(): + """Tests clean_names `strip_accents` parameter.""" + df = pl.DataFrame({"João": [1, 2], "Лука́ся": [1, 2], "Käfer": [1, 2]}) + df = df.rename( + lambda col: make_clean_names(col, df_type="str", strip_accents=True) + ) + expected_columns = ["joao", "лукася", "kafer"] + assert df.columns == expected_columns + + +@pytest.mark.functions +def test_clean_names_camelcase_to_snake(dataframe): + """Tests clean_names `case_type` parameter = snake.""" + df = pl.from_pandas(dataframe) + df = ( + df.select("a") + .rename({"a": "AColumnName"}) + .rename( + lambda col: make_clean_names( + col, df_type="str", remove_special=True, 
case_type="snake" + ) + ) + ) + assert df.columns == ["a_column_name"] + + +@pytest.mark.functions +def test_clean_names_truncate_limit(dataframe): + """Tests clean_names `truncate_limit` parameter.""" + df = pl.from_pandas(dataframe) + df = df.rename( + lambda col: make_clean_names(col, df_type="str", truncate_limit=7) + ) + # df = dataframe.clean_names(truncate_limit=7) + expected_columns = ["a", "bell_ch", "decorat", "animals", "cities"] + assert df.columns == expected_columns + + +@pytest.mark.functions +def test_charac(): + """Ensure non standard characters and spaces have been cleaned up.""" + + df = pl.DataFrame( + { + r"Current accountbalance(in % of GDP)": range(5), + } + ) + df = df.rename( + lambda col: make_clean_names( + col, df_type="str", strip_underscores=True, case_type="lower" + ) + ) + + assert "current_accountbalance_in_%_of_gdp" in df.columns + + +def test_clean_column_values(): + """Clean column values""" + raw = pl.DataFrame({"raw": ["Abçdê fgí j"]}) + outcome = raw.with_columns( + pl.col("raw").pipe( + make_clean_names, df_type="polars", strip_accents=True + ) + ) + assert list(outcome)[0][0] == "abcde_fgi_j" From 01531cc208486c7b92a851988a06676b602c822a Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 19:45:26 +1000 Subject: [PATCH 02/46] add examples for make_clean_names --- examples/notebooks/bla.ipynb | 94 +++++ janitor/functions/clean_names.py | 29 +- janitor/functions/utils.py | 338 ++++++++++++------ janitor/spark/functions.py | 2 +- ...an_names.py => test_clean_names_polars.py} | 28 +- 5 files changed, 351 insertions(+), 140 deletions(-) create mode 100644 examples/notebooks/bla.ipynb rename tests/functions/{polars/test_clean_names.py => test_clean_names_polars.py} (78%) diff --git a/examples/notebooks/bla.ipynb b/examples/notebooks/bla.ipynb new file mode 100644 index 000000000..f47c4b335 --- /dev/null +++ b/examples/notebooks/bla.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "from janitor import make_clean_names" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (3, 3)
AlohaBell ChartAnimals@#$%^
i64i64i64
000
111
222
" + ], + "text/plain": [ + "shape: (3, 3)\n", + "┌───────┬────────────┬──────────────┐\n", + "│ Aloha ┆ Bell Chart ┆ Animals@#$%^ │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 │\n", + "╞═══════╪════════════╪══════════════╡\n", + "│ 0 ┆ 0 ┆ 0 │\n", + "│ 1 ┆ 1 ┆ 1 │\n", + "│ 2 ┆ 2 ┆ 2 │\n", + "└───────┴────────────┴──────────────┘" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pl.DataFrame(\n", + " {\n", + " \"Aloha\": range(3),\n", + " \"Bell Chart\": range(3),\n", + " \"Animals@#$%^\": range(3)\n", + " }\n", + ")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw.with_columns(\n", + " pl.col(\"raw\").pipe(\n", + " make_clean_names, object_type=\"polars\", strip_accents=True\n", + " )\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyjanitor-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/janitor/functions/clean_names.py b/janitor/functions/clean_names.py index db439d30f..69af7f33e 100644 --- a/janitor/functions/clean_names.py +++ b/janitor/functions/clean_names.py @@ -78,8 +78,9 @@ def clean_names( Column selection is possible using the [`select`][janitor.functions.select.select] syntax. strip_underscores: Removes the outer underscores from all - column names. Default None keeps outer underscores. Values can be - either 'left', 'right' or 'both' or the respective shorthand 'l', + column names/values. Default None keeps outer underscores. + Values can be either 'left', 'right' or 'both' + or the respective shorthand 'l', 'r' and True. case_type: Whether to make columns lower or uppercase. Current case may be preserved with 'preserve', @@ -89,15 +90,17 @@ def clean_names( remove_special: Remove special characters from columns. Only letters, numbers and underscores are preserved. strip_accents: Whether or not to remove accents from - columns names. + columns names/values. preserve_original_labels: Preserve original names. This is later retrievable using `df.original_labels`. Applies if `axis` is not None. - enforce_string: Whether or not to convert all column names - to string type. Defaults to True, but can be turned off. + enforce_string: Whether or not to convert all + column names/values to string type. + Defaults to True, but can be turned off. Columns with >1 levels will not be converted by default. - truncate_limit: Truncates formatted column names to - the specified length. Default None does not truncate. + truncate_limit: Truncates formatted column names/values + to the specified length. + Default None does not truncate. Raises: ValueError: If `axis=None` and `column_names=None`. 
@@ -118,14 +121,14 @@ def clean_names( df = df.copy() for column_name in column_names: df[column_name] = make_clean_names( - col=df[column_name], + obj=df[column_name], enforce_string=enforce_string, case_type=case_type, remove_special=remove_special, strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, - df_type="pandas", + object_type="pandas", ) return df @@ -139,27 +142,27 @@ def clean_names( ] target_axis = [ make_clean_names( - col=obj, + obj=obj, enforce_string=enforce_string, case_type=case_type, remove_special=remove_special, strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, - df_type="pandas", + object_type="pandas", ) for obj in target_axis ] else: target_axis = make_clean_names( - col=target_axis, + obj=target_axis, enforce_string=enforce_string, case_type=case_type, remove_special=remove_special, strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, - df_type="pandas", + object_type="pandas", ) # Store the original column names, if enabled by user if preserve_original_labels: diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 01e192853..197908f92 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1139,41 +1139,41 @@ def __eq__(self, other): def _change_case( - col: Union[pd.Index, pd.Series, pl.Expr, list, str], + obj: Union[pd.Index, pd.Series, pl.Expr, list, str], case_type: str, - df_type: str, + object_type: str, ) -> str: - """Change case of labels in col.""" + """Change case of labels in obj.""" case_types = {"preserve", "upper", "lower", "snake"} case_type = case_type.lower() if case_type not in case_types: - raise JanitorError(f"df_type must be one of: {case_types}") + raise JanitorError(f"type must be one of: {case_types}") - if df_type == "pandas": + if object_type == "pandas": if case_type == "preserve": - return col + return obj if case_type == "upper": - return col.str.upper() + return obj.str.upper() if case_type == "lower": - return col.str.lower() + return obj.str.lower() # Implementation taken from: https://gist.github.com/jaytaylor/3660565 # by @jtaylor return ( - col.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) + obj.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) .str.replace(pat=r"([a-z0-9])([A-Z])", repl=r"\1_\2", regex=True) .str.lower() ) - if df_type == "polars": + if object_type == "polars": if case_type == "preserve": - return col + return obj if case_type == "upper": - return col.str.to_uppercase() + return obj.str.to_uppercase() if case_type == "lower": - return col.str.to_lowercase() + return obj.str.to_lowercase() # Implementation taken from: https://gist.github.com/jaytaylor/3660565 # by @jtaylor return ( - col.str.replace_all( + obj.str.replace_all( pattern=r"(.)([A-Z][a-z]+)", value=r"${1}_${2}", literal=False ) .str.replace_all( @@ -1181,82 +1181,82 @@ def _change_case( ) .str.to_lowercase() ) - if df_type == "str": + if object_type == "string": if case_type == "preserve": - return col + return obj if case_type == "upper": - return col.upper() + return obj.upper() if case_type == "lower": - return col.lower() + return obj.lower() # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 # by @jtaylor - col = re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=col) - col = re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=col) - return col.lower() + obj = re.sub(pattern=r"(.)([A-Z][a-z]+)", 
repl=r"\1_\2", string=obj) + obj = re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=obj) + return obj.lower() if case_type == "preserve": - return col + return obj if case_type == "upper": - return [label.upper() for label in col] + return [label.upper() for label in obj] if case_type == "lower": - return [label.lower() for label in col] + return [label.lower() for label in obj] # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 # by @jtaylor - col = [ + obj = [ re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=label) - for label in col + for label in obj ] - col = [ + obj = [ re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=label) - for label in col + for label in obj ] - col = [label.lower() for label in col] - return col + obj = [label.lower() for label in obj] + return obj def _normalize_1( - col: Union[pd.Index, pd.Series, pl.Expr, list, str], df_type: str + obj: Union[pd.Index, pd.Series, pl.Expr, list, str], object_type: str ) -> str: - """Perform normalization of labels in col.""" + """Perform normalization of labels in obj.""" FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] - if df_type == "pandas": + if object_type == "pandas": for search, replace in FIXES: - col = col.str.replace(pat=search, repl=replace, regex=True) - elif df_type == "polars": + obj = obj.str.replace(pat=search, repl=replace, regex=True) + elif object_type == "polars": for search, replace in FIXES: - col = col.str.replace_all( + obj = obj.str.replace_all( pattern=search, value=replace, literal=False ) - elif df_type == "str": + elif object_type == "string": for search, replace in FIXES: - col = re.sub(pattern=search, repl=replace, string=col) + obj = re.sub(pattern=search, repl=replace, string=obj) else: for search, replace in FIXES: - col = [ + obj = [ re.sub(pattern=search, repl=replace, string=label) - for label in col + for label in obj ] - return col + return obj def _remove_special( - df_type: str, - col: Union[pd.Index, pd.Series, pl.Expr, list, str] = None, + object_type: str, + obj: Union[pd.Index, pd.Series, pl.Expr, list, str] = None, ) -> str: - """Remove special characters from col.""" - if df_type == "pandas": - return col.str.replace( + """Remove special characters from obj.""" + if object_type == "pandas": + return obj.str.replace( pat="[^A-Za-z_\\d]", repl="", regex=True ).str.strip() - if df_type == "polars": - return col.str.replace_all( + if object_type == "polars": + return obj.str.replace_all( pattern="[^A-Za-z_\\d]", value="", literal=False ).str.strip_chars() - elif df_type == "str": - col = [item for item in col if item.isalnum() or (item == "_")] - return "".join(col) + elif object_type == "string": + obj = [item for item in obj if item.isalnum() or (item == "_")] + return "".join(obj) out = [] - for label in col: + for label in obj: word = [item for item in label if item.isalnum() or (item == "_")] word = "".join(word) out.append(word) @@ -1264,8 +1264,8 @@ def _remove_special( def _strip_accents( - col: Union[pd.Index, pd.Series, pl.Expr, list, str], - df_type: str, + obj: Union[pd.Index, pd.Series, pl.Expr, list, str], + object_type: str, ) -> str: """Remove accents from a label. 
@@ -1273,8 +1273,8 @@ def _strip_accents( [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin """ # noqa: E501 - if df_type == "pandas": - return col.map( + if object_type == "pandas": + return obj.map( lambda f: "".join( [ letter @@ -1283,8 +1283,8 @@ def _strip_accents( ] ) ) - if df_type == "polars": - return col.map_elements( + if object_type == "polars": + return obj.map_elements( lambda word: [ letter for letter in unicodedata.normalize("NFD", word) @@ -1292,15 +1292,15 @@ def _strip_accents( ], return_dtype=pl.List(pl.Utf8), ).list.join("") - if df_type == "str": - col = [ + if object_type == "string": + obj = [ letter - for letter in unicodedata.normalize("NFD", col) + for letter in unicodedata.normalize("NFD", obj) if not unicodedata.combining(letter) ] - return "".join(col) + return "".join(obj) out = [] - for label in col: + for label in obj: word = [ letter for letter in unicodedata.normalize("NFD", label) @@ -1312,8 +1312,8 @@ def _strip_accents( def _strip_underscores_func( - col: Union[pd.Index, pd.Series, pl.Expr, list, str], - df_type: str, + obj: Union[pd.Index, pd.Series, pl.Expr, list, str], + object_type: str, strip_underscores: Union[str, bool] = None, ) -> pd.DataFrame: """Strip underscores.""" @@ -1322,87 +1322,189 @@ def _strip_underscores_func( raise JanitorError( f"strip_underscores must be one of: {underscore_options}" ) - if df_type == "pandas": + if object_type == "pandas": if strip_underscores in {"left", "l"}: - return col.str.lstrip("_") + return obj.str.lstrip("_") if strip_underscores in {"right", "r"}: - return col.str.rstrip("_") + return obj.str.rstrip("_") if strip_underscores in {True, "both"}: - return col.str.strip("_") - return col + return obj.str.strip("_") + return obj - if df_type == "polars": + if object_type == "polars": if strip_underscores in {"left", "l"}: - return col.str.strip_chars_start("_") + return obj.str.strip_chars_start("_") if strip_underscores in {"right", "r"}: - return col.str.strip_chars_end("_") + return obj.str.strip_chars_end("_") if strip_underscores in {True, "both"}: - return col.str.strip_chars("_") - return col + return obj.str.strip_chars("_") + return obj - if df_type == "str": + if object_type == "string": if strip_underscores in {"left", "l"}: - return col.lstrip("_") + return obj.lstrip("_") if strip_underscores in {"right", "r"}: - return col.rstrip("_") + return obj.rstrip("_") if strip_underscores in {True, "both"}: - return col.strip("_") - return col + return obj.strip("_") + return obj if strip_underscores in {"left", "l"}: - return [label.lstrip("_") for label in col] + return [label.lstrip("_") for label in obj] if strip_underscores in {"right", "r"}: - return [label.rstrip("_") for label in col] + return [label.rstrip("_") for label in obj] if strip_underscores in {True, "both"}: - return [label.strip("_") for label in col] - return col + return [label.strip("_") for label in obj] + return obj def make_clean_names( - col: Union[pd.Index, pd.Series, pl.Expr, list, str], + obj: Union[pd.Index, pd.Series, pl.Expr, list, str], strip_underscores: Optional[Union[str, bool]] = None, case_type: str = "lower", remove_special: bool = False, strip_accents: bool = False, enforce_string: bool = False, truncate_limit: int = None, - df_type: str = "pandas", + object_type: str = "pandas", ) -> Union[pd.Index, pd.Series, pl.Expr, list]: """ - Generic function to clean an object. 
- """ - if enforce_string and (df_type == "pandas"): - if not (_is_str_or_cat(col)): - col = col.astype(str) - elif enforce_string and (df_type == "python"): - col = [str(label) for label in col] - elif enforce_string and (df_type == "str"): - col = str(col) - elif enforce_string and (df_type == "polars"): - col = col.cast(pl.Utf8) - col = _change_case(col, case_type, df_type=df_type) - col = _normalize_1(col, df_type=df_type) + Generic function to clean labels in an object. + It can be applied to a pandas Index/Series, a Polars Expression, + or a python string/list. + For pandas, there is a [`clean_names`][janitor.functions.clean_names.clean_names] + method, which is a wrapper around the `make_clean_names` function. + For polars, use this function via existing Polars functions. The examples below + show how you can use this within polars. + + Examples: + >>> import polars as pl + >>> import janitor + >>> df = pl.DataFrame( + ... { + ... "Aloha": range(3), + ... "Bell Chart": range(3), + ... "Animals@#$%^": range(3) + ... } + ... ) + >>> df + shape: (3, 3) + ┌───────┬────────────┬──────────────┐ + │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═══════╪════════════╪══════════════╡ + │ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 2 ┆ 2 │ + └───────┴────────────┴──────────────┘ + + Clean the column names, + via [rename](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.rename.html#polars-dataframe-rename): + >>> df.rename( + ... lambda objumn_name: make_clean_names( + ... obj=objumn_name, remove_special=True, object_type="string" + ... ) + ... ) + shape: (3, 3) + ┌───────┬────────────┬─────────┐ + │ aloha ┆ bell_chart ┆ animals │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═══════╪════════════╪═════════╡ + │ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 2 ┆ 2 │ + └───────┴────────────┴─────────┘ + + >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]}) + >>> df + shape: (1, 1) + ┌─────────────┐ + │ raw │ + │ --- │ + │ str │ + ╞═════════════╡ + │ Abçdê fgí j │ + └─────────────┘ + + Clean the column values, + via [with_columns](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.with_columns.html#polars-dataframe-with-columns): + >>> df.with_columns( + ... pl.col("raw").pipe( + ... make_clean_names, object_type="polars", strip_accents=True + ... ) + ... ) + shape: (1, 1) + ┌─────────────┐ + │ raw │ + │ --- │ + │ str │ + ╞═════════════╡ + │ abcde_fgi_j │ + └─────────────┘ + + !!! info "New in version 0.28.0" + + Args: + obj: The object to clean. It can be a pandas Index, + a pandas Series, a polars Expression, a python string, + or a python list. + strip_underscores: Removes the outer underscores from all + labels. Default None keeps outer underscores. Values can be + either 'left', 'right' or 'both' or the respective shorthand 'l', + 'r' and True. + case_type: Whether to make the labels lower or uppercase. + Current case may be preserved with 'preserve', + while snake case conversion (from CamelCase or camelCase only) + can be turned on using "snake". + Default 'lower' makes all characters lowercase. + remove_special: Remove special characters from the labels. + Only letters, numbers and underscores are preserved. + strip_accents: Whether or not to remove accents from + the labels. + enforce_string: Whether or not to convert the labels to string. + Defaults to True, but can be turned off. + truncate_limit: Truncates formatted labels to + the specified length. Default None does not truncate. 
+ object_type: The type of object to clean. It should be either `pandas`, + `polars`, a python `string`, or a python `list`. + Returns: + A pandas Index, pandas Series, polars Expression, a python string, + or a python list. + """ # noqa: E501 + if enforce_string and (object_type == "pandas"): + if not (_is_str_or_cat(obj)): + obj = obj.astype(str) + elif enforce_string and (object_type == "list"): + obj = [str(label) for label in obj] + elif enforce_string and (object_type == "string"): + obj = str(obj) + elif enforce_string and (object_type == "polars"): + obj = obj.cast(pl.Utf8) + obj = _change_case(obj, case_type, object_type=object_type) + obj = _normalize_1(obj, object_type=object_type) if remove_special: - col = _remove_special(df_type=df_type, col=col) + obj = _remove_special(object_type=object_type, obj=obj) if strip_accents: - col = _strip_accents(col=col, df_type=df_type) - if df_type == "pandas": - col = col.str.replace(pat="_+", repl="_", regex=True) - elif df_type == "polars": - col = col.str.replace(pattern="_+", value="_", literal=False) - elif df_type == "str": - col = re.sub(pattern="_+", repl="_", string=col) + obj = _strip_accents(obj=obj, object_type=object_type) + if object_type == "pandas": + obj = obj.str.replace(pat="_+", repl="_", regex=True) + elif object_type == "polars": + obj = obj.str.replace(pattern="_+", value="_", literal=False) + elif object_type == "string": + obj = re.sub(pattern="_+", repl="_", string=obj) else: - col = [re.sub(pattern="_+", repl="_", string=label) for label in col] - col = _strip_underscores_func( - col, strip_underscores=strip_underscores, df_type=df_type + obj = [re.sub(pattern="_+", repl="_", string=label) for label in obj] + obj = _strip_underscores_func( + obj, strip_underscores=strip_underscores, object_type=object_type ) - if truncate_limit and (df_type == "pandas"): - col = col.str[:truncate_limit] - elif truncate_limit and (df_type == "polars"): - col = col.str.slice(offset=0, length=truncate_limit) - elif truncate_limit and (df_type == "str"): - col = col[:truncate_limit] + if truncate_limit and (object_type == "pandas"): + obj = obj.str[:truncate_limit] + elif truncate_limit and (object_type == "polars"): + obj = obj.str.slice(offset=0, length=truncate_limit) + elif truncate_limit and (object_type == "string"): + obj = obj[:truncate_limit] elif truncate_limit: - col = [label[:truncate_limit] for label in col] - return col + obj = [label[:truncate_limit] for label in obj] + return obj diff --git a/janitor/spark/functions.py b/janitor/spark/functions.py index a43f7338d..57abd1824 100644 --- a/janitor/spark/functions.py +++ b/janitor/spark/functions.py @@ -4,7 +4,7 @@ from typing import Union from janitor import utils as janitor_utils -from janitor.functions.clean_names import ( +from janitor.functions.utils import ( _change_case, _normalize_1, _remove_special, diff --git a/tests/functions/polars/test_clean_names.py b/tests/functions/test_clean_names_polars.py similarity index 78% rename from tests/functions/polars/test_clean_names.py rename to tests/functions/test_clean_names_polars.py index 51d6f1ff4..cacdfe608 100644 --- a/tests/functions/polars/test_clean_names.py +++ b/tests/functions/test_clean_names_polars.py @@ -8,7 +8,7 @@ def test_clean_names_method_chain(dataframe): """Tests clean_names default args in a method chain.""" df = pl.from_pandas(dataframe) - df = df.rename(lambda col: make_clean_names(col, df_type="str")) + df = df.rename(lambda col: make_clean_names(col, object_type="string")) expected_columns = [ "a", 
"bell_chart", @@ -24,7 +24,9 @@ def test_clean_names_special_characters(dataframe): """Tests clean_names `remove_special` parameter.""" df = pl.from_pandas(dataframe) df = df.rename( - lambda col: make_clean_names(col, df_type="str", remove_special=True) + lambda col: make_clean_names( + col, object_type="string", remove_special=True + ) ) expected_columns = [ "a", @@ -42,7 +44,7 @@ def test_clean_names_uppercase(dataframe): df = pl.from_pandas(dataframe) df = df.rename( lambda col: make_clean_names( - col, df_type="str", remove_special=True, case_type="upper" + col, object_type="string", remove_special=True, case_type="upper" ) ) expected_columns = [ @@ -60,7 +62,9 @@ def test_clean_names_strip_accents(): """Tests clean_names `strip_accents` parameter.""" df = pl.DataFrame({"João": [1, 2], "Лука́ся": [1, 2], "Käfer": [1, 2]}) df = df.rename( - lambda col: make_clean_names(col, df_type="str", strip_accents=True) + lambda col: make_clean_names( + col, object_type="string", strip_accents=True + ) ) expected_columns = ["joao", "лукася", "kafer"] assert df.columns == expected_columns @@ -75,7 +79,10 @@ def test_clean_names_camelcase_to_snake(dataframe): .rename({"a": "AColumnName"}) .rename( lambda col: make_clean_names( - col, df_type="str", remove_special=True, case_type="snake" + col, + object_type="string", + remove_special=True, + case_type="snake", ) ) ) @@ -87,7 +94,9 @@ def test_clean_names_truncate_limit(dataframe): """Tests clean_names `truncate_limit` parameter.""" df = pl.from_pandas(dataframe) df = df.rename( - lambda col: make_clean_names(col, df_type="str", truncate_limit=7) + lambda col: make_clean_names( + col, object_type="string", truncate_limit=7 + ) ) # df = dataframe.clean_names(truncate_limit=7) expected_columns = ["a", "bell_ch", "decorat", "animals", "cities"] @@ -105,7 +114,10 @@ def test_charac(): ) df = df.rename( lambda col: make_clean_names( - col, df_type="str", strip_underscores=True, case_type="lower" + col, + object_type="string", + strip_underscores=True, + case_type="lower", ) ) @@ -117,7 +129,7 @@ def test_clean_column_values(): raw = pl.DataFrame({"raw": ["Abçdê fgí j"]}) outcome = raw.with_columns( pl.col("raw").pipe( - make_clean_names, df_type="polars", strip_accents=True + make_clean_names, object_type="polars", strip_accents=True ) ) assert list(outcome)[0][0] == "abcde_fgi_j" From 0fb440e84fdf82b93afd1a617da97057370d9fa5 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 19:49:50 +1000 Subject: [PATCH 03/46] changelog --- CHANGELOG.md | 1 + examples/notebooks/bla.ipynb | 94 ------------------------------------ 2 files changed, 1 insertion(+), 94 deletions(-) delete mode 100644 examples/notebooks/bla.ipynb diff --git a/CHANGELOG.md b/CHANGELOG.md index 552de1e50..0fabcc7fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- [ENH] Add `make_clean_names` function which works on a pandas Index/Series, a Polar Expression, or a python string/list. Issue #1343 ## [v0.27.0] - 2024-03-21 diff --git a/examples/notebooks/bla.ipynb b/examples/notebooks/bla.ipynb deleted file mode 100644 index f47c4b335..000000000 --- a/examples/notebooks/bla.ipynb +++ /dev/null @@ -1,94 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import polars as pl\n", - "from janitor import make_clean_names" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (3, 3)
AlohaBell ChartAnimals@#$%^
i64i64i64
000
111
222
" - ], - "text/plain": [ - "shape: (3, 3)\n", - "┌───────┬────────────┬──────────────┐\n", - "│ Aloha ┆ Bell Chart ┆ Animals@#$%^ │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ i64 ┆ i64 ┆ i64 │\n", - "╞═══════╪════════════╪══════════════╡\n", - "│ 0 ┆ 0 ┆ 0 │\n", - "│ 1 ┆ 1 ┆ 1 │\n", - "│ 2 ┆ 2 ┆ 2 │\n", - "└───────┴────────────┴──────────────┘" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pl.DataFrame(\n", - " {\n", - " \"Aloha\": range(3),\n", - " \"Bell Chart\": range(3),\n", - " \"Animals@#$%^\": range(3)\n", - " }\n", - ")\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "raw.with_columns(\n", - " pl.col(\"raw\").pipe(\n", - " make_clean_names, object_type=\"polars\", strip_accents=True\n", - " )\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pyjanitor-dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 5e944b2211ffba92d40a2f5e12b7a8e8d093a625 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:05:02 +1000 Subject: [PATCH 04/46] limit import location for polars --- janitor/functions/utils.py | 8 +++++++- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 197908f92..39bad2d91 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -12,6 +12,7 @@ from enum import Enum from functools import singledispatch from typing import ( + TYPE_CHECKING, Any, Callable, Hashable, @@ -25,7 +26,6 @@ import numpy as np import pandas as pd -import polars as pl from multipledispatch import dispatch from pandas.api.types import ( is_bool_dtype, @@ -1138,6 +1138,10 @@ def __eq__(self, other): return self +if TYPE_CHECKING: + import polars as pl + + def _change_case( obj: Union[pd.Index, pd.Series, pl.Expr, list, str], case_type: str, @@ -1473,6 +1477,8 @@ def make_clean_names( A pandas Index, pandas Series, polars Expression, a python string, or a python list. """ # noqa: E501 + if object_type == "polars": + import polars as pl if enforce_string and (object_type == "pandas"): if not (_is_str_or_cat(obj)): obj = obj.astype(str) diff --git a/pyproject.toml b/pyproject.toml index af1131d75..f6b98f54b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,6 @@ lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" # Assume Python 3.10 target-version = "py310" -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] # Unlike Flake8, default to a complexity level of 10. 
max-complexity = 10 From 501d9c67b6c2688929c0b40554f552f782c29f27 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:19:11 +1000 Subject: [PATCH 05/46] limit import location for polars --- janitor/functions/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 39bad2d91..9f329e62f 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -39,7 +39,12 @@ from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy from janitor.errors import JanitorError -from janitor.utils import _expand_grid, check, check_column, find_stack_level +from janitor.utils import ( + _expand_grid, + check, + check_column, + find_stack_level, +) warnings.simplefilter("always", DeprecationWarning) From 9506832433b8dd57f65c53de513e4e94c7e47bfc Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:25:48 +1000 Subject: [PATCH 06/46] fix polars in environment-dev.yml --- environment-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 322deec86..2543e2c76 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -34,7 +34,7 @@ dependencies: - pipreqs - pip-tools - pre-commit - - pypolars + - polars - pyspark>=3.2.0 - pytest - pytest-cov From 1ae8eddbe49274e0ef5613bceec18fa0cd28c9e5 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:35:18 +1000 Subject: [PATCH 07/46] install polars in doctest --- janitor/functions/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 9f329e62f..4d92d2aef 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1387,6 +1387,8 @@ def make_clean_names( show how you can use this within polars. 
Examples: + >>> import subprocess + >>> subprocess.call(['pip', 'install', 'polars']) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From 3b1829b2551bd4805ab24c6e4308aacb9c734b99 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:47:06 +1000 Subject: [PATCH 08/46] limit polars imports - user should have polars already installed --- janitor/functions/utils.py | 26 ++++++++++++---------- tests/functions/test_clean_names_polars.py | 9 +++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 4d92d2aef..56700ea68 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1144,11 +1144,11 @@ def __eq__(self, other): if TYPE_CHECKING: - import polars as pl + from polars import Expr def _change_case( - obj: Union[pd.Index, pd.Series, pl.Expr, list, str], + obj: Union[pd.Index, pd.Series, Expr, list, str], case_type: str, object_type: str, ) -> str: @@ -1224,7 +1224,7 @@ def _change_case( def _normalize_1( - obj: Union[pd.Index, pd.Series, pl.Expr, list, str], object_type: str + obj: Union[pd.Index, pd.Series, Expr, list, str], object_type: str ) -> str: """Perform normalization of labels in obj.""" FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] @@ -1250,7 +1250,7 @@ def _normalize_1( def _remove_special( object_type: str, - obj: Union[pd.Index, pd.Series, pl.Expr, list, str] = None, + obj: Union[pd.Index, pd.Series, Expr, list, str] = None, ) -> str: """Remove special characters from obj.""" if object_type == "pandas": @@ -1273,7 +1273,7 @@ def _remove_special( def _strip_accents( - obj: Union[pd.Index, pd.Series, pl.Expr, list, str], + obj: Union[pd.Index, pd.Series, Expr, list, str], object_type: str, ) -> str: """Remove accents from a label. @@ -1293,13 +1293,15 @@ def _strip_accents( ) ) if object_type == "polars": + from polars import List, Utf8 + return obj.map_elements( lambda word: [ letter for letter in unicodedata.normalize("NFD", word) if not unicodedata.combining(letter) ], - return_dtype=pl.List(pl.Utf8), + return_dtype=List(Utf8), ).list.join("") if object_type == "string": obj = [ @@ -1321,7 +1323,7 @@ def _strip_accents( def _strip_underscores_func( - obj: Union[pd.Index, pd.Series, pl.Expr, list, str], + obj: Union[pd.Index, pd.Series, Expr, list, str], object_type: str, strip_underscores: Union[str, bool] = None, ) -> pd.DataFrame: @@ -1368,7 +1370,7 @@ def _strip_underscores_func( def make_clean_names( - obj: Union[pd.Index, pd.Series, pl.Expr, list, str], + obj: Union[pd.Index, pd.Series, Expr, list, str], strip_underscores: Optional[Union[str, bool]] = None, case_type: str = "lower", remove_special: bool = False, @@ -1376,7 +1378,7 @@ def make_clean_names( enforce_string: bool = False, truncate_limit: int = None, object_type: str = "pandas", -) -> Union[pd.Index, pd.Series, pl.Expr, list]: +) -> Union[pd.Index, pd.Series, Expr, list]: """ Generic function to clean labels in an object. It can be applied to a pandas Index/Series, a Polars Expression, @@ -1484,8 +1486,6 @@ def make_clean_names( A pandas Index, pandas Series, polars Expression, a python string, or a python list. 
""" # noqa: E501 - if object_type == "polars": - import polars as pl if enforce_string and (object_type == "pandas"): if not (_is_str_or_cat(obj)): obj = obj.astype(str) @@ -1494,7 +1494,9 @@ def make_clean_names( elif enforce_string and (object_type == "string"): obj = str(obj) elif enforce_string and (object_type == "polars"): - obj = obj.cast(pl.Utf8) + from polars import Utf8 + + obj = obj.cast(Utf8) obj = _change_case(obj, case_type, object_type=object_type) obj = _normalize_1(obj, object_type=object_type) if remove_special: diff --git a/tests/functions/test_clean_names_polars.py b/tests/functions/test_clean_names_polars.py index cacdfe608..814029ae7 100644 --- a/tests/functions/test_clean_names_polars.py +++ b/tests/functions/test_clean_names_polars.py @@ -1,7 +1,10 @@ -import polars as pl -import pytest +import subprocess -from janitor import make_clean_names +subprocess.call(["pip", "install", "polars"]) +import polars as pl # noqa: E402 +import pytest # noqa: E402 + +from janitor import make_clean_names # noqa: E402 @pytest.mark.functions From 52fd80cf5d1e6fafef65f36dc21845272e28fc3f Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:52:18 +1000 Subject: [PATCH 09/46] use subprocess.run --- janitor/functions/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 56700ea68..cb668c620 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1390,7 +1390,7 @@ def make_clean_names( Examples: >>> import subprocess - >>> subprocess.call(['pip', 'install', 'polars']) + >>> subprocess.run(['pip', 'install', 'polars']) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From 2dce78b6db0cd607e08c0ca64fad1e4f2105a908 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:57:50 +1000 Subject: [PATCH 10/46] add subprocess.devnull to docstrings --- janitor/functions/utils.py | 4 +++- tests/functions/test_clean_names_polars.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index cb668c620..153911b13 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1390,7 +1390,9 @@ def make_clean_names( Examples: >>> import subprocess - >>> subprocess.run(['pip', 'install', 'polars']) + >>> subprocess.run(['pip', 'install', 'polars'], + ... stdout = subprocess.DEVNULL, + ... stderr = subprocess.STDOUT) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( diff --git a/tests/functions/test_clean_names_polars.py b/tests/functions/test_clean_names_polars.py index 814029ae7..b920aa2e5 100644 --- a/tests/functions/test_clean_names_polars.py +++ b/tests/functions/test_clean_names_polars.py @@ -1,6 +1,6 @@ import subprocess -subprocess.call(["pip", "install", "polars"]) +subprocess.run(["pip", "install", "polars"]) import polars as pl # noqa: E402 import pytest # noqa: E402 From 37b3feb312e720d3f91e3ca7bd9ed0f90390af02 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 20:58:18 +1000 Subject: [PATCH 11/46] add subprocess.devnull to docstrings --- janitor/functions/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 153911b13..1656627db 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1392,7 +1392,7 @@ def make_clean_names( >>> import subprocess >>> subprocess.run(['pip', 'install', 'polars'], ... 
stdout = subprocess.DEVNULL, - ... stderr = subprocess.STDOUT) + ... stderr = subprocess.DEVNULL) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From 0953f2d2fb043ea5c127b27a92007e092f73ad9b Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:04:03 +1000 Subject: [PATCH 12/46] add subprocess.devnull to docstrings --- janitor/functions/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 1656627db..557567630 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1390,9 +1390,7 @@ def make_clean_names( Examples: >>> import subprocess - >>> subprocess.run(['pip', 'install', 'polars'], - ... stdout = subprocess.DEVNULL, - ... stderr = subprocess.DEVNULL) + >>> subprocess.call(['pip', 'install', 'polars'], stdout=open(os.devnull, 'wb')) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From d7c71b6498e46d6d3dc3e4a763b2d7c65f68eea4 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:07:31 +1000 Subject: [PATCH 13/46] add subprocess.devnull to docstrings --- janitor/functions/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 557567630..2c650328c 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1390,7 +1390,7 @@ def make_clean_names( Examples: >>> import subprocess - >>> subprocess.call(['pip', 'install', 'polars'], stdout=open(os.devnull, 'wb')) + >>> subprocess.call(['pip', 'install', 'polars'], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From 40b850247e4c2d9e589eb96513e0e9682fc55867 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:16:19 +1000 Subject: [PATCH 14/46] add os.devnull --- janitor/functions/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 2c650328c..7912907ae 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1390,7 +1390,8 @@ def make_clean_names( Examples: >>> import subprocess - >>> subprocess.call(['pip', 'install', 'polars'], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) + >>> import os + >>> subprocess.call(['pip', 'install', 'polars'], stdout=open(os.devnull, 'wb')) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From 4f11d095bad06291737f19d342c041a4ae876fd8 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:20:08 +1000 Subject: [PATCH 15/46] add polars as requirement for docs --- .requirements/docs.in | 1 + janitor/functions/utils.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.requirements/docs.in b/.requirements/docs.in index f0d4afc29..b23e373aa 100644 --- a/.requirements/docs.in +++ b/.requirements/docs.in @@ -1,4 +1,5 @@ mkdocs +polars mkdocs-material mkdocstrings>=0.19.0 mkdocstrings-python diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 7912907ae..b06744d97 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1389,9 +1389,6 @@ def make_clean_names( show how you can use this within polars. 
Examples: - >>> import subprocess - >>> import os - >>> subprocess.call(['pip', 'install', 'polars'], stdout=open(os.devnull, 'wb')) >>> import polars as pl >>> import janitor >>> df = pl.DataFrame( From 54b179c5ca420d6629c1e32885e7aec6fed04389 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:35:24 +1000 Subject: [PATCH 16/46] add polars to tests requirements --- .requirements/testing.in | 1 + janitor/functions/utils.py | 8 ++++++++ tests/functions/test_clean_names_polars.py | 7 ++----- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.requirements/testing.in b/.requirements/testing.in index 57e12c2d3..8179653b8 100644 --- a/.requirements/testing.in +++ b/.requirements/testing.in @@ -4,4 +4,5 @@ pytest>=3.4.2 hypothesis>=4.4.0 interrogate pandas-vet +polars py>=1.10.0 diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index b06744d97..dde4880fc 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1455,6 +1455,14 @@ def make_clean_names( │ abcde_fgi_j │ └─────────────┘ + The `make_clean_names` function can also be applied to a python string or list: + >>> raw = ["Abçdê fgí j"] + >>> make_clean_names(raw, object_type='list', strip_accents=True) + ['abcde_fgi_j'] + >>> raw = "Abçdê fgí j" + >>> make_clean_names(raw, object_type='string', strip_accents=True) + 'abcde_fgi_j' + !!! info "New in version 0.28.0" Args: diff --git a/tests/functions/test_clean_names_polars.py b/tests/functions/test_clean_names_polars.py index b920aa2e5..56d0b8e95 100644 --- a/tests/functions/test_clean_names_polars.py +++ b/tests/functions/test_clean_names_polars.py @@ -1,8 +1,5 @@ -import subprocess - -subprocess.run(["pip", "install", "polars"]) -import polars as pl # noqa: E402 -import pytest # noqa: E402 +import polars as pl +import pytest from janitor import make_clean_names # noqa: E402 From 25b39b9d1918a83373e7d784430e0d615a04e315 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:39:24 +1000 Subject: [PATCH 17/46] delete irrelevant folder --- janitor/functions/polars/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 janitor/functions/polars/__init__.py diff --git a/janitor/functions/polars/__init__.py b/janitor/functions/polars/__init__.py deleted file mode 100644 index e69de29bb..000000000 From a09f34bc6f15bcfa2dddf97797b6b1f6fb3ed910 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sat, 20 Apr 2024 21:45:00 +1000 Subject: [PATCH 18/46] changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fabcc7fb..6a3492539 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog ## [Unreleased] -- [ENH] Add `make_clean_names` function which works on a pandas Index/Series, a Polar Expression, or a python string/list. Issue #1343 +- [ENH] Add `make_clean_names` function which works on a pandas Index/Series, a Polars Expression, or a python string/list. 
Issue #1343 ## [v0.27.0] - 2024-03-21 From 1b375f84e4d5cba9e5955d6ade247f767c10c4d3 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 19:35:03 +1000 Subject: [PATCH 19/46] create submodule for polars --- CHANGELOG.md | 2 +- janitor/functions/__init__.py | 2 - janitor/functions/clean_names.py | 125 +++++- janitor/functions/utils.py | 367 ++---------------- janitor/polars/__init__.py | 131 +++++++ janitor/polars/functions.py | 160 ++++++++ .../functions/test_clean_names.py} | 51 +-- 7 files changed, 443 insertions(+), 395 deletions(-) create mode 100644 janitor/polars/__init__.py create mode 100644 janitor/polars/functions.py rename tests/{functions/test_clean_names_polars.py => polars/functions/test_clean_names.py} (66%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3492539..5717193d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog ## [Unreleased] -- [ENH] Add `make_clean_names` function which works on a pandas Index/Series, a Polars Expression, or a python string/list. Issue #1343 +- [ENH] Added a `clean_names` method for polars - it can be used to clean the column names, or clean column values . Issue #1343 ## [v0.27.0] - 2024-03-21 diff --git a/janitor/functions/__init__.py b/janitor/functions/__init__.py index ef1a69458..35681b9d9 100644 --- a/janitor/functions/__init__.py +++ b/janitor/functions/__init__.py @@ -81,7 +81,6 @@ col, get_columns, get_index_labels, - make_clean_names, patterns, unionize_dataframe_categories, ) @@ -130,7 +129,6 @@ "join_apply", "label_encode", "limit_column_characters", - "make_clean_names", "min_max_scale", "move", "pivot_longer", diff --git a/janitor/functions/clean_names.py b/janitor/functions/clean_names.py index 69af7f33e..7eb2a7538 100644 --- a/janitor/functions/clean_names.py +++ b/janitor/functions/clean_names.py @@ -1,15 +1,16 @@ """Functions for cleaning columns/index names and/or column values.""" +from __future__ import annotations + +import unicodedata from typing import Optional, Union import pandas as pd import pandas_flavor as pf from pandas.api.types import is_scalar -from janitor.functions.utils import ( - get_index_labels, - make_clean_names, -) +from janitor.errors import JanitorError +from janitor.functions.utils import _is_str_or_cat, get_index_labels from janitor.utils import deprecated_alias @@ -120,7 +121,7 @@ def clean_names( column_names = [column_names] df = df.copy() for column_name in column_names: - df[column_name] = make_clean_names( + df[column_name] = _clean_names( obj=df[column_name], enforce_string=enforce_string, case_type=case_type, @@ -128,7 +129,6 @@ def clean_names( strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, - object_type="pandas", ) return df @@ -141,7 +141,7 @@ def clean_names( for number in range(target_axis.nlevels) ] target_axis = [ - make_clean_names( + _clean_names( obj=obj, enforce_string=enforce_string, case_type=case_type, @@ -149,12 +149,11 @@ def clean_names( strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, - object_type="pandas", ) for obj in target_axis ] else: - target_axis = make_clean_names( + target_axis = _clean_names( obj=target_axis, enforce_string=enforce_string, case_type=case_type, @@ -162,10 +161,116 @@ def clean_names( strip_accents=strip_accents, strip_underscores=strip_underscores, truncate_limit=truncate_limit, - object_type="pandas", ) # Store the original column names, if enabled by user if preserve_original_labels: 
df.__dict__["original_labels"] = getattr(df, axis) setattr(df, axis, target_axis) return df + + +def _clean_names( + obj: Union[pd.Index, pd.Series], + strip_underscores: Optional[Union[str, bool]] = None, + case_type: str = "lower", + remove_special: bool = False, + strip_accents: bool = False, + enforce_string: bool = False, + truncate_limit: int = None, +) -> Union[pd.Index, pd.Series]: + """ + Generic function to clean labels in a pandas object. + """ + if enforce_string and not (_is_str_or_cat(obj)): + obj = obj.astype(str) + obj = _change_case(obj=obj, case_type=case_type) + obj = _normalize_1(obj=obj) + if remove_special: + obj = obj.str.replace( + pat="[^A-Za-z_\\d]", repl="", regex=True + ).str.strip() + if strip_accents: + obj = _strip_accents(obj=obj) + obj = obj.str.replace(pat="_+", repl="_", regex=True) + obj = _strip_underscores_func( + obj, + strip_underscores=strip_underscores, + ) + if truncate_limit: + obj = obj.str[:truncate_limit] + return obj + + +def _change_case( + obj: Union[pd.Index, pd.Series], + case_type: str, +) -> Union[pd.Index, pd.Series]: + """Change case of labels in obj.""" + case_types = {"preserve", "upper", "lower", "snake"} + case_type = case_type.lower() + if case_type not in case_types: + raise JanitorError(f"case_type must be one of: {case_types}") + + if case_type == "preserve": + return obj + if case_type == "upper": + return obj.str.upper() + if case_type == "lower": + return obj.str.lower() + # Implementation taken from: https://gist.github.com/jaytaylor/3660565 + # by @jtaylor + return ( + obj.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) + .str.replace(pat=r"([a-z0-9])([A-Z])", repl=r"\1_\2", regex=True) + .str.lower() + ) + + +def _normalize_1( + obj: Union[pd.Index, pd.Series] +) -> Union[pd.Index, pd.Series]: + """Perform normalization of labels in obj.""" + FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] + for search, replace in FIXES: + obj = obj.str.replace(pat=search, repl=replace, regex=True) + + return obj + + +def _strip_accents( + obj: Union[pd.Index, pd.Series], +) -> Union[pd.Index, pd.Series]: + """Remove accents from a label. + + Inspired from [StackOverflow][so]. 
+ + [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin + """ # noqa: E501 + return obj.map( + lambda f: "".join( + [ + letter + for letter in unicodedata.normalize("NFD", str(f)) + if not unicodedata.combining(letter) + ] + ) + ) + + +def _strip_underscores_func( + obj: Union[pd.Index, pd.Series], + strip_underscores: Union[str, bool] = None, +) -> Union[pd.Index, pd.Series]: + """Strip underscores.""" + underscore_options = {None, "left", "right", "both", "l", "r", True} + if strip_underscores not in underscore_options: + raise JanitorError( + f"strip_underscores must be one of: {underscore_options}" + ) + if strip_underscores in {"left", "l"}: + return obj.str.lstrip("_") + if strip_underscores in {"right", "r"}: + return obj.str.rstrip("_") + if strip_underscores in {True, "both"}: + return obj.str.strip("_") + return obj diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index dde4880fc..4bf0d0eea 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -12,7 +12,6 @@ from enum import Enum from functools import singledispatch from typing import ( - TYPE_CHECKING, Any, Callable, Hashable, @@ -1143,138 +1142,48 @@ def __eq__(self, other): return self -if TYPE_CHECKING: - from polars import Expr - - def _change_case( - obj: Union[pd.Index, pd.Series, Expr, list, str], + obj: str, case_type: str, - object_type: str, ) -> str: - """Change case of labels in obj.""" + """Change case of obj.""" case_types = {"preserve", "upper", "lower", "snake"} case_type = case_type.lower() if case_type not in case_types: raise JanitorError(f"type must be one of: {case_types}") - if object_type == "pandas": - if case_type == "preserve": - return obj - if case_type == "upper": - return obj.str.upper() - if case_type == "lower": - return obj.str.lower() - # Implementation taken from: https://gist.github.com/jaytaylor/3660565 - # by @jtaylor - return ( - obj.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) - .str.replace(pat=r"([a-z0-9])([A-Z])", repl=r"\1_\2", regex=True) - .str.lower() - ) - if object_type == "polars": - if case_type == "preserve": - return obj - if case_type == "upper": - return obj.str.to_uppercase() - if case_type == "lower": - return obj.str.to_lowercase() - # Implementation taken from: https://gist.github.com/jaytaylor/3660565 - # by @jtaylor - return ( - obj.str.replace_all( - pattern=r"(.)([A-Z][a-z]+)", value=r"${1}_${2}", literal=False - ) - .str.replace_all( - pattern=r"([a-z0-9])([A-Z])", value=r"${1}_${2}", literal=False - ) - .str.to_lowercase() - ) - if object_type == "string": - if case_type == "preserve": - return obj - if case_type == "upper": - return obj.upper() - if case_type == "lower": - return obj.lower() - # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 - # by @jtaylor - obj = re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=obj) - obj = re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=obj) - return obj.lower() - if case_type == "preserve": return obj if case_type == "upper": - return [label.upper() for label in obj] + return obj.upper() if case_type == "lower": - return [label.lower() for label in obj] + return obj.lower() # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 # by @jtaylor - obj = [ - re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=label) - for label in obj - ] - obj = [ - re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=label) - for label in obj - ] - obj 
= [label.lower() for label in obj] - return obj + obj = re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=obj) + obj = re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=obj) + return obj.lower() -def _normalize_1( - obj: Union[pd.Index, pd.Series, Expr, list, str], object_type: str -) -> str: +def _normalize_1(obj: str) -> str: """Perform normalization of labels in obj.""" FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] - if object_type == "pandas": - for search, replace in FIXES: - obj = obj.str.replace(pat=search, repl=replace, regex=True) - elif object_type == "polars": - for search, replace in FIXES: - obj = obj.str.replace_all( - pattern=search, value=replace, literal=False - ) - elif object_type == "string": - for search, replace in FIXES: - obj = re.sub(pattern=search, repl=replace, string=obj) - else: - for search, replace in FIXES: - obj = [ - re.sub(pattern=search, repl=replace, string=label) - for label in obj - ] + for search, replace in FIXES: + obj = re.sub(pattern=search, repl=replace, string=obj) + return obj def _remove_special( - object_type: str, - obj: Union[pd.Index, pd.Series, Expr, list, str] = None, + obj: str, ) -> str: """Remove special characters from obj.""" - if object_type == "pandas": - return obj.str.replace( - pat="[^A-Za-z_\\d]", repl="", regex=True - ).str.strip() - if object_type == "polars": - return obj.str.replace_all( - pattern="[^A-Za-z_\\d]", value="", literal=False - ).str.strip_chars() - elif object_type == "string": - obj = [item for item in obj if item.isalnum() or (item == "_")] - return "".join(obj) - out = [] - for label in obj: - word = [item for item in label if item.isalnum() or (item == "_")] - word = "".join(word) - out.append(word) - return out + obj = [item for item in obj if item.isalnum() or (item == "_")] + return "".join(obj) def _strip_accents( - obj: Union[pd.Index, pd.Series, Expr, list, str], - object_type: str, + obj: str, ) -> str: """Remove accents from a label. 
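# [editor's aside -- illustration, not part of the patch series]
# The two regex passes used by the string version of `_change_case` earlier in
# this hunk, traced on the label from the camelCase-to-snake test:
import re

label = "AColumnName"
step1 = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", label)   # -> "A_Column_Name"
step2 = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", step1)  # no-op here; catches "camelCase"
assert step2.lower() == "a_column_name"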
@@ -1282,250 +1191,30 @@ def _strip_accents( [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin """ # noqa: E501 - if object_type == "pandas": - return obj.map( - lambda f: "".join( - [ - letter - for letter in unicodedata.normalize("NFD", str(f)) - if not unicodedata.combining(letter) - ] - ) - ) - if object_type == "polars": - from polars import List, Utf8 - - return obj.map_elements( - lambda word: [ - letter - for letter in unicodedata.normalize("NFD", word) - if not unicodedata.combining(letter) - ], - return_dtype=List(Utf8), - ).list.join("") - if object_type == "string": - obj = [ - letter - for letter in unicodedata.normalize("NFD", obj) - if not unicodedata.combining(letter) - ] - return "".join(obj) - out = [] - for label in obj: - word = [ - letter - for letter in unicodedata.normalize("NFD", label) - if not unicodedata.combining(letter) - ] - word = "".join(word) - out.append(word) - return out + + obj = [ + letter + for letter in unicodedata.normalize("NFD", obj) + if not unicodedata.combining(letter) + ] + return "".join(obj) def _strip_underscores_func( - obj: Union[pd.Index, pd.Series, Expr, list, str], - object_type: str, + obj: str, strip_underscores: Union[str, bool] = None, -) -> pd.DataFrame: - """Strip underscores.""" +) -> str: + """Strip underscores from obj.""" underscore_options = {None, "left", "right", "both", "l", "r", True} if strip_underscores not in underscore_options: raise JanitorError( f"strip_underscores must be one of: {underscore_options}" ) - if object_type == "pandas": - if strip_underscores in {"left", "l"}: - return obj.str.lstrip("_") - if strip_underscores in {"right", "r"}: - return obj.str.rstrip("_") - if strip_underscores in {True, "both"}: - return obj.str.strip("_") - return obj - - if object_type == "polars": - if strip_underscores in {"left", "l"}: - return obj.str.strip_chars_start("_") - if strip_underscores in {"right", "r"}: - return obj.str.strip_chars_end("_") - if strip_underscores in {True, "both"}: - return obj.str.strip_chars("_") - return obj - - if object_type == "string": - if strip_underscores in {"left", "l"}: - return obj.lstrip("_") - if strip_underscores in {"right", "r"}: - return obj.rstrip("_") - if strip_underscores in {True, "both"}: - return obj.strip("_") - return obj if strip_underscores in {"left", "l"}: - return [label.lstrip("_") for label in obj] + return obj.lstrip("_") if strip_underscores in {"right", "r"}: - return [label.rstrip("_") for label in obj] + return obj.rstrip("_") if strip_underscores in {True, "both"}: - return [label.strip("_") for label in obj] - return obj - - -def make_clean_names( - obj: Union[pd.Index, pd.Series, Expr, list, str], - strip_underscores: Optional[Union[str, bool]] = None, - case_type: str = "lower", - remove_special: bool = False, - strip_accents: bool = False, - enforce_string: bool = False, - truncate_limit: int = None, - object_type: str = "pandas", -) -> Union[pd.Index, pd.Series, Expr, list]: - """ - Generic function to clean labels in an object. - It can be applied to a pandas Index/Series, a Polars Expression, - or a python string/list. - For pandas, there is a [`clean_names`][janitor.functions.clean_names.clean_names] - method, which is a wrapper around the `make_clean_names` function. - For polars, use this function via existing Polars functions. The examples below - show how you can use this within polars. - - Examples: - >>> import polars as pl - >>> import janitor - >>> df = pl.DataFrame( - ... 
{ - ... "Aloha": range(3), - ... "Bell Chart": range(3), - ... "Animals@#$%^": range(3) - ... } - ... ) - >>> df - shape: (3, 3) - ┌───────┬────────────┬──────────────┐ - │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═══════╪════════════╪══════════════╡ - │ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 2 ┆ 2 │ - └───────┴────────────┴──────────────┘ - - Clean the column names, - via [rename](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.rename.html#polars-dataframe-rename): - >>> df.rename( - ... lambda objumn_name: make_clean_names( - ... obj=objumn_name, remove_special=True, object_type="string" - ... ) - ... ) - shape: (3, 3) - ┌───────┬────────────┬─────────┐ - │ aloha ┆ bell_chart ┆ animals │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═══════╪════════════╪═════════╡ - │ 0 ┆ 0 ┆ 0 │ - │ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 2 ┆ 2 │ - └───────┴────────────┴─────────┘ - - >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]}) - >>> df - shape: (1, 1) - ┌─────────────┐ - │ raw │ - │ --- │ - │ str │ - ╞═════════════╡ - │ Abçdê fgí j │ - └─────────────┘ - - Clean the column values, - via [with_columns](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.with_columns.html#polars-dataframe-with-columns): - >>> df.with_columns( - ... pl.col("raw").pipe( - ... make_clean_names, object_type="polars", strip_accents=True - ... ) - ... ) - shape: (1, 1) - ┌─────────────┐ - │ raw │ - │ --- │ - │ str │ - ╞═════════════╡ - │ abcde_fgi_j │ - └─────────────┘ - - The `make_clean_names` function can also be applied to a python string or list: - >>> raw = ["Abçdê fgí j"] - >>> make_clean_names(raw, object_type='list', strip_accents=True) - ['abcde_fgi_j'] - >>> raw = "Abçdê fgí j" - >>> make_clean_names(raw, object_type='string', strip_accents=True) - 'abcde_fgi_j' - - !!! info "New in version 0.28.0" - - Args: - obj: The object to clean. It can be a pandas Index, - a pandas Series, a polars Expression, a python string, - or a python list. - strip_underscores: Removes the outer underscores from all - labels. Default None keeps outer underscores. Values can be - either 'left', 'right' or 'both' or the respective shorthand 'l', - 'r' and True. - case_type: Whether to make the labels lower or uppercase. - Current case may be preserved with 'preserve', - while snake case conversion (from CamelCase or camelCase only) - can be turned on using "snake". - Default 'lower' makes all characters lowercase. - remove_special: Remove special characters from the labels. - Only letters, numbers and underscores are preserved. - strip_accents: Whether or not to remove accents from - the labels. - enforce_string: Whether or not to convert the labels to string. - Defaults to True, but can be turned off. - truncate_limit: Truncates formatted labels to - the specified length. Default None does not truncate. - object_type: The type of object to clean. It should be either `pandas`, - `polars`, a python `string`, or a python `list`. - Returns: - A pandas Index, pandas Series, polars Expression, a python string, - or a python list. 
- """ # noqa: E501 - if enforce_string and (object_type == "pandas"): - if not (_is_str_or_cat(obj)): - obj = obj.astype(str) - elif enforce_string and (object_type == "list"): - obj = [str(label) for label in obj] - elif enforce_string and (object_type == "string"): - obj = str(obj) - elif enforce_string and (object_type == "polars"): - from polars import Utf8 - - obj = obj.cast(Utf8) - obj = _change_case(obj, case_type, object_type=object_type) - obj = _normalize_1(obj, object_type=object_type) - if remove_special: - obj = _remove_special(object_type=object_type, obj=obj) - if strip_accents: - obj = _strip_accents(obj=obj, object_type=object_type) - if object_type == "pandas": - obj = obj.str.replace(pat="_+", repl="_", regex=True) - elif object_type == "polars": - obj = obj.str.replace(pattern="_+", value="_", literal=False) - elif object_type == "string": - obj = re.sub(pattern="_+", repl="_", string=obj) - else: - obj = [re.sub(pattern="_+", repl="_", string=label) for label in obj] - obj = _strip_underscores_func( - obj, strip_underscores=strip_underscores, object_type=object_type - ) - if truncate_limit and (object_type == "pandas"): - obj = obj.str[:truncate_limit] - elif truncate_limit and (object_type == "polars"): - obj = obj.str.slice(offset=0, length=truncate_limit) - elif truncate_limit and (object_type == "string"): - obj = obj[:truncate_limit] - elif truncate_limit: - obj = [label[:truncate_limit] for label in obj] + return obj.strip("_") return obj diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py new file mode 100644 index 000000000..843002a5b --- /dev/null +++ b/janitor/polars/__init__.py @@ -0,0 +1,131 @@ +from typing import Optional, Union + +from janitor.utils import import_message + +from .functions import _clean_names + +try: + import polars as pl +except ImportError: + import_message( + submodule="polars", + package="polars", + conda_channel="conda-forge", + pip_install=True, + ) + + +@pl.api.register_dataframe_namespace("janitor") +class Frame: + def __init__(self, df: pl.DataFrame) -> pl.DataFrame: + self._df = df + + def clean_names( + self, + strip_underscores: Optional[Union[str, bool]] = None, + case_type: str = "lower", + remove_special: bool = False, + strip_accents: bool = False, + enforce_string: bool = False, + truncate_limit: int = None, + ) -> pl.DataFrame: + """ + Clean the column names in a polars DataFrame. + + Examples: + >>> import polars as pl + >>> import janitor.polars + >>> df = pl.DataFrame( + ... { + ... "Aloha": range(3), + ... "Bell Chart": range(3), + ... "Animals@#$%^": range(3) + ... } + ... 
) + >>> df + shape: (3, 3) + ┌───────┬────────────┬──────────────┐ + │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═══════╪════════════╪══════════════╡ + │ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 2 ┆ 2 │ + └───────┴────────────┴──────────────┘ + >>> df.janitor.clean_names(remove_special=True) + shape: (3, 3) + ┌───────┬────────────┬─────────┐ + │ aloha ┆ bell_chart ┆ animals │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═══════╪════════════╪═════════╡ + │ 0 ┆ 0 ┆ 0 │ + │ 1 ┆ 1 ┆ 1 │ + │ 2 ┆ 2 ┆ 2 │ + └───────┴────────────┴─────────┘ + """ + return self._df.rename( + lambda col: _clean_names( + obj=col, + strip_accents=strip_accents, + strip_underscores=strip_underscores, + case_type=case_type, + remove_special=remove_special, + enforce_string=enforce_string, + truncate_limit=truncate_limit, + ) + ) + + +@pl.api.register_expr_namespace("janitor") +class PolarsExpr: + def __init__(self, expr: pl.Expr) -> pl.Expr: + self._expr = expr + + def clean_names( + self, + strip_underscores: Optional[Union[str, bool]] = None, + case_type: str = "lower", + remove_special: bool = False, + strip_accents: bool = False, + enforce_string: bool = False, + truncate_limit: int = None, + ) -> pl.Expr: + """ + Clean the labels in a polars Expression. + + Examples: + >>> import polars as pl + >>> import janitor.polars + >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]}) + >>> df + shape: (1, 1) + ┌─────────────┐ + │ raw │ + │ --- │ + │ str │ + ╞═════════════╡ + │ Abçdê fgí j │ + └─────────────┘ + + Clean the column values: + >>> df.with_columns(pl.col("raw").janitor.clean_names(strip_accents=True)) + shape: (1, 1) + ┌─────────────┐ + │ raw │ + │ --- │ + │ str │ + ╞═════════════╡ + │ abcde_fgi_j │ + └─────────────┘ + """ + return _clean_names( + obj=self._expr, + strip_accents=strip_accents, + strip_underscores=strip_underscores, + case_type=case_type, + remove_special=remove_special, + enforce_string=enforce_string, + truncate_limit=truncate_limit, + ) diff --git a/janitor/polars/functions.py b/janitor/polars/functions.py new file mode 100644 index 000000000..c180ccd85 --- /dev/null +++ b/janitor/polars/functions.py @@ -0,0 +1,160 @@ +"""General purpose data cleaning functions for pyspark.""" + +import re +import unicodedata +from typing import Optional, Union + +from janitor.errors import JanitorError +from janitor.functions.utils import ( + _change_case, + _normalize_1, + _remove_special, + _strip_accents, + _strip_underscores_func, +) +from janitor.utils import import_message + +try: + import polars as pl +except ImportError: + import_message( + submodule="polars", + package="polars", + conda_channel="conda-forge", + pip_install=True, + ) + + +def _change_case_expr( + obj: pl.Expr, + case_type: str, +) -> pl.Expr: + """Change case of obj.""" + case_types = {"preserve", "upper", "lower", "snake"} + case_type = case_type.lower() + if case_type not in case_types: + raise JanitorError(f"type must be one of: {case_types}") + + if case_type == "preserve": + return obj + if case_type == "upper": + return obj.str.to_uppercase() + if case_type == "lower": + return obj.str.to_lowercase() + # Implementation taken from: https://gist.github.com/jaytaylor/3660565 + # by @jtaylor + return ( + obj.str.replace_all( + pattern=r"(.)([A-Z][a-z]+)", value=r"${1}_${2}", literal=False + ) + .str.replace_all( + pattern=r"([a-z0-9])([A-Z])", value=r"${1}_${2}", literal=False + ) + .str.to_lowercase() + ) + + +def _normalize_expr(obj: pl.Expr) -> pl.Expr: + """Perform normalization of labels in 
obj.""" + FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] + for search, replace in FIXES: + obj = obj.str.replace_all(pattern=search, value=replace, literal=False) + return obj + + +def _remove_special_expr( + obj: pl.Expr, +) -> pl.Expr: + """Remove special characters from obj.""" + return obj.str.replace_all( + pattern="[^A-Za-z_\\d]", value="", literal=False + ).str.strip_chars() + + +def _strip_accents_expr( + obj: pl.Expr, +) -> pl.Expr: + """Remove accents from a label. + + Inspired from [StackOverflow][so]. + + [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin + """ # noqa: E501 + # TODO: possible implementation in Rust + # or use a pyarrow implementation? + # https://github.com/pola-rs/polars/issues/11455 + return obj.map_elements( + lambda word: [ + letter + for letter in unicodedata.normalize("NFD", word) + if not unicodedata.combining(letter) + ], + return_dtype=pl.List(pl.Utf8), + ).list.join("") + + +def _strip_underscores_func_expr( + obj: pl.Expr, + strip_underscores: Union[str, bool] = None, +) -> pl.Expr: + """Strip underscores from obj.""" + underscore_options = {None, "left", "right", "both", "l", "r", True} + if strip_underscores not in underscore_options: + raise JanitorError( + f"strip_underscores must be one of: {underscore_options}" + ) + if strip_underscores in {"left", "l"}: + return obj.str.strip_chars_start("_") + if strip_underscores in {"right", "r"}: + return obj.str.strip_chars_end("_") + if strip_underscores in {True, "both"}: + return obj.str.strip_chars("_") + return obj + + +def _clean_names( + obj: Union[str, pl.Expr], + strip_underscores: Optional[Union[str, bool]] = None, + case_type: str = "lower", + remove_special: bool = False, + strip_accents: bool = False, + enforce_string: bool = False, + truncate_limit: int = None, +) -> str: + """ + Generic function to clean labels. + Applies either to the columns of a polars DataFrame, + or a polars Expression. 
+ """ + if isinstance(obj, str): + if enforce_string: + obj = str(obj) + obj = _change_case(obj=obj, case_type=case_type) + obj = _normalize_1(obj=obj) + if remove_special: + obj = _remove_special(obj=obj) + if strip_accents: + obj = _strip_accents(obj=obj) + obj = re.sub(pattern="_+", repl="_", string=obj) + obj = _strip_underscores_func( + obj, + strip_underscores=strip_underscores, + ) + obj = obj[:truncate_limit] + return obj + if enforce_string: + obj = obj.cast(pl.Utf8) + obj = _change_case_expr(obj=obj, case_type=case_type) + obj = _normalize_expr(obj=obj) + if remove_special: + obj = _remove_special_expr(obj=obj) + if strip_accents: + obj = _strip_accents_expr(obj=obj) + obj = obj.str.replace(pattern="_+", value="_", literal=False) + obj = _strip_underscores_func_expr( + obj, + strip_underscores=strip_underscores, + ) + if truncate_limit: + obj = obj.str.slice(offset=0, length=truncate_limit) + return obj diff --git a/tests/functions/test_clean_names_polars.py b/tests/polars/functions/test_clean_names.py similarity index 66% rename from tests/functions/test_clean_names_polars.py rename to tests/polars/functions/test_clean_names.py index 56d0b8e95..5ed77c8e8 100644 --- a/tests/functions/test_clean_names_polars.py +++ b/tests/polars/functions/test_clean_names.py @@ -1,14 +1,12 @@ import polars as pl import pytest -from janitor import make_clean_names # noqa: E402 - @pytest.mark.functions def test_clean_names_method_chain(dataframe): """Tests clean_names default args in a method chain.""" df = pl.from_pandas(dataframe) - df = df.rename(lambda col: make_clean_names(col, object_type="string")) + df = df.janitor.clean_names() expected_columns = [ "a", "bell_chart", @@ -23,11 +21,7 @@ def test_clean_names_method_chain(dataframe): def test_clean_names_special_characters(dataframe): """Tests clean_names `remove_special` parameter.""" df = pl.from_pandas(dataframe) - df = df.rename( - lambda col: make_clean_names( - col, object_type="string", remove_special=True - ) - ) + df = df.janitor.clean_names(remove_special=True) expected_columns = [ "a", "bell_chart", @@ -42,11 +36,7 @@ def test_clean_names_special_characters(dataframe): def test_clean_names_uppercase(dataframe): """Tests clean_names `case_type` parameter = upper.""" df = pl.from_pandas(dataframe) - df = df.rename( - lambda col: make_clean_names( - col, object_type="string", remove_special=True, case_type="upper" - ) - ) + df = df.janitor.clean_names(remove_special=True, case_type="upper") expected_columns = [ "A", "BELL_CHART", @@ -61,11 +51,7 @@ def test_clean_names_uppercase(dataframe): def test_clean_names_strip_accents(): """Tests clean_names `strip_accents` parameter.""" df = pl.DataFrame({"João": [1, 2], "Лука́ся": [1, 2], "Käfer": [1, 2]}) - df = df.rename( - lambda col: make_clean_names( - col, object_type="string", strip_accents=True - ) - ) + df = df.janitor.clean_names(strip_accents=True) expected_columns = ["joao", "лукася", "kafer"] assert df.columns == expected_columns @@ -77,14 +63,7 @@ def test_clean_names_camelcase_to_snake(dataframe): df = ( df.select("a") .rename({"a": "AColumnName"}) - .rename( - lambda col: make_clean_names( - col, - object_type="string", - remove_special=True, - case_type="snake", - ) - ) + .janitor.clean_names(remove_special=True, case_type="snake") ) assert df.columns == ["a_column_name"] @@ -93,12 +72,7 @@ def test_clean_names_camelcase_to_snake(dataframe): def test_clean_names_truncate_limit(dataframe): """Tests clean_names `truncate_limit` parameter.""" df = pl.from_pandas(dataframe) - df 
= df.rename( - lambda col: make_clean_names( - col, object_type="string", truncate_limit=7 - ) - ) - # df = dataframe.clean_names(truncate_limit=7) + df = df.janitor.clean_names(truncate_limit=7) expected_columns = ["a", "bell_ch", "decorat", "animals", "cities"] assert df.columns == expected_columns @@ -112,14 +86,7 @@ def test_charac(): r"Current accountbalance(in % of GDP)": range(5), } ) - df = df.rename( - lambda col: make_clean_names( - col, - object_type="string", - strip_underscores=True, - case_type="lower", - ) - ) + df = df.janitor.clean_names(strip_underscores=True, case_type="lower") assert "current_accountbalance_in_%_of_gdp" in df.columns @@ -128,8 +95,6 @@ def test_clean_column_values(): """Clean column values""" raw = pl.DataFrame({"raw": ["Abçdê fgí j"]}) outcome = raw.with_columns( - pl.col("raw").pipe( - make_clean_names, object_type="polars", strip_accents=True - ) + pl.col("raw").janitor.clean_names(strip_accents=True) ) assert list(outcome)[0][0] == "abcde_fgi_j" From 799532f7a5592498e5c8ce5a535d3f3083f6fec9 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 20:54:05 +1000 Subject: [PATCH 20/46] fix doctests --- janitor/functions/utils.py | 4 +- janitor/polars/__init__.py | 6 +- janitor/polars/functions.py | 61 +++++++++++-------- ...an_names.py => test_clean_names_polars.py} | 0 4 files changed, 41 insertions(+), 30 deletions(-) rename tests/polars/functions/{test_clean_names.py => test_clean_names_polars.py} (100%) diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 4bf0d0eea..4e1f443ee 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -1166,7 +1166,7 @@ def _change_case( def _normalize_1(obj: str) -> str: - """Perform normalization of labels in obj.""" + """Perform normalization of obj.""" FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] for search, replace in FIXES: obj = re.sub(pattern=search, repl=replace, string=obj) @@ -1185,7 +1185,7 @@ def _remove_special( def _strip_accents( obj: str, ) -> str: - """Remove accents from a label. + """Remove accents from obj. Inspired from [StackOverflow][so]. 
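# [editor's aside -- illustration, not part of the patch series]
# After this commit the public entry point is the registered "janitor"
# namespace; the updated tests above reduce to calls like this:
import polars as pl

import janitor.polars  # noqa: F401  -- registers the DataFrame/Expr namespaces

df = pl.DataFrame({"Bell Chart": [1, 2], "Animals@#$%^": [3, 4]})
assert df.janitor.clean_names(remove_special=True).columns == ["bell_chart", "animals"]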
diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 843002a5b..d8c8bf3df 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -2,7 +2,7 @@ from janitor.utils import import_message -from .functions import _clean_names +from .functions import _clean_column_names, _clean_expr_names try: import polars as pl @@ -66,7 +66,7 @@ def clean_names( └───────┴────────────┴─────────┘ """ return self._df.rename( - lambda col: _clean_names( + lambda col: _clean_column_names( obj=col, strip_accents=strip_accents, strip_underscores=strip_underscores, @@ -120,7 +120,7 @@ def clean_names( │ abcde_fgi_j │ └─────────────┘ """ - return _clean_names( + return _clean_expr_names( obj=self._expr, strip_accents=strip_accents, strip_underscores=strip_underscores, diff --git a/janitor/polars/functions.py b/janitor/polars/functions.py index c180ccd85..4322fe79c 100644 --- a/janitor/polars/functions.py +++ b/janitor/polars/functions.py @@ -1,4 +1,4 @@ -"""General purpose data cleaning functions for pyspark.""" +"""functions for polars.""" import re import unicodedata @@ -29,7 +29,7 @@ def _change_case_expr( obj: pl.Expr, case_type: str, ) -> pl.Expr: - """Change case of obj.""" + """Change case of labels in obj.""" case_types = {"preserve", "upper", "lower", "snake"} case_type = case_type.lower() if case_type not in case_types: @@ -65,7 +65,7 @@ def _normalize_expr(obj: pl.Expr) -> pl.Expr: def _remove_special_expr( obj: pl.Expr, ) -> pl.Expr: - """Remove special characters from obj.""" + """Remove special characters from the labels in obj.""" return obj.str.replace_all( pattern="[^A-Za-z_\\d]", value="", literal=False ).str.strip_chars() @@ -74,7 +74,7 @@ def _remove_special_expr( def _strip_accents_expr( obj: pl.Expr, ) -> pl.Expr: - """Remove accents from a label. + """Remove accents from the labels in obj. Inspired from [StackOverflow][so]. @@ -112,8 +112,8 @@ def _strip_underscores_func_expr( return obj -def _clean_names( - obj: Union[str, pl.Expr], +def _clean_column_names( + obj: str, strip_underscores: Optional[Union[str, bool]] = None, case_type: str = "lower", remove_special: bool = False, @@ -122,26 +122,37 @@ def _clean_names( truncate_limit: int = None, ) -> str: """ - Generic function to clean labels. - Applies either to the columns of a polars DataFrame, - or a polars Expression. + Function to clean the column names of a polars DataFrame. + """ + if enforce_string: + obj = str(obj) + obj = _change_case(obj=obj, case_type=case_type) + obj = _normalize_1(obj=obj) + if remove_special: + obj = _remove_special(obj=obj) + if strip_accents: + obj = _strip_accents(obj=obj) + obj = re.sub(pattern="_+", repl="_", string=obj) + obj = _strip_underscores_func( + obj, + strip_underscores=strip_underscores, + ) + obj = obj[:truncate_limit] + return obj + + +def _clean_expr_names( + obj: pl.Expr, + strip_underscores: Optional[Union[str, bool]] = None, + case_type: str = "lower", + remove_special: bool = False, + strip_accents: bool = False, + enforce_string: bool = False, + truncate_limit: int = None, +) -> pl.Expr: + """ + Function to clean the labels of a polars Expression. 
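# [editor's aside -- illustration, not part of the patch series]
# Because `_clean_expr_names` builds a polars expression, value cleaning also
# composes with lazy execution -- a sketch, assuming the namespaces registered
# in __init__.py above:
import polars as pl

import janitor.polars  # noqa: F401

lf = pl.LazyFrame({"raw": ["Bell Chart", "Animals@#$%^"]})
cleaned = lf.with_columns(
    pl.col("raw").janitor.clean_names(remove_special=True)
).collect()
# cleaned["raw"] -> ["bell_chart", "animals"]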
""" - if isinstance(obj, str): - if enforce_string: - obj = str(obj) - obj = _change_case(obj=obj, case_type=case_type) - obj = _normalize_1(obj=obj) - if remove_special: - obj = _remove_special(obj=obj) - if strip_accents: - obj = _strip_accents(obj=obj) - obj = re.sub(pattern="_+", repl="_", string=obj) - obj = _strip_underscores_func( - obj, - strip_underscores=strip_underscores, - ) - obj = obj[:truncate_limit] - return obj if enforce_string: obj = obj.cast(pl.Utf8) obj = _change_case_expr(obj=obj, case_type=case_type) diff --git a/tests/polars/functions/test_clean_names.py b/tests/polars/functions/test_clean_names_polars.py similarity index 100% rename from tests/polars/functions/test_clean_names.py rename to tests/polars/functions/test_clean_names_polars.py From dbce4b934fcb2a0e82899358c11d03700a0694b5 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 21:16:32 +1000 Subject: [PATCH 21/46] fix tests; add polars to documentation --- janitor/polars/__init__.py | 2 +- mkdocs.yml | 1 + mkdocs/api/polars.md | 3 +++ pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 mkdocs/api/polars.md diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index d8c8bf3df..6aa5fcc64 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -16,7 +16,7 @@ @pl.api.register_dataframe_namespace("janitor") -class Frame: +class PolarsFrame: def __init__(self, df: pl.DataFrame) -> pl.DataFrame: self._df = df diff --git a/mkdocs.yml b/mkdocs.yml index 639d71bea..a7545afc5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,6 +45,7 @@ nav: - Machine Learning: api/ml.md - Math: api/math.md # - PySpark: api/pyspark.md # will be added back later + - Polars: api/polars.md - Timeseries: api/timeseries.md - XArray: api/xarray.md - Development Guide: devguide.md diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md new file mode 100644 index 000000000..db5b5d14f --- /dev/null +++ b/mkdocs/api/polars.md @@ -0,0 +1,3 @@ +# Polars + +::: janitor.polars diff --git a/pyproject.toml b/pyproject.toml index f6b98f54b..52dc3f172 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 55 +fail-under = 10 ignore-init-method = true ignore-init-module = true ignore-module = false From 1c642e6ba49ab1ea69b3145f6f492909f1876496 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 21:16:40 +1000 Subject: [PATCH 22/46] fix tests; add polars to documentation --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 52dc3f172..f6b98f54b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 10 +fail-under = 55 ignore-init-method = true ignore-init-module = true ignore-module = false From 407d21b90314bd51cd16537fb6197e99b36fa7ce Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 21:23:45 +1000 Subject: [PATCH 23/46] import janitor.polars --- pyproject.toml | 1 + tests/polars/functions/test_clean_names_polars.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f6b98f54b..0a697589f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ markers = [ "utils: utility tests", "engineering: tests for engineering", "ml: tests for 
machine learning", + "polars: tests for polars methods", "spark_functions: tests for pyspark functions", "xarray: tests for xarray functions", "timeseries: tests for timeseries", diff --git a/tests/polars/functions/test_clean_names_polars.py b/tests/polars/functions/test_clean_names_polars.py index 5ed77c8e8..23ce38742 100644 --- a/tests/polars/functions/test_clean_names_polars.py +++ b/tests/polars/functions/test_clean_names_polars.py @@ -1,6 +1,8 @@ import polars as pl import pytest +from janitor import polars # noqa: F401 + @pytest.mark.functions def test_clean_names_method_chain(dataframe): From aedfc65c7e23fba3dd967fd7ac29ed1c95f6d52d Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 21:49:44 +1000 Subject: [PATCH 24/46] control docs output for polars submodule --- mkdocs/api/polars.md | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md index db5b5d14f..905d9ed56 100644 --- a/mkdocs/api/polars.md +++ b/mkdocs/api/polars.md @@ -1,3 +1,19 @@ -# Polars +# PolarsExpr -::: janitor.polars +::: janitor.polars.PolarsExpr + handler: python + options: + members: + - clean_names + show_root_heading: false + show_source: true + +# PolarsFrame + +::: janitor.polars.PolarsFrame + handler: python + options: + members: + - clean_names + show_root_heading: false + show_source: true From db9b48649f7c89dcb2ea85b32769320b4e433f12 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 21:58:47 +1000 Subject: [PATCH 25/46] exclude functions in docs rendering --- mkdocs/api/polars.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md index 905d9ed56..4a6c92f09 100644 --- a/mkdocs/api/polars.md +++ b/mkdocs/api/polars.md @@ -7,6 +7,7 @@ - clean_names show_root_heading: false show_source: true + show_submodules: true # PolarsFrame @@ -17,3 +18,4 @@ - clean_names show_root_heading: false show_source: true + show_submodules: true From 6a91e673bbc6c274bb8c8aa7c7811272f11196ae Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 21:59:29 +1000 Subject: [PATCH 26/46] exclude functions in docs rendering --- mkdocs/api/polars.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md index 4a6c92f09..e6dba459d 100644 --- a/mkdocs/api/polars.md +++ b/mkdocs/api/polars.md @@ -7,7 +7,7 @@ - clean_names show_root_heading: false show_source: true - show_submodules: true + show_submodules: false # PolarsFrame @@ -18,4 +18,4 @@ - clean_names show_root_heading: false show_source: true - show_submodules: true + show_submodules: false From 7a8807855bb13ec7663bb69004e57de8ba941f91 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 22:03:42 +1000 Subject: [PATCH 27/46] show_submodules=true --- mkdocs/api/polars.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md index e6dba459d..905d9ed56 100644 --- a/mkdocs/api/polars.md +++ b/mkdocs/api/polars.md @@ -7,7 +7,6 @@ - clean_names show_root_heading: false show_source: true - show_submodules: false # PolarsFrame @@ -18,4 +17,3 @@ - clean_names show_root_heading: false show_source: true - show_submodules: false From 6d7885e9952e5c4395e34fc3eb0a027b5fbd4665 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 22:45:31 +1000 Subject: [PATCH 28/46] fix docstring rendering for polars --- janitor/polars/__init__.py | 49 ++++++++++++++++++++++++++++++++++--- 
janitor/polars/functions.py | 3 --- mkdocs/api/polars.md | 20 +++------------ 3 files changed, 50 insertions(+), 22 deletions(-) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 6aa5fcc64..9d2ca41de 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -26,7 +26,6 @@ def clean_names( case_type: str = "lower", remove_special: bool = False, strip_accents: bool = False, - enforce_string: bool = False, truncate_limit: int = None, ) -> pl.DataFrame: """ @@ -64,7 +63,28 @@ def clean_names( │ 1 ┆ 1 ┆ 1 │ │ 2 ┆ 2 ┆ 2 │ └───────┴────────────┴─────────┘ - """ + + !!! info "New in version 0.28.0" + + Args: + strip_underscores: Removes the outer underscores from all + column names. Default None keeps outer underscores. Values can be + either 'left', 'right' or 'both' or the respective shorthand 'l', + 'r' and True. + case_type: Whether to make the column names lower or uppercase. + Current case may be preserved with 'preserve', + while snake case conversion (from CamelCase or camelCase only) + can be turned on using "snake". + Default 'lower' makes all characters lowercase. + remove_special: Remove special characters from the column names. + Only letters, numbers and underscores are preserved. + strip_accents: Whether or not to remove accents from + the labels. + truncate_limit: Truncates formatted column names to + the specified length. Default None does not truncate. + Returns: + A polars DataFrame. + """ # noqa: E501 return self._df.rename( lambda col: _clean_column_names( obj=col, @@ -72,7 +92,6 @@ def clean_names( strip_underscores=strip_underscores, case_type=case_type, remove_special=remove_special, - enforce_string=enforce_string, truncate_limit=truncate_limit, ) ) @@ -119,6 +138,30 @@ def clean_names( ╞═════════════╡ │ abcde_fgi_j │ └─────────────┘ + + !!! info "New in version 0.28.0" + + Args: + strip_underscores: Removes the outer underscores + from all labels in the Expression. + Default None keeps outer underscores. + Values can be either 'left', 'right' + or 'both' or the respective shorthand 'l', + 'r' and True. + case_type: Whether to make the labels in the expression lower or uppercase. + Current case may be preserved with 'preserve', + while snake case conversion (from CamelCase or camelCase only) + can be turned on using "snake". + Default 'lower' makes all characters lowercase. + remove_special: Remove special characters from the values in the expression. + Only letters, numbers and underscores are preserved. + strip_accents: Whether or not to remove accents from + the expression. + enforce_string: Whether or not to cast the expression to a string type. + truncate_limit: Truncates formatted labels in the expression to + the specified length. Default None does not truncate. + Returns: + A polars Expression. """ return _clean_expr_names( obj=self._expr, diff --git a/janitor/polars/functions.py b/janitor/polars/functions.py index 4322fe79c..31e6106d5 100644 --- a/janitor/polars/functions.py +++ b/janitor/polars/functions.py @@ -118,14 +118,11 @@ def _clean_column_names( case_type: str = "lower", remove_special: bool = False, strip_accents: bool = False, - enforce_string: bool = False, truncate_limit: int = None, ) -> str: """ Function to clean the column names of a polars DataFrame. 
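# [editor's aside -- illustration, not part of the patch series]
# `_clean_column_names` is a plain str -> str helper, so it can be exercised
# without building a DataFrame; the input is borrowed from test_charac in the
# test suite (module path as of this commit, before the later rename):
from janitor.polars.functions import _clean_column_names

cleaned = _clean_column_names(
    obj="Current accountbalance(in % of GDP)",
    strip_underscores=True,
    case_type="lower",
)
assert cleaned == "current_accountbalance_in_%_of_gdp"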
""" - if enforce_string: - obj = str(obj) obj = _change_case(obj=obj, case_type=case_type) obj = _normalize_1(obj=obj) if remove_special: diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md index 905d9ed56..a2cbd574c 100644 --- a/mkdocs/api/polars.md +++ b/mkdocs/api/polars.md @@ -1,19 +1,7 @@ -# PolarsExpr +# Polars -::: janitor.polars.PolarsExpr - handler: python +::: janitor.polars options: members: - - clean_names - show_root_heading: false - show_source: true - -# PolarsFrame - -::: janitor.polars.PolarsFrame - handler: python - options: - members: - - clean_names - show_root_heading: false - show_source: true + - PolarsExpr + - PolarsFrame From 944fa0215a8c9a7bb3c50b78ea96627234bb8271 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Sun, 21 Apr 2024 23:05:03 +1000 Subject: [PATCH 29/46] Expression -> expression --- janitor/functions/clean_names.py | 2 +- janitor/polars/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/janitor/functions/clean_names.py b/janitor/functions/clean_names.py index 7eb2a7538..a38753fa8 100644 --- a/janitor/functions/clean_names.py +++ b/janitor/functions/clean_names.py @@ -181,7 +181,7 @@ def _clean_names( """ Generic function to clean labels in a pandas object. """ - if enforce_string and not (_is_str_or_cat(obj)): + if enforce_string and not _is_str_or_cat(obj): obj = obj.astype(str) obj = _change_case(obj=obj, case_type=case_type) obj = _normalize_1(obj=obj) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 9d2ca41de..5637575f5 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -143,7 +143,7 @@ def clean_names( Args: strip_underscores: Removes the outer underscores - from all labels in the Expression. + from all labels in the expression. Default None keeps outer underscores. 
Values can be either 'left', 'right' or 'both' or the respective shorthand 'l', From e9c370a50face684f2eb24db857090ff1f72deda Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 23 Apr 2024 13:31:54 +1000 Subject: [PATCH 30/46] rename functions.py --- janitor/polars/__init__.py | 2 +- janitor/polars/{functions.py => clean_names.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename janitor/polars/{functions.py => clean_names.py} (99%) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 5637575f5..449651504 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -2,7 +2,7 @@ from janitor.utils import import_message -from .functions import _clean_column_names, _clean_expr_names +from .clean_names import _clean_column_names, _clean_expr_names try: import polars as pl diff --git a/janitor/polars/functions.py b/janitor/polars/clean_names.py similarity index 99% rename from janitor/polars/functions.py rename to janitor/polars/clean_names.py index 31e6106d5..3226c9d33 100644 --- a/janitor/polars/functions.py +++ b/janitor/polars/clean_names.py @@ -1,4 +1,4 @@ -"""functions for polars.""" +"""clean_names implementation for polars.""" import re import unicodedata From ee66d2ae7659f7be6e80668fc0aa3264afbac6eb Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Mon, 29 Apr 2024 22:56:46 +1000 Subject: [PATCH 31/46] pivot_longer implemented for polars --- janitor/functions/pivot.py | 14 +- janitor/polars/__init__.py | 433 ++++++--- janitor/polars/clean_names.py | 168 ---- janitor/polars/pivot_longer.py | 669 +++++++++++++ mkdocs/api/polars.md | 1 - .../functions/test_clean_names_polars.py | 102 -- .../functions/test_pivot_longer_polars.py | 913 ++++++++++++++++++ 7 files changed, 1887 insertions(+), 413 deletions(-) delete mode 100644 janitor/polars/clean_names.py create mode 100644 janitor/polars/pivot_longer.py delete mode 100644 tests/polars/functions/test_clean_names_polars.py create mode 100644 tests/polars/functions/test_pivot_longer_polars.py diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py index 7efeba45b..51bc78419 100644 --- a/janitor/functions/pivot.py +++ b/janitor/functions/pivot.py @@ -98,7 +98,7 @@ def pivot_longer( 6 setosa Petal.Width 0.2 7 virginica Petal.Width 1.8 - Split the column labels into parts: + Split the column labels into individual columns: >>> df.pivot_longer( ... index = 'Species', ... names_to = ('part', 'dimension'), @@ -167,7 +167,7 @@ def pivot_longer( value int64 dtype: object - Use multiple `.value` to reshape dataframe: + Use multiple `.value` to reshape the dataframe: >>> df = pd.DataFrame( ... [ ... { @@ -265,16 +265,6 @@ def pivot_longer( ... "Gin": [16, 200, 34], ... "Vodka": [20, 33, 18], ... }, - ... columns=[ - ... "City", - ... "State", - ... "Name", - ... "Mango", - ... "Orange", - ... "Watermelon", - ... "Gin", - ... "Vodka", - ... ], ... 
)
        >>> df
              City    State      Name  Mango  Orange  Watermelon  Gin  Vodka
diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py
index 449651504..59b15ff72 100644
--- a/janitor/polars/__init__.py
+++ b/janitor/polars/__init__.py
@@ -1,8 +1,10 @@
-from typing import Optional, Union
+from typing import Any, Optional, Sequence, Union
+
+from polars.type_aliases import ColumnNameOrSelector

 from janitor.utils import import_message

-from .clean_names import _clean_column_names, _clean_expr_names
+from .pivot_longer import _pivot_longer

 try:
     import polars as pl
@@ -20,155 +22,326 @@ class PolarsFrame:
     def __init__(self, df: pl.DataFrame) -> pl.DataFrame:
         self._df = df

-    def clean_names(
+    def pivot_longer(
         self,
-        strip_underscores: Optional[Union[str, bool]] = None,
-        case_type: str = "lower",
-        remove_special: bool = False,
-        strip_accents: bool = False,
-        truncate_limit: int = None,
+        index: Union[
+            ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None
+        ] = None,
+        column_names: Union[
+            ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None
+        ] = None,
+        names_to: Optional[Union[list, tuple, str]] = "variable",
+        values_to: Optional[Union[list, tuple, str]] = "value",
+        names_sep: Optional[Union[str, None]] = None,
+        names_pattern: Optional[Union[list, tuple, str, None]] = None,
+        names_transform: Optional[Any] = pl.Utf8,
     ) -> pl.DataFrame:
         """
-        Clean the column names in a polars DataFrame.
+        Unpivots a DataFrame from *wide* to *long* format.
+
+        It is modeled after the `pivot_longer` function in R's tidyr package,
+        and also takes inspiration from the `melt` function in R's data.table package.
+
+        This function is useful to massage a DataFrame into a format where
+        one or more columns are considered measured variables, and all other
+        columns are considered as identifier variables.
+
+        All measured variables are *unpivoted* (and typically duplicated) along the
+        row axis.

        Examples:
            >>> import polars as pl
+            >>> import polars.selectors as cs
            >>> import janitor.polars
            >>> df = pl.DataFrame(
            ...     {
-            ...         "Aloha": range(3),
-            ...         "Bell Chart": range(3),
-            ...         "Animals@#$%^": range(3)
+            ...         "Sepal.Length": [5.1, 5.9],
+            ...         "Sepal.Width": [3.5, 3.0],
+            ...         "Petal.Length": [1.4, 5.1],
+            ...         "Petal.Width": [0.2, 1.8],
+            ...         "Species": ["setosa", "virginica"],
            ...     }
            ... )
            >>> df
-            shape: (3, 3)
-            ┌───────┬────────────┬──────────────┐
-            │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
-            │ --- ┆ --- ┆ --- │
-            │ i64 ┆ i64 ┆ i64 │
-            ╞═══════╪════════════╪══════════════╡
-            │ 0 ┆ 0 ┆ 0 │
-            │ 1 ┆ 1 ┆ 1 │
-            │ 2 ┆ 2 ┆ 2 │
-            └───────┴────────────┴──────────────┘
-            >>> df.janitor.clean_names(remove_special=True)
-            shape: (3, 3)
-            ┌───────┬────────────┬─────────┐
-            │ aloha ┆ bell_chart ┆ animals │
-            │ --- ┆ --- ┆ --- │
-            │ i64 ┆ i64 ┆ i64 │
-            ╞═══════╪════════════╪═════════╡
-            │ 0 ┆ 0 ┆ 0 │
-            │ 1 ┆ 1 ┆ 1 │
-            │ 2 ┆ 2 ┆ 2 │
-            └───────┴────────────┴─────────┘
-
-        !!! 
info "New in version 0.28.0" + Replicate polars' [melt](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.melt.html#polars-dataframe-melt): + >>> df.janitor.pivot_longer(index = 'Species') + shape: (8, 3) + ┌───────────┬──────────────┬───────┐ + │ Species ┆ variable ┆ value │ + │ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ f64 │ + ╞═══════════╪══════════════╪═══════╡ + │ setosa ┆ Sepal.Length ┆ 5.1 │ + │ virginica ┆ Sepal.Length ┆ 5.9 │ + │ setosa ┆ Sepal.Width ┆ 3.5 │ + │ virginica ┆ Sepal.Width ┆ 3.0 │ + │ setosa ┆ Petal.Length ┆ 1.4 │ + │ virginica ┆ Petal.Length ┆ 5.1 │ + │ setosa ┆ Petal.Width ┆ 0.2 │ + │ virginica ┆ Petal.Width ┆ 1.8 │ + └───────────┴──────────────┴───────┘ - Args: - strip_underscores: Removes the outer underscores from all - column names. Default None keeps outer underscores. Values can be - either 'left', 'right' or 'both' or the respective shorthand 'l', - 'r' and True. - case_type: Whether to make the column names lower or uppercase. - Current case may be preserved with 'preserve', - while snake case conversion (from CamelCase or camelCase only) - can be turned on using "snake". - Default 'lower' makes all characters lowercase. - remove_special: Remove special characters from the column names. - Only letters, numbers and underscores are preserved. - strip_accents: Whether or not to remove accents from - the labels. - truncate_limit: Truncates formatted column names to - the specified length. Default None does not truncate. - Returns: - A polars DataFrame. - """ # noqa: E501 - return self._df.rename( - lambda col: _clean_column_names( - obj=col, - strip_accents=strip_accents, - strip_underscores=strip_underscores, - case_type=case_type, - remove_special=remove_special, - truncate_limit=truncate_limit, - ) - ) + Split the column labels into individual columns: + >>> df.janitor.pivot_longer( + ... index = 'Species', + ... names_to = ('part', 'dimension'), + ... names_sep = '.', + ... ) + shape: (8, 4) + ┌───────────┬───────┬───────────┬───────┐ + │ Species ┆ part ┆ dimension ┆ value │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ str ┆ f64 │ + ╞═══════════╪═══════╪═══════════╪═══════╡ + │ setosa ┆ Sepal ┆ Length ┆ 5.1 │ + │ virginica ┆ Sepal ┆ Length ┆ 5.9 │ + │ setosa ┆ Sepal ┆ Width ┆ 3.5 │ + │ virginica ┆ Sepal ┆ Width ┆ 3.0 │ + │ setosa ┆ Petal ┆ Length ┆ 1.4 │ + │ virginica ┆ Petal ┆ Length ┆ 5.1 │ + │ setosa ┆ Petal ┆ Width ┆ 0.2 │ + │ virginica ┆ Petal ┆ Width ┆ 1.8 │ + └───────────┴───────┴───────────┴───────┘ + Retain parts of the column names as headers: + >>> df.janitor.pivot_longer( + ... index = 'Species', + ... names_to = ('part', '.value'), + ... names_sep = '.', + ... 
) + shape: (4, 4) + ┌───────────┬───────┬────────┬───────┐ + │ Species ┆ part ┆ Length ┆ Width │ + │ --- ┆ --- ┆ --- ┆ --- │ + │ str ┆ str ┆ f64 ┆ f64 │ + ╞═══════════╪═══════╪════════╪═══════╡ + │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ + │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ + │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ + │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ + └───────────┴───────┴────────┴───────┘ -@pl.api.register_expr_namespace("janitor") -class PolarsExpr: - def __init__(self, expr: pl.Expr) -> pl.Expr: - self._expr = expr + Split the column labels based on regex: + >>> df = pl.DataFrame({"id": [1], "new_sp_m5564": [2], "newrel_f65": [3]}) + >>> df + shape: (1, 3) + ┌─────┬──────────────┬────────────┐ + │ id ┆ new_sp_m5564 ┆ newrel_f65 │ + │ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 │ + ╞═════╪══════════════╪════════════╡ + │ 1 ┆ 2 ┆ 3 │ + └─────┴──────────────┴────────────┘ + >>> df.janitor.pivot_longer( + ... index = 'id', + ... names_to = ('diagnosis', 'gender', 'age'), + ... names_pattern = r"new_?(.+)_(.)(\\d+)", + ... ) + shape: (2, 5) + ┌─────┬───────────┬────────┬──────┬───────┐ + │ id ┆ diagnosis ┆ gender ┆ age ┆ value │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ str ┆ str ┆ i64 │ + ╞═════╪═══════════╪════════╪══════╪═══════╡ + │ 1 ┆ sp ┆ m ┆ 5564 ┆ 2 │ + │ 1 ┆ rel ┆ f ┆ 65 ┆ 3 │ + └─────┴───────────┴────────┴──────┴───────┘ - def clean_names( - self, - strip_underscores: Optional[Union[str, bool]] = None, - case_type: str = "lower", - remove_special: bool = False, - strip_accents: bool = False, - enforce_string: bool = False, - truncate_limit: int = None, - ) -> pl.Expr: - """ - Clean the labels in a polars Expression. + Convert the dtypes of specific columns with `names_transform`: + >>> ( + ... df.janitor.pivot_longer( + ... index="id", + ... names_to=("diagnosis", "gender", "age"), + ... names_pattern=r"new_?(.+)_(.)(\\d+)", + ... names_transform={"age": pl.Int32}, + ... ) + ... ) + shape: (2, 5) + ┌─────┬───────────┬────────┬──────┬───────┐ + │ id ┆ diagnosis ┆ gender ┆ age ┆ value │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ str ┆ str ┆ i32 ┆ i64 │ + ╞═════╪═══════════╪════════╪══════╪═══════╡ + │ 1 ┆ sp ┆ m ┆ 5564 ┆ 2 │ + │ 1 ┆ rel ┆ f ┆ 65 ┆ 3 │ + └─────┴───────────┴────────┴──────┴───────┘ - Examples: - >>> import polars as pl - >>> import janitor.polars - >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]}) + Use multiple `.value` to reshape the dataframe: + >>> df = pl.DataFrame( + ... [ + ... { + ... "x_1_mean": 10, + ... "x_2_mean": 20, + ... "y_1_mean": 30, + ... "y_2_mean": 40, + ... "unit": 50, + ... } + ... ] + ... ) >>> df - shape: (1, 1) - ┌─────────────┐ - │ raw │ - │ --- │ - │ str │ - ╞═════════════╡ - │ Abçdê fgí j │ - └─────────────┘ - - Clean the column values: - >>> df.with_columns(pl.col("raw").janitor.clean_names(strip_accents=True)) - shape: (1, 1) - ┌─────────────┐ - │ raw │ - │ --- │ - │ str │ - ╞═════════════╡ - │ abcde_fgi_j │ - └─────────────┘ + shape: (1, 5) + ┌──────────┬──────────┬──────────┬──────────┬──────┐ + │ x_1_mean ┆ x_2_mean ┆ y_1_mean ┆ y_2_mean ┆ unit │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞══════════╪══════════╪══════════╪══════════╪══════╡ + │ 10 ┆ 20 ┆ 30 ┆ 40 ┆ 50 │ + └──────────┴──────────┴──────────┴──────────┴──────┘ + >>> df.janitor.pivot_longer( + ... index="unit", + ... names_to=(".value", "time", ".value"), + ... names_pattern=r"(x|y)_([0-9])(_mean)", + ... 
)
+            shape: (2, 4)
+            ┌──────┬──────┬────────┬────────┐
+            │ unit ┆ time ┆ x_mean ┆ y_mean │
+            │ --- ┆ --- ┆ --- ┆ --- │
+            │ i64 ┆ str ┆ i64 ┆ i64 │
+            ╞══════╪══════╪════════╪════════╡
+            │ 50 ┆ 1 ┆ 10 ┆ 30 │
+            │ 50 ┆ 2 ┆ 20 ┆ 40 │
+            └──────┴──────┴────────┴────────┘
+
+            Reshape the dataframe by passing a sequence to `names_pattern`:
+            >>> df = pl.DataFrame({'hr1': [514, 573],
+            ...                    'hr2': [545, 526],
+            ...                    'team': ['Red Sox', 'Yankees'],
+            ...                    'year1': [2007, 2007],
+            ...                    'year2': [2008, 2008]})
+            >>> df
+            shape: (2, 5)
+            ┌─────┬─────┬─────────┬───────┬───────┐
+            │ hr1 ┆ hr2 ┆ team ┆ year1 ┆ year2 │
+            │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+            │ i64 ┆ i64 ┆ str ┆ i64 ┆ i64 │
+            ╞═════╪═════╪═════════╪═══════╪═══════╡
+            │ 514 ┆ 545 ┆ Red Sox ┆ 2007 ┆ 2008 │
+            │ 573 ┆ 526 ┆ Yankees ┆ 2007 ┆ 2008 │
+            └─────┴─────┴─────────┴───────┴───────┘
+            >>> df.janitor.pivot_longer(
+            ...     index = 'team',
+            ...     names_to = ['year', 'hr'],
+            ...     names_pattern = ['year', 'hr']
+            ... )
+            shape: (4, 3)
+            ┌─────────┬─────┬──────┐
+            │ team ┆ hr ┆ year │
+            │ --- ┆ --- ┆ --- │
+            │ str ┆ i64 ┆ i64 │
+            ╞═════════╪═════╪══════╡
+            │ Red Sox ┆ 514 ┆ 2007 │
+            │ Yankees ┆ 573 ┆ 2007 │
+            │ Red Sox ┆ 545 ┆ 2008 │
+            │ Yankees ┆ 526 ┆ 2008 │
+            └─────────┴─────┴──────┘
+
+            Multiple `values_to`:
+            >>> df = pl.DataFrame(
+            ...     {
+            ...         "City": ["Houston", "Austin", "Hoover"],
+            ...         "State": ["Texas", "Texas", "Alabama"],
+            ...         "Name": ["Aria", "Penelope", "Niko"],
+            ...         "Mango": [4, 10, 90],
+            ...         "Orange": [10, 8, 14],
+            ...         "Watermelon": [40, 99, 43],
+            ...         "Gin": [16, 200, 34],
+            ...         "Vodka": [20, 33, 18],
+            ...     },
+            ... )
+            >>> df
+            shape: (3, 8)
+            ┌─────────┬─────────┬──────────┬───────┬────────┬────────────┬─────┬───────┐
+            │ City ┆ State ┆ Name ┆ Mango ┆ Orange ┆ Watermelon ┆ Gin ┆ Vodka │
+            │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+            │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
+            ╞═════════╪═════════╪══════════╪═══════╪════════╪════════════╪═════╪═══════╡
+            │ Houston ┆ Texas ┆ Aria ┆ 4 ┆ 10 ┆ 40 ┆ 16 ┆ 20 │
+            │ Austin ┆ Texas ┆ Penelope ┆ 10 ┆ 8 ┆ 99 ┆ 200 ┆ 33 │
+            │ Hoover ┆ Alabama ┆ Niko ┆ 90 ┆ 14 ┆ 43 ┆ 34 ┆ 18 │
+            └─────────┴─────────┴──────────┴───────┴────────┴────────────┴─────┴───────┘
+
+            >>> df.janitor.pivot_longer(
+            ...     index=["City", "State"],
+            ...     column_names=cs.numeric(),
+            ...     names_to=("Fruit", "Drink"),
+            ...     values_to=("Pounds", "Ounces"),
+            ...     names_pattern=["M|O|W", "G|V"],
+            ... )
+            shape: (9, 6)
+            ┌─────────┬─────────┬────────────┬────────┬───────┬────────┐
+            │ City ┆ State ┆ Fruit ┆ Pounds ┆ Drink ┆ Ounces │
+            │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+            │ str ┆ str ┆ str ┆ i64 ┆ str ┆ i64 │
+            ╞═════════╪═════════╪════════════╪════════╪═══════╪════════╡
+            │ Houston ┆ Texas ┆ Mango ┆ 4 ┆ Gin ┆ 16 │
+            │ Austin ┆ Texas ┆ Mango ┆ 10 ┆ Gin ┆ 200 │
+            │ Hoover ┆ Alabama ┆ Mango ┆ 90 ┆ Gin ┆ 34 │
+            │ Houston ┆ Texas ┆ Orange ┆ 10 ┆ Vodka ┆ 20 │
+            │ Austin ┆ Texas ┆ Orange ┆ 8 ┆ Vodka ┆ 33 │
+            │ Hoover ┆ Alabama ┆ Orange ┆ 14 ┆ Vodka ┆ 18 │
+            │ Houston ┆ Texas ┆ Watermelon ┆ 40 ┆ null ┆ null │
+            │ Austin ┆ Texas ┆ Watermelon ┆ 99 ┆ null ┆ null │
+            │ Hoover ┆ Alabama ┆ Watermelon ┆ 43 ┆ null ┆ null │
+            └─────────┴─────────┴────────────┴────────┴───────┴────────┘

        !!! info "New in version 0.28.0"

        Args:
-            strip_underscores: Removes the outer underscores
-                from all labels in the expression.
-                Default None keeps outer underscores.
-                Values can be either 'left', 'right'
-                or 'both' or the respective shorthand 'l',
-                'r' and True.
-            case_type: Whether to make the labels in the expression lower or uppercase. 
-                Current case may be preserved with 'preserve',
-                while snake case conversion (from CamelCase or camelCase only)
-                can be turned on using "snake".
-                Default 'lower' makes all characters lowercase.
-            remove_special: Remove special characters from the values in the expression.
-                Only letters, numbers and underscores are preserved.
-            strip_accents: Whether or not to remove accents from
-                the expression.
-            enforce_string: Whether or not to cast the expression to a string type.
-            truncate_limit: Truncates formatted labels in the expression to
-                the specified length. Default None does not truncate.
+            index: Column(s) or selector(s) to use as identifier variables.
+            column_names: Column(s) or selector(s) to unpivot.
+            names_to: Name of new column as a string that will contain
+                what were previously the column names in `column_names`.
+                The default is `variable` if no value is provided. It can
+                also be a list/tuple of strings that will serve as new column
+                names, if `names_sep` or `names_pattern` is provided.
+                If `.value` is in `names_to`, new column names will be extracted
+                from part of the existing column names, and `values_to` is overridden.
+            values_to: Name of new column as a string that will contain what
+                were previously the values of the columns in `column_names`.
+                `values_to` can also be a list/tuple
+                and requires that `names_pattern` is also a list/tuple.
+            names_sep: Determines how the column name is broken up, if
+                `names_to` contains multiple values. It takes the same
+                specification as polars' `str.split` method.
+            names_pattern: Determines how the column name is broken up.
+                It can be a regular expression containing matching groups.
+                It takes the same
+                specification as polars' `str.extract_groups` method.
+                `names_pattern` can also be a list/tuple of regular expressions.
+                It can also be a list/tuple of strings;
+                the strings will be treated as regular expressions.
+                Under the hood it is processed with polars' `str.contains` function.
+                For a list/tuple of regular expressions,
+                `names_to` must also be a list/tuple and the lengths of both
+                arguments must match.
+            names_transform: Use this option to change the types of columns that
+                have been transformed to rows.
+                This does not apply to the value columns.
+                It can be a single valid polars dtype,
+                or a dictionary pairing the new column names
+                with a valid polars dtype.
+                Applicable only if one of names_sep
+                or names_pattern is provided.

        Returns:
-            A polars Expression.
-        """
-        return _clean_expr_names(
-            obj=self._expr,
-            strip_accents=strip_accents,
-            strip_underscores=strip_underscores,
-            case_type=case_type,
-            remove_special=remove_special,
-            enforce_string=enforce_string,
-            truncate_limit=truncate_limit,
+            A polars DataFrame that has been unpivoted from wide to long
+            format. 
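A minimal end-to-end sketch of the API documented above; it assumes the `janitor` namespace registration added in this patch, and the column names are invented for illustration:

import polars as pl
import janitor.polars  # noqa: F401 -- registers the DataFrame "janitor" namespace

df = pl.DataFrame(
    {"id": [1, 2], "score_2012": [3.0, 4.0], "score_2016": [5.0, 6.0]}
)
out = df.janitor.pivot_longer(
    index="id",
    names_to=(".value", "year"),
    names_sep="_",
    names_transform={"year": pl.Int32},
)
# Expected: shape (4, 3) with columns id, year (cast to Int32) and score.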
+ """ # noqa: E501 + return _pivot_longer( + df=self._df, + index=index, + column_names=column_names, + names_pattern=names_pattern, + names_sep=names_sep, + names_to=names_to, + values_to=values_to, + names_transform=names_transform, ) diff --git a/janitor/polars/clean_names.py b/janitor/polars/clean_names.py deleted file mode 100644 index 3226c9d33..000000000 --- a/janitor/polars/clean_names.py +++ /dev/null @@ -1,168 +0,0 @@ -"""clean_names implementation for polars.""" - -import re -import unicodedata -from typing import Optional, Union - -from janitor.errors import JanitorError -from janitor.functions.utils import ( - _change_case, - _normalize_1, - _remove_special, - _strip_accents, - _strip_underscores_func, -) -from janitor.utils import import_message - -try: - import polars as pl -except ImportError: - import_message( - submodule="polars", - package="polars", - conda_channel="conda-forge", - pip_install=True, - ) - - -def _change_case_expr( - obj: pl.Expr, - case_type: str, -) -> pl.Expr: - """Change case of labels in obj.""" - case_types = {"preserve", "upper", "lower", "snake"} - case_type = case_type.lower() - if case_type not in case_types: - raise JanitorError(f"type must be one of: {case_types}") - - if case_type == "preserve": - return obj - if case_type == "upper": - return obj.str.to_uppercase() - if case_type == "lower": - return obj.str.to_lowercase() - # Implementation taken from: https://gist.github.com/jaytaylor/3660565 - # by @jtaylor - return ( - obj.str.replace_all( - pattern=r"(.)([A-Z][a-z]+)", value=r"${1}_${2}", literal=False - ) - .str.replace_all( - pattern=r"([a-z0-9])([A-Z])", value=r"${1}_${2}", literal=False - ) - .str.to_lowercase() - ) - - -def _normalize_expr(obj: pl.Expr) -> pl.Expr: - """Perform normalization of labels in obj.""" - FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] - for search, replace in FIXES: - obj = obj.str.replace_all(pattern=search, value=replace, literal=False) - return obj - - -def _remove_special_expr( - obj: pl.Expr, -) -> pl.Expr: - """Remove special characters from the labels in obj.""" - return obj.str.replace_all( - pattern="[^A-Za-z_\\d]", value="", literal=False - ).str.strip_chars() - - -def _strip_accents_expr( - obj: pl.Expr, -) -> pl.Expr: - """Remove accents from the labels in obj. - - Inspired from [StackOverflow][so]. - - [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin - """ # noqa: E501 - # TODO: possible implementation in Rust - # or use a pyarrow implementation? 
- # https://github.com/pola-rs/polars/issues/11455 - return obj.map_elements( - lambda word: [ - letter - for letter in unicodedata.normalize("NFD", word) - if not unicodedata.combining(letter) - ], - return_dtype=pl.List(pl.Utf8), - ).list.join("") - - -def _strip_underscores_func_expr( - obj: pl.Expr, - strip_underscores: Union[str, bool] = None, -) -> pl.Expr: - """Strip underscores from obj.""" - underscore_options = {None, "left", "right", "both", "l", "r", True} - if strip_underscores not in underscore_options: - raise JanitorError( - f"strip_underscores must be one of: {underscore_options}" - ) - if strip_underscores in {"left", "l"}: - return obj.str.strip_chars_start("_") - if strip_underscores in {"right", "r"}: - return obj.str.strip_chars_end("_") - if strip_underscores in {True, "both"}: - return obj.str.strip_chars("_") - return obj - - -def _clean_column_names( - obj: str, - strip_underscores: Optional[Union[str, bool]] = None, - case_type: str = "lower", - remove_special: bool = False, - strip_accents: bool = False, - truncate_limit: int = None, -) -> str: - """ - Function to clean the column names of a polars DataFrame. - """ - obj = _change_case(obj=obj, case_type=case_type) - obj = _normalize_1(obj=obj) - if remove_special: - obj = _remove_special(obj=obj) - if strip_accents: - obj = _strip_accents(obj=obj) - obj = re.sub(pattern="_+", repl="_", string=obj) - obj = _strip_underscores_func( - obj, - strip_underscores=strip_underscores, - ) - obj = obj[:truncate_limit] - return obj - - -def _clean_expr_names( - obj: pl.Expr, - strip_underscores: Optional[Union[str, bool]] = None, - case_type: str = "lower", - remove_special: bool = False, - strip_accents: bool = False, - enforce_string: bool = False, - truncate_limit: int = None, -) -> pl.Expr: - """ - Function to clean the labels of a polars Expression. 
- """ - if enforce_string: - obj = obj.cast(pl.Utf8) - obj = _change_case_expr(obj=obj, case_type=case_type) - obj = _normalize_expr(obj=obj) - if remove_special: - obj = _remove_special_expr(obj=obj) - if strip_accents: - obj = _strip_accents_expr(obj=obj) - obj = obj.str.replace(pattern="_+", value="_", literal=False) - obj = _strip_underscores_func_expr( - obj, - strip_underscores=strip_underscores, - ) - if truncate_limit: - obj = obj.str.slice(offset=0, length=truncate_limit) - return obj diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py new file mode 100644 index 000000000..9bb94000b --- /dev/null +++ b/janitor/polars/pivot_longer.py @@ -0,0 +1,669 @@ +"""pivot_longer implementation for polars.""" + +from collections import defaultdict +from itertools import chain +from typing import Any, Mapping, Optional, Pattern, Sequence, Union + +from janitor.utils import check, import_message + +try: + import polars as pl + import polars.selectors as cs + from polars.datatypes.classes import DataTypeClass + from polars.type_aliases import ColumnNameOrSelector, PolarsDataType +except ImportError: + import_message( + submodule="polars", + package="polars", + conda_channel="conda-forge", + pip_install=True, + ) + + +def _pivot_longer( + df: pl.DataFrame, + index: Union[ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None], + column_names: Union[ + ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None + ], + names_to: Optional[Union[list, str]], + values_to: Optional[str], + names_sep: Optional[Union[str, Pattern, None]], + names_pattern: Optional[Union[list, tuple, str, Pattern, None]], + names_transform: Optional[Union[PolarsDataType, dict]], +) -> pl.DataFrame: + + ( + df, + index, + column_names, + names_to, + values_to, + names_sep, + names_pattern, + names_transform, + ) = _data_checks_pivot_longer( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + values_to=values_to, + names_sep=names_sep, + names_pattern=names_pattern, + names_transform=names_transform, + ) + + if not column_names: + return df + + if all((names_pattern is None, names_sep is None)): + return df.melt( + id_vars=index, + value_vars=column_names, + variable_name=names_to[0], + value_name=values_to, + ) + + if names_sep is not None: + return _pivot_longer_names_sep( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + names_sep=names_sep, + values_to=values_to, + names_transform=names_transform, + ) + + if isinstance(names_pattern, (str, Pattern)): + return _pivot_longer_names_pattern_str( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + names_pattern=names_pattern, + values_to=values_to, + names_transform=names_transform, + ) + if isinstance(values_to, (list, tuple)): + return _pivot_longer_values_to_sequence( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + names_pattern=names_pattern, + values_to=values_to, + names_transform=names_transform, + ) + + return _pivot_longer_names_pattern_sequence( + df=df, + index=index, + column_names=column_names, + names_to=names_to, + names_pattern=names_pattern, + ) + + +def _pivot_longer_names_sep( + df: pl.DataFrame, + index: Sequence, + column_names: Sequence, + names_to: Sequence, + names_sep: str, + values_to: str, + names_transform: dict, +) -> pl.DataFrame: + """ + This takes care of pivoting scenarios where + names_sep is provided. 
+ """ + + columns = df.select(column_names).columns + outcome = ( + pl.Series(columns) + .str.split(by=names_sep) + .list.to_struct(n_field_strategy="max_width") + ) + len_outcome = len(outcome.struct.fields) + len_names_to = len(names_to) + if len_names_to != len_outcome: + raise ValueError( + "The length of names_to does not match " + "the number of fields extracted. " + f"The length of names_to is {len_names_to} " + "while the number of fields extracted is " + f"{len_outcome}." + ) + + if ".value" not in names_to: + outcome = outcome.struct.rename_fields(names_to) + return _pivot_longer_no_dot_value( + df=df, + outcome=outcome, + values_to=values_to, + index=index, + columns=columns, + names_to=names_to, + names_transform=names_transform, + ) + if all(label == ".value" for label in names_to): + return _pivot_longer_dot_value_only( + df=df, + names_to=names_to, + columns=columns, + index=index, + outcome=outcome, + ) + return _pivot_longer_dot_value( + df=df, + names_to=names_to, + columns=columns, + index=index, + outcome=outcome, + names_transform=names_transform, + ) + + +def _pivot_longer_names_pattern_str( + df: pl.DataFrame, + index: Union[Sequence, None], + column_names: Union[Sequence, None], + names_to: Sequence, + names_pattern: str, + values_to: str, + names_transform: dict, +) -> pl.DataFrame: + """ + This takes care of pivoting scenarios where + names_pattern is a string. + """ + + columns = df.select(column_names).columns + outcome = pl.Series(columns).str.extract_groups(names_pattern) + len_outcome = len(outcome.struct.fields) + len_names_to = len(names_to) + if len_names_to != len_outcome: + raise ValueError( + f"The length of names_to does not match " + "the number of fields extracted. " + f"The length of names_to is {len_names_to} " + "while the number of fields extracted is " + f"{len_outcome}." + ) + if ".value" not in names_to: + outcome = outcome.struct.rename_fields(names_to) + return _pivot_longer_no_dot_value( + df=df, + outcome=outcome, + values_to=values_to, + index=index, + columns=columns, + names_to=names_to, + names_transform=names_transform, + ) + if all(label == ".value" for label in names_to): + return _pivot_longer_dot_value_only( + df=df, + names_to=names_to, + columns=columns, + index=index, + outcome=outcome, + ) + return _pivot_longer_dot_value( + df=df, + names_to=names_to, + columns=columns, + index=index, + outcome=outcome, + names_transform=names_transform, + ) + + +def _pivot_longer_values_to_sequence( + df: pl.DataFrame, + index: Union[Sequence, None], + column_names: Union[Sequence, None], + names_to: Sequence, + names_pattern: Sequence, + values_to: Sequence, + names_transform: dict, +) -> pl.DataFrame: + """ + This takes care of pivoting scenarios where + values_to is a list/tuple. + """ + columns = df.select(column_names).columns + outcome = pl.DataFrame({"cols": columns}) + expressions = [ + pl.col("cols").str.contains(pattern).alias(f"cols{num}") + for num, pattern in enumerate(names_pattern) + ] + outcome = outcome.with_columns(expressions) + booleans = outcome.select(pl.exclude("cols").any()) + for position in range(len(names_pattern)): + if not booleans.to_series(position).item(): + raise ValueError( + "No match was returned for the regex " + f"at position {position} -> {names_pattern[position]}." 
+ ) + names_booleans = pl + values_booleans = pl + for boolean, repl_name, repl_value in zip( + booleans.columns, names_to, values_to + ): + names_booleans = names_booleans.when(pl.col(boolean)).then( + pl.lit(repl_name) + ) + values_booleans = values_booleans.when(pl.col(boolean)).then( + pl.lit(repl_value) + ) + names_booleans = names_booleans.alias("value") + values_booleans = values_booleans.alias(".value") + filter_expr = pl.col(".value").is_not_null() + cum_expr = pl.col(".value").cum_count().over(".value").sub(1).alias("idx") + outcome = ( + outcome.select(names_booleans, values_booleans, pl.col("cols")) + .filter(filter_expr) + .with_columns(cum_expr) + ) + headers_dict = defaultdict(list) + non_headers_dict = defaultdict(list) + for num, col_name, value_header, name_header in zip( + outcome.get_column("idx"), + outcome.get_column("cols"), + outcome.get_column(".value"), + outcome.get_column("value"), + ): + non_headers_dict[num].append((col_name, name_header)) + headers_dict[num].append((col_name, value_header)) + contents = [] + for key, value in headers_dict.items(): + expression = [] if index is None else [pl.col(index)] + columns_to_select = [ + pl.col(col_name).alias(repl_name) for col_name, repl_name in value + ] + expression.extend(columns_to_select) + columns_to_append = [ + pl.lit(col_name, dtype=names_transform[repl_name]).alias(repl_name) + for col_name, repl_name in non_headers_dict[key] + ] + + contents.append(df.select(expression).with_columns(columns_to_append)) + columns_to_select = [] if not index else list(index) + columns_to_select.extend(chain.from_iterable(zip(names_to, values_to))) + return pl.concat(contents, how="diagonal_relaxed").select( + columns_to_select + ) + + +def _pivot_longer_names_pattern_sequence( + df: pl.DataFrame, + index: Union[Sequence, None], + column_names: Union[Sequence, None], + names_to: Sequence, + names_pattern: Sequence, +) -> pl.DataFrame: + """ + This takes care of pivoting scenarios where + names_pattern is a list/tuple. + """ + columns = df.select(column_names).columns + outcome = pl.DataFrame({"cols": columns}) + expressions = [ + pl.col("cols").str.contains(pattern).alias(f"cols{num}") + for num, pattern in enumerate(names_pattern) + ] + outcome = outcome.with_columns(expressions) + booleans = outcome.select(pl.exclude("cols").any()) + for position in range(len(names_pattern)): + if not booleans.to_series(position).item(): + raise ValueError( + "No match was returned for the regex " + f"at position {position} -> {names_pattern[position]}." 
+ ) + names_booleans = pl + for boolean, repl_name in zip(booleans.columns, names_to): + names_booleans = names_booleans.when(pl.col(boolean)).then( + pl.lit(repl_name) + ) + + names_booleans = names_booleans.alias(".value") + filter_expr = pl.col(".value").is_not_null() + cum_expr = pl.col(".value").cum_count().over(".value").sub(1).alias("idx") + outcome = ( + outcome.select(names_booleans, pl.col("cols")) + .filter(filter_expr) + .with_columns(cum_expr) + ) + headers_dict = defaultdict(list) + for num, col_name, name_header in zip( + outcome.get_column("idx"), + outcome.get_column("cols"), + outcome.get_column(".value"), + ): + headers_dict[num].append((col_name, name_header)) + + contents = [] + for _, value in headers_dict.items(): + expression = [] if index is None else [pl.col(index)] + columns_to_select = [ + pl.col(col_name).alias(repl_name) for col_name, repl_name in value + ] + expression.extend(columns_to_select) + + contents.append(df.select(expression)) + return pl.concat(contents, how="diagonal_relaxed") + + +def _pivot_longer_no_dot_value( + df: pl.DataFrame, + outcome: Mapping, + names_to: Sequence, + values_to: str, + index: Sequence, + columns: Sequence, + names_transform: dict, +): + """ + Reshape the data for scenarios where .value + is not present in names_to, + or names_to is not a list/tuple. + + Returns a DataFrame. + """ + contents = [] + for col_name, mapping in zip(columns, outcome): + expression = ( + [pl.col(col_name)] + if index is None + else [pl.col(index), pl.col(col_name).alias(values_to)] + ) + columns_to_append = [ + pl.lit(label, dtype=names_transform[header]).alias(header) + for header, label in mapping.items() + ] + _frame = df.select(expression).with_columns(columns_to_append) + contents.append(_frame) + columns_to_select = [] if not index else list(index) + columns_to_select.extend(names_to) + columns_to_select.append(values_to) + return pl.concat(contents, how="diagonal_relaxed").select( + pl.col(columns_to_select) + ) + + +def _pivot_longer_dot_value( + df: pl.DataFrame, + names_to: Sequence, + outcome: pl.DataFrame, + index: Sequence, + columns: Sequence, + names_transform: Union[PolarsDataType, dict], +) -> pl.DataFrame: + """ + Pivots the dataframe into the final form, + for scenarios where .value is in names_to. + + Returns a DataFrame. + """ + booleans = outcome.struct.unnest().select(pl.all().is_null().any()) + for position in range(len(names_to)): + if booleans.to_series(position).item(): + raise ValueError( + f"Column labels '{columns[position]}' " + "could not be matched with any of the groups " + "in the provided regex. Kindly provide a regular expression " + "(with the correct groups) that matches all labels in the columns." 
+ ) + if names_to.count(".value") > 1: + cols = outcome.struct.fields + dot_value = [ + cols[num] + for num, label in enumerate(names_to) + if label == ".value" + ] + not_dot_value = [ + pl.col(field_name).alias(repl_name) + for field_name, repl_name in zip(cols, names_to) + if field_name not in dot_value + ] + + outcome = outcome.struct.unnest().select( + pl.concat_str(dot_value).alias(".value"), *not_dot_value + ) + else: + outcome = outcome.struct.rename_fields(names_to).struct.unnest() + idx = "".join(names_to) + not_dot_value = [name for name in names_to if name != ".value"] + outcome = outcome.with_row_index(idx).with_columns( + pl.col(idx).first().over(not_dot_value).rank("dense").sub(1), + pl.struct(not_dot_value), + ) + headers_dict = defaultdict(list) + for num, col_name, repl_name in zip( + outcome.get_column(idx), + columns, + outcome.get_column(".value"), + ): + headers_dict[num].append((col_name, repl_name)) + + non_headers_dict = dict() + outcome = outcome.select(idx, not_dot_value[0]).unique() + + for key, value in zip(outcome.to_series(0), outcome.to_series(1)): + value = [ + pl.lit(stub_name, dtype=names_transform[repl_name]).alias( + repl_name + ) + for repl_name, stub_name in value.items() + ] + non_headers_dict[key] = value + contents = [] + for key, value in headers_dict.items(): + expression = [] if index is None else [pl.col(index)] + columns_to_select = [ + pl.col(col_name).alias(repl_name) for col_name, repl_name in value + ] + expression.extend(columns_to_select) + _frame = df.select(expression).with_columns(non_headers_dict[key]) + contents.append(_frame) + columns_to_select = [] if not index else list(index) + columns_to_select.extend(not_dot_value) + return pl.concat(contents, how="diagonal_relaxed").select( + pl.col(columns_to_select), pl.exclude(columns_to_select) + ) + + +def _pivot_longer_dot_value_only( + df: pl.DataFrame, + names_to: Sequence, + outcome: pl.DataFrame, + index: Sequence, + columns: Sequence, +) -> pl.DataFrame: + """ + Pivots the dataframe into the final form, + for scenarios where only '.value' is present in names_to. + + Returns a DataFrame. + """ + + if names_to.count(".value") > 1: + outcome = outcome.struct.unnest().select( + pl.concat_str(pl.all()).alias(".value") + ) + else: + outcome = outcome.struct.rename_fields(names_to).struct.unnest() + outcome = outcome.with_columns( + pl.col(".value").cum_count().over(".value").sub(1).alias("idx") + ) + headers_dict = defaultdict(list) + for num, col_name, repl_name in zip( + outcome.get_column("idx"), + columns, + outcome.get_column(".value"), + ): + headers_dict[num].append((col_name, repl_name)) + + contents = [] + for _, value in headers_dict.items(): + expression = [] if index is None else [pl.col(index)] + columns_to_select = [ + pl.col(col_name).alias(repl_name) for col_name, repl_name in value + ] + expression.extend(columns_to_select) + contents.append(df.select(expression)) + + return pl.concat(contents, how="diagonal_relaxed") + + +def _data_checks_pivot_longer( + df, + index, + column_names, + names_to, + values_to, + names_sep, + names_pattern, + names_transform, +) -> tuple: + """ + This function majorly does type checks on the passed arguments. + + This function is executed before proceeding to the computation phase. + + Type annotations are not provided because this function is where type + checking happens. 
+ """ + + def _check_type(arg_name: str, arg_value: Any): + """ + Raise if argument is not a valid type + """ + + def _check_type_single(entry): + if ( + not isinstance(entry, str) + and not cs.is_selector(entry) + and not isinstance(entry, pl.Expr) + ): + raise TypeError( + f"The argument passed to the {arg_name} parameter " + "should be a string type, a ColumnSelector, " + "or a list/tuple that contains " + "a string and/or a ColumnSelector." + ) + + if isinstance(arg_value, (list, tuple)): + for entry in arg_value: + _check_type_single(entry=entry) + else: + _check_type_single(entry=arg_value) + + if (index is None) and (column_names is None): + column_names = cs.expand_selector(df, pl.all()) + index = [] + elif (index is not None) and (column_names is not None): + _check_type(arg_name="index", arg_value=index) + index = cs.expand_selector(df, index) + _check_type(arg_name="column_names", arg_value=column_names) + column_names = cs.expand_selector(df, column_names) + + elif (index is None) and (column_names is not None): + _check_type(arg_name="column_names", arg_value=column_names) + column_names = cs.expand_selector(df, column_names) + index = cs.expand_selector(df, pl.exclude(column_names)) + + elif (index is not None) and (column_names is None): + _check_type(arg_name="index", arg_value=index) + index = cs.expand_selector(df, index) + column_names = cs.expand_selector(df, pl.exclude(index)) + + check("names_to", names_to, [list, tuple, str]) + if isinstance(names_to, (list, tuple)): + uniques = set() + for word in names_to: + check(f"'{word}' in names_to", word, [str]) + if (word in uniques) and (word != ".value"): + raise ValueError(f"'{word}' is duplicated in names_to.") + uniques.add(word) + names_to = [names_to] if isinstance(names_to, str) else names_to + + if names_sep and names_pattern: + raise ValueError( + "Only one of names_pattern or names_sep should be provided." + ) + + if names_sep is not None: + check("names_sep", names_sep, [str]) + + if names_pattern is not None: + check("names_pattern", names_pattern, [str, list, tuple]) + if isinstance(names_pattern, (list, tuple)): + for word in names_pattern: + check(f"'{word}' in names_pattern", word, [str]) + if ".value" in names_to: + raise ValueError( + ".value is not accepted in names_to " + "if names_pattern is a list/tuple." + ) + if len(names_pattern) != len(names_to): + raise ValueError( + f"The length of names_to does not match " + "the number of regexes in names_pattern. " + f"The length of names_to is {len(names_to)} " + f"while the number of regexes is {len(names_pattern)}." + ) + + check("values_to", values_to, [str, list, tuple]) + values_to_is_a_sequence = isinstance(values_to, (list, tuple)) + names_pattern_is_a_sequence = isinstance(names_pattern, (list, tuple)) + if values_to_is_a_sequence: + if not names_pattern_is_a_sequence: + raise TypeError( + "values_to can be a list/tuple only " + "if names_pattern is a list/tuple." + ) + + if len(names_pattern) != len(values_to): + raise ValueError( + f"The length of values_to does not match " + "the number of regexes in names_pattern. " + f"The length of values_to is {len(values_to)} " + f"while the number of regexes is {len(names_pattern)}." 
+ ) + uniques = set() + for word in values_to: + check(f"{word} in values_to", word, [str]) + if word in uniques: + raise ValueError(f"'{word}' is duplicated in values_to.") + uniques.add(word) + + columns_to_append = any(label != ".value" for label in names_to) + if values_to_is_a_sequence or columns_to_append: + check("names_transform", names_transform, [DataTypeClass, dict]) + if isinstance(names_transform, dict): + for _, dtype in names_transform.items(): + check( + "dtype in the names_transform mapping", + dtype, + [DataTypeClass], + ) + names_transform = { + label: names_transform.get(label, pl.Utf8) + for label in names_to + } + else: + names_transform = {label: names_transform for label in names_to} + + return ( + df, + index, + column_names, + names_to, + values_to, + names_sep, + names_pattern, + names_transform, + ) diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md index a2cbd574c..17a6a87aa 100644 --- a/mkdocs/api/polars.md +++ b/mkdocs/api/polars.md @@ -3,5 +3,4 @@ ::: janitor.polars options: members: - - PolarsExpr - PolarsFrame diff --git a/tests/polars/functions/test_clean_names_polars.py b/tests/polars/functions/test_clean_names_polars.py deleted file mode 100644 index 23ce38742..000000000 --- a/tests/polars/functions/test_clean_names_polars.py +++ /dev/null @@ -1,102 +0,0 @@ -import polars as pl -import pytest - -from janitor import polars # noqa: F401 - - -@pytest.mark.functions -def test_clean_names_method_chain(dataframe): - """Tests clean_names default args in a method chain.""" - df = pl.from_pandas(dataframe) - df = df.janitor.clean_names() - expected_columns = [ - "a", - "bell_chart", - "decorated_elephant", - "animals@#$%^", - "cities", - ] - assert df.columns == expected_columns - - -@pytest.mark.functions -def test_clean_names_special_characters(dataframe): - """Tests clean_names `remove_special` parameter.""" - df = pl.from_pandas(dataframe) - df = df.janitor.clean_names(remove_special=True) - expected_columns = [ - "a", - "bell_chart", - "decorated_elephant", - "animals", - "cities", - ] - assert df.columns == expected_columns - - -@pytest.mark.functions -def test_clean_names_uppercase(dataframe): - """Tests clean_names `case_type` parameter = upper.""" - df = pl.from_pandas(dataframe) - df = df.janitor.clean_names(remove_special=True, case_type="upper") - expected_columns = [ - "A", - "BELL_CHART", - "DECORATED_ELEPHANT", - "ANIMALS", - "CITIES", - ] - assert df.columns == expected_columns - - -@pytest.mark.functions -def test_clean_names_strip_accents(): - """Tests clean_names `strip_accents` parameter.""" - df = pl.DataFrame({"João": [1, 2], "Лука́ся": [1, 2], "Käfer": [1, 2]}) - df = df.janitor.clean_names(strip_accents=True) - expected_columns = ["joao", "лукася", "kafer"] - assert df.columns == expected_columns - - -@pytest.mark.functions -def test_clean_names_camelcase_to_snake(dataframe): - """Tests clean_names `case_type` parameter = snake.""" - df = pl.from_pandas(dataframe) - df = ( - df.select("a") - .rename({"a": "AColumnName"}) - .janitor.clean_names(remove_special=True, case_type="snake") - ) - assert df.columns == ["a_column_name"] - - -@pytest.mark.functions -def test_clean_names_truncate_limit(dataframe): - """Tests clean_names `truncate_limit` parameter.""" - df = pl.from_pandas(dataframe) - df = df.janitor.clean_names(truncate_limit=7) - expected_columns = ["a", "bell_ch", "decorat", "animals", "cities"] - assert df.columns == expected_columns - - -@pytest.mark.functions -def test_charac(): - """Ensure non standard characters and 
spaces have been cleaned up.""" - - df = pl.DataFrame( - { - r"Current accountbalance(in % of GDP)": range(5), - } - ) - df = df.janitor.clean_names(strip_underscores=True, case_type="lower") - - assert "current_accountbalance_in_%_of_gdp" in df.columns - - -def test_clean_column_values(): - """Clean column values""" - raw = pl.DataFrame({"raw": ["Abçdê fgí j"]}) - outcome = raw.with_columns( - pl.col("raw").janitor.clean_names(strip_accents=True) - ) - assert list(outcome)[0][0] == "abcde_fgi_j" diff --git a/tests/polars/functions/test_pivot_longer_polars.py b/tests/polars/functions/test_pivot_longer_polars.py new file mode 100644 index 000000000..86d2636c8 --- /dev/null +++ b/tests/polars/functions/test_pivot_longer_polars.py @@ -0,0 +1,913 @@ +import polars as pl +import polars.selectors as cs +import pytest +from polars.testing import assert_frame_equal + +from janitor import polars # noqa: F401 + + +@pytest.fixture +def df_checks(): + """fixture dataframe""" + return pl.DataFrame( + { + "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], + } + ) + + +def test_type_index(df_checks): + """Raise TypeError if wrong type is provided for the index.""" + msg = "The argument passed to the index parameter " + msg += "should be a string type, a ColumnSelector.+" + with pytest.raises(TypeError, match=msg): + df_checks.janitor.pivot_longer(index=2007, names_sep="_") + + +def test_type_column_names(df_checks): + """Raise TypeError if wrong type is provided for column_names.""" + msg = "The argument passed to the column_names parameter " + msg += "should be a string type, a ColumnSelector.+" + with pytest.raises(TypeError, match=msg): + df_checks.janitor.pivot_longer(column_names=2007, names_sep="_") + + +def test_type_names_to(df_checks): + """Raise TypeError if wrong type is provided for names_to.""" + msg = "names_to should be one of .+" + with pytest.raises(TypeError, match=msg): + df_checks.janitor.pivot_longer(names_to=2007, names_sep="_") + + +def test_subtype_names_to(df_checks): + """ + Raise TypeError if names_to is a sequence + and the wrong type is provided for entries + in names_to. + """ + with pytest.raises(TypeError, match="'1' in names_to.+"): + df_checks.janitor.pivot_longer(names_to=[1], names_sep="_") + + +def test_duplicate_names_to(df_checks): + """Raise error if names_to contains duplicates.""" + with pytest.raises(ValueError, match="'y' is duplicated in names_to."): + df_checks.janitor.pivot_longer( + names_to=["y", "y"], names_pattern="(.+)(.)" + ) + + +def test_both_names_sep_and_pattern(df_checks): + """ + Raise ValueError if both names_sep + and names_pattern is provided. + """ + with pytest.raises( + ValueError, + match="Only one of names_pattern or names_sep should be provided.", + ): + df_checks.janitor.pivot_longer( + names_to=["rar", "bar"], names_sep="-", names_pattern="(.+)(.)" + ) + + +def test_name_pattern_wrong_type(df_checks): + """Raise TypeError if the wrong type is provided for names_pattern.""" + with pytest.raises(TypeError, match="names_pattern should be one of.+"): + df_checks.janitor.pivot_longer( + names_to=["rar", "bar"], names_pattern=2007 + ) + + +def test_names_pattern_wrong_subtype(df_checks): + """ + Raise TypeError if names_pattern is a list/tuple + and wrong subtype is supplied. 
+ """ + with pytest.raises(TypeError, match="'1' in names_pattern.+"): + df_checks.janitor.pivot_longer( + names_to=["ht", "num"], names_pattern=[1, "\\d"] + ) + + +def test_names_pattern_names_to_unequal_length(df_checks): + """ + Raise ValueError if names_pattern is a list/tuple + and wrong number of items in names_to. + """ + with pytest.raises( + ValueError, + match="The length of names_to does not match " + "the number of regexes in names_pattern.+", + ): + df_checks.janitor.pivot_longer( + names_to=["variable"], names_pattern=["^ht", ".+i.+"] + ) + + +def test_names_pattern_names_to_dot_value(df_checks): + """ + Raise Error if names_pattern is a list/tuple and + .value in names_to. + """ + with pytest.raises( + ValueError, + match=".value is not accepted in names_to " + "if names_pattern is a list/tuple.", + ): + df_checks.janitor.pivot_longer( + names_to=["variable", ".value"], names_pattern=["^ht", ".+i.+"] + ) + + +def test_name_sep_wrong_type(df_checks): + """Raise TypeError if the wrong type is provided for names_sep.""" + with pytest.raises(TypeError, match="names_sep should be one of.+"): + df_checks.janitor.pivot_longer( + names_to=[".value", "num"], names_sep=["_"] + ) + + +def test_values_to_wrong_type(df_checks): + """Raise TypeError if the wrong type is provided for `values_to`.""" + with pytest.raises(TypeError, match="values_to should be one of.+"): + df_checks.janitor.pivot_longer(values_to={"salvo"}, names_sep="_") + + +def test_values_to_wrong_type_names_pattern(df_checks): + """ + Raise TypeError if `values_to` is a list, + and names_pattern is not. + """ + with pytest.raises( + TypeError, + match="values_to can be a list/tuple only " + "if names_pattern is a list/tuple.", + ): + df_checks.janitor.pivot_longer( + values_to=["salvo"], names_pattern=r"(.)" + ) + + +def test_values_to_names_pattern_unequal_length(df_checks): + """ + Raise ValueError if `values_to` is a list, + and the length of names_pattern + does not match the length of values_to. + """ + with pytest.raises( + ValueError, + match="The length of values_to does not match " + "the number of regexes in names_pattern.+", + ): + df_checks.janitor.pivot_longer( + values_to=["salvo"], + names_pattern=["ht", r"\d"], + names_to=["foo", "bar"], + ) + + +def test_sub_values_to(df_checks): + """Raise error if values_to is a sequence, and contains non strings.""" + with pytest.raises(TypeError, match="1 in values_to.+"): + df_checks.janitor.pivot_longer( + names_to=["x", "y"], + names_pattern=[r"ht", r"\d"], + values_to=[1, "salvo"], + ) + + +def test_duplicate_values_to(df_checks): + """Raise error if values_to is a sequence, and contains duplicates.""" + with pytest.raises( + ValueError, match="'salvo' is duplicated in values_to." + ): + df_checks.janitor.pivot_longer( + names_to=["x", "y"], + names_pattern=[r"ht", r"\d"], + values_to=["salvo", "salvo"], + ) + + +def test_names_transform_wrong_type(df_checks): + """Raise TypeError if the wrong type is provided for `names_transform`.""" + with pytest.raises(TypeError, match="names_transform should be one of.+"): + df_checks.janitor.pivot_longer(names_sep="_", names_transform=1) + + +def test_names_transform_wrong_subtype(df_checks): + """ + Raise TypeError if the wrong subtype + is provided for values in the + `names_transform` dictionary. 
+ """ + with pytest.raises( + TypeError, + match="dtype in the names_transform mapping should be one of.+", + ): + df_checks.janitor.pivot_longer( + names_sep="_", names_transform={"rar": 1} + ) + + +def test_names_pattern_list_empty_any(df_checks): + """ + Raise ValueError if names_pattern is a list, + and not all matches are returned. + """ + with pytest.raises( + ValueError, match="No match was returned for the regex.+" + ): + df_checks.janitor.pivot_longer( + index=["famid", "birth"], + names_to=["ht"], + names_pattern=["rar"], + ) + + +def test_names_pattern_no_match(df_checks): + """Raise error if names_pattern is a regex and returns no matches.""" + with pytest.raises( + ValueError, match="Column labels .+ could not be matched with any .+" + ): + df_checks.janitor.pivot_longer( + index="famid", + names_to=[".value", "value"], + names_pattern=r"(rar)(.)", + ) + + +def test_names_pattern_incomplete_match(df_checks): + """ + Raise error if names_pattern is a regex + and returns incomplete matches. + """ + with pytest.raises( + ValueError, match="Column labels .+ could not be matched with any .+" + ): + df_checks.janitor.pivot_longer( + index="famid", + names_to=[".value", "value"], + names_pattern=r"(ht)(.)", + ) + + +def test_names_sep_len(df_checks): + """ + Raise error if names_sep, + and the number of matches returned + is not equal to the length of names_to. + """ + msg = "The length of names_to does not match " + msg += "the number of fields extracted.+ " + with pytest.raises(ValueError, match=msg): + df_checks.janitor.pivot_longer(names_to=".value", names_sep="t") + + +def test_pivot_index_only(df_checks): + """Test output if only index is passed.""" + result = df_checks.janitor.pivot_longer( + index=["famid", "birth"], + names_to="dim", + values_to="num", + ) + + actual = df_checks.melt( + ["famid", "birth"], variable_name="dim", value_name="num" + ) + + assert_frame_equal(result, actual) + + +def test_pivot_column_only(df_checks): + """Test output if only column_names is passed.""" + result = df_checks.janitor.pivot_longer( + column_names=["ht1", "ht2"], + names_to="dim", + values_to="num", + ) + + actual = df_checks.melt( + id_vars=["famid", "birth"], + variable_name="dim", + value_name="num", + ) + + assert_frame_equal(result, actual) + + +def test_names_pat_str(df_checks): + """ + Test output when names_pattern is a string, + and .value is present. 
+ """ + result = df_checks.janitor.pivot_longer( + column_names=cs.starts_with("ht"), + names_to=(".value", "age"), + names_pattern="(.+)(.)", + names_transform={"age": pl.Int64}, + ).sort(by=cs.all()) + + actual = [ + {"famid": 1, "birth": 1, "age": 1, "ht": 2.8}, + {"famid": 1, "birth": 1, "age": 2, "ht": 3.4}, + {"famid": 1, "birth": 2, "age": 1, "ht": 2.9}, + {"famid": 1, "birth": 2, "age": 2, "ht": 3.8}, + {"famid": 1, "birth": 3, "age": 1, "ht": 2.2}, + {"famid": 1, "birth": 3, "age": 2, "ht": 2.9}, + {"famid": 2, "birth": 1, "age": 1, "ht": 2.0}, + {"famid": 2, "birth": 1, "age": 2, "ht": 3.2}, + {"famid": 2, "birth": 2, "age": 1, "ht": 1.8}, + {"famid": 2, "birth": 2, "age": 2, "ht": 2.8}, + {"famid": 2, "birth": 3, "age": 1, "ht": 1.9}, + {"famid": 2, "birth": 3, "age": 2, "ht": 2.4}, + {"famid": 3, "birth": 1, "age": 1, "ht": 2.2}, + {"famid": 3, "birth": 1, "age": 2, "ht": 3.3}, + {"famid": 3, "birth": 2, "age": 1, "ht": 2.3}, + {"famid": 3, "birth": 2, "age": 2, "ht": 3.4}, + {"famid": 3, "birth": 3, "age": 1, "ht": 2.1}, + {"famid": 3, "birth": 3, "age": 2, "ht": 2.9}, + ] + actual = pl.DataFrame(actual).sort(by=cs.all()) + + assert_frame_equal(result, actual, check_dtype=False) + + +def test_no_column_names(df_checks): + """ + Test output if all the columns + are assigned to the index parameter. + """ + assert_frame_equal( + df_checks.janitor.pivot_longer(index=cs.all()), + df_checks, + ) + + +@pytest.fixture +def test_df(): + """Fixture DataFrame""" + return pl.DataFrame( + { + "off_loc": ["A", "B", "C", "D", "E", "F"], + "pt_loc": ["G", "H", "I", "J", "K", "L"], + "pt_lat": [ + 100.07548220000001, + 75.191326, + 122.65134479999999, + 124.13553329999999, + 124.13553329999999, + 124.01028909999998, + ], + "off_lat": [ + 121.271083, + 75.93845266, + 135.043791, + 134.51128400000002, + 134.484374, + 137.962195, + ], + "pt_long": [ + 4.472089953, + -144.387785, + -40.45611048, + -46.07156181, + -46.07156181, + -46.01594293, + ], + "off_long": [ + -7.188632000000001, + -143.2288569, + 21.242563, + 40.937416999999996, + 40.78472, + 22.905889000000002, + ], + } + ) + + +actual = [ + { + "set": "off", + "loc": "A", + "lat": 121.271083, + "long": -7.188632000000001, + }, + {"set": "off", "loc": "B", "lat": 75.93845266, "long": -143.2288569}, + {"set": "off", "loc": "C", "lat": 135.043791, "long": 21.242563}, + { + "set": "off", + "loc": "D", + "lat": 134.51128400000002, + "long": 40.937416999999996, + }, + {"set": "off", "loc": "E", "lat": 134.484374, "long": 40.78472}, + { + "set": "off", + "loc": "F", + "lat": 137.962195, + "long": 22.905889000000002, + }, + { + "set": "pt", + "loc": "G", + "lat": 100.07548220000001, + "long": 4.472089953, + }, + {"set": "pt", "loc": "H", "lat": 75.191326, "long": -144.387785}, + { + "set": "pt", + "loc": "I", + "lat": 122.65134479999999, + "long": -40.45611048, + }, + { + "set": "pt", + "loc": "J", + "lat": 124.13553329999999, + "long": -46.07156181, + }, + { + "set": "pt", + "loc": "K", + "lat": 124.13553329999999, + "long": -46.07156181, + }, + { + "set": "pt", + "loc": "L", + "lat": 124.01028909999998, + "long": -46.01594293, + }, +] + +actual = pl.DataFrame(actual).sort(by=pl.all()) + + +def test_names_pattern_str(test_df): + """Test output for names_pattern and .value.""" + + result = test_df.janitor.pivot_longer( + column_names=cs.all(), + names_to=["set", ".value"], + names_pattern="(.+)_(.+)", + ).sort(by=cs.all()) + assert_frame_equal(result, actual) + + +def test_names_sep_str(test_df): + """Test output for names_pattern and .value.""" + 
+ result = test_df.janitor.pivot_longer( + column_names=cs.all(), + names_to=["set", ".value"], + names_sep="_", + ).sort(by=cs.all()) + assert_frame_equal(result, actual) + + +def test_names_pattern_list(): + """Test output if names_pattern is a list/tuple.""" + + df = pl.DataFrame( + { + "Activity": ["P1", "P2"], + "General": ["AA", "BB"], + "m1": ["A1", "B1"], + "t1": ["TA1", "TB1"], + "m2": ["A2", "B2"], + "t2": ["TA2", "TB2"], + "m3": ["A3", "B3"], + "t3": ["TA3", "TB3"], + } + ) + + result = ( + df.janitor.pivot_longer( + index=["Activity", "General"], + names_pattern=["^m", "^t"], + names_to=["M", "Task"], + ) + .select(["Activity", "General", "Task", "M"]) + .sort(by=pl.all()) + ) + + actual = [ + {"Activity": "P1", "General": "AA", "Task": "TA1", "M": "A1"}, + {"Activity": "P1", "General": "AA", "Task": "TA2", "M": "A2"}, + {"Activity": "P1", "General": "AA", "Task": "TA3", "M": "A3"}, + {"Activity": "P2", "General": "BB", "Task": "TB1", "M": "B1"}, + {"Activity": "P2", "General": "BB", "Task": "TB2", "M": "B2"}, + {"Activity": "P2", "General": "BB", "Task": "TB3", "M": "B3"}, + ] + + actual = pl.DataFrame(actual).sort(by=pl.all()) + + assert_frame_equal(result, actual) + + +@pytest.fixture +def not_dot_value(): + """Fixture DataFrame""" + return pl.DataFrame( + { + "country": ["United States", "Russia", "China"], + "vault_2012": [48.1, 46.4, 44.3], + "floor_2012": [45.4, 41.6, 40.8], + "vault_2016": [46.9, 45.7, 44.3], + "floor_2016": [46.0, 42.0, 42.1], + } + ) + + +actual2 = [ + {"country": "China", "event": "floor", "year": "2012", "score": 40.8}, + {"country": "China", "event": "floor", "year": "2016", "score": 42.1}, + {"country": "China", "event": "vault", "year": "2012", "score": 44.3}, + {"country": "China", "event": "vault", "year": "2016", "score": 44.3}, + {"country": "Russia", "event": "floor", "year": "2012", "score": 41.6}, + {"country": "Russia", "event": "floor", "year": "2016", "score": 42.0}, + {"country": "Russia", "event": "vault", "year": "2012", "score": 46.4}, + {"country": "Russia", "event": "vault", "year": "2016", "score": 45.7}, + { + "country": "United States", + "event": "floor", + "year": "2012", + "score": 45.4, + }, + { + "country": "United States", + "event": "floor", + "year": "2016", + "score": 46.0, + }, + { + "country": "United States", + "event": "vault", + "year": "2012", + "score": 48.1, + }, + { + "country": "United States", + "event": "vault", + "year": "2016", + "score": 46.9, + }, +] +actual2 = pl.DataFrame(actual2).sort(by=pl.all()) + + +def test_not_dot_value_sep(not_dot_value): + """Test output when names_sep and no dot_value""" + + result = not_dot_value.janitor.pivot_longer( + "country", + names_to=("event", "year"), + names_sep="_", + values_to="score", + ).sort(by=pl.all()) + + assert_frame_equal(result, actual2) + + +def test_not_dot_value_sep2(not_dot_value): + """Test output when names_sep and no dot_value""" + + result = not_dot_value.janitor.pivot_longer( + "country", + names_to="event", + names_sep="/", + values_to="score", + ) + + actual = not_dot_value.melt( + "country", variable_name="event", value_name="score" + ) + + assert_frame_equal(result, actual) + + +def test_not_dot_value_pattern(not_dot_value): + """Test output when names_pattern is a string and no dot_value""" + + result = not_dot_value.janitor.pivot_longer( + index="country", + names_to=("event", "year"), + names_pattern=r"(.+)_(.+)", + values_to="score", + ).sort(by=cs.all()) + + assert_frame_equal(result, actual2) + + +def test_multiple_dot_value(): + 
"""Test output for multiple .value.""" + df = pl.DataFrame( + { + "x_1_mean": [1, 2, 3, 4], + "x_2_mean": [1, 1, 0, 0], + "x_1_sd": [0, 1, 1, 1], + "x_2_sd": [0.739, 0.219, 1.46, 0.918], + "y_1_mean": [1, 2, 3, 4], + "y_2_mean": [1, 1, 0, 0], + "y_1_sd": [0, 1, 1, 1], + "y_2_sd": [-0.525, 0.623, -0.705, 0.662], + "unit": [1, 2, 3, 4], + } + ) + + result = df.janitor.pivot_longer( + index="unit", + names_to=(".value", "time", ".value"), + names_pattern=r"(x|y)_([0-9])(_mean|_sd)", + names_transform={"time": pl.Int64}, + ).sort(by=cs.all()) + + actual = { + "unit": [1, 2, 3, 4, 1, 2, 3, 4], + "time": [1, 1, 1, 1, 2, 2, 2, 2], + "x_mean": [1, 2, 3, 4, 1, 1, 0, 0], + "x_sd": [0.0, 1.0, 1.0, 1.0, 0.739, 0.219, 1.46, 0.918], + "y_mean": [1, 2, 3, 4, 1, 1, 0, 0], + "y_sd": [0.0, 1.0, 1.0, 1.0, -0.525, 0.623, -0.705, 0.662], + } + + actual = pl.DataFrame(actual).sort(by=cs.all()) + + assert_frame_equal(result, actual) + + +@pytest.fixture +def single_val(): + """fixture dataframe""" + return pl.DataFrame( + { + "id": [1, 2, 3], + "x1": [4, 5, 6], + "x2": [5, 6, 7], + } + ) + + +def test_multiple_dot_value2(single_val): + """Test output for multiple .value.""" + + result = single_val.janitor.pivot_longer( + index="id", names_to=(".value", ".value"), names_pattern="(.)(.)" + ) + + assert_frame_equal(result, single_val) + + +actual3 = [ + {"id": 1, "x": 4}, + {"id": 2, "x": 5}, + {"id": 3, "x": 6}, + {"id": 1, "x": 5}, + {"id": 2, "x": 6}, + {"id": 3, "x": 7}, +] + +actual3 = pl.DataFrame(actual3) + + +def test_names_pattern_sequence_single_unique_column(single_val): + """ + Test output if names_pattern is a sequence of length 1. + """ + + result = single_val.janitor.pivot_longer( + "id", names_to=["x"], names_pattern=("x",) + ) + + assert_frame_equal(result, actual3) + + +def test_names_pattern_single_column(single_val): + """ + Test output if names_to is only '.value'. + """ + + result = single_val.janitor.pivot_longer( + "id", names_to=".value", names_pattern="(.)." + ) + + assert_frame_equal(result, actual3) + + +def test_names_pattern_single_column_not_dot_value(single_val): + """ + Test output if names_to is not '.value'. + """ + result = single_val.janitor.pivot_longer( + index="id", column_names="x1", names_to="yA", names_pattern="(.+)" + ) + + assert_frame_equal( + result, + single_val.melt(id_vars="id", value_vars="x1", variable_name="yA"), + ) + + +def test_names_pattern_single_column_not_dot_value1(single_val): + """ + Test output if names_to is not '.value'. + """ + result = single_val.select("x1").janitor.pivot_longer( + names_to="yA", names_pattern="(.+)" + ) + + assert_frame_equal( + result, single_val.select("x1").melt(variable_name="yA") + ) + + +@pytest.fixture +def df_null(): + "Dataframe with nulls." 
+ return pl.DataFrame( + { + "family": [1, 2, 3, 4, 5], + "dob_child1": [ + "1998-11-26", + "1996-06-22", + "2002-07-11", + "2004-10-10", + "2000-12-05", + ], + "dob_child2": [ + "2000-01-29", + None, + "2004-04-05", + "2009-08-27", + "2005-02-28", + ], + "gender_child1": [1, 2, 2, 1, 2], + "gender_child2": [2.0, None, 2.0, 1.0, 1.0], + } + ) + + +def test_names_pattern_nulls_in_data(df_null): + """Test output if nulls are present in data.""" + result = df_null.janitor.pivot_longer( + index="family", + names_to=[".value", "child"], + names_pattern=r"(.+)_(.+)", + ).sort(by=pl.all()) + + actual = [ + {"family": 1, "child": "child1", "dob": "1998-11-26", "gender": 1.0}, + {"family": 2, "child": "child1", "dob": "1996-06-22", "gender": 2.0}, + {"family": 3, "child": "child1", "dob": "2002-07-11", "gender": 2.0}, + {"family": 4, "child": "child1", "dob": "2004-10-10", "gender": 1.0}, + {"family": 5, "child": "child1", "dob": "2000-12-05", "gender": 2.0}, + {"family": 1, "child": "child2", "dob": "2000-01-29", "gender": 2.0}, + {"family": 2, "child": "child2", "dob": None, "gender": None}, + {"family": 3, "child": "child2", "dob": "2004-04-05", "gender": 2.0}, + {"family": 4, "child": "child2", "dob": "2009-08-27", "gender": 1.0}, + {"family": 5, "child": "child2", "dob": "2005-02-28", "gender": 1.0}, + ] + + actual = pl.DataFrame(actual).sort(by=pl.all()) + + assert_frame_equal(result, actual) + + +@pytest.fixture +def multiple_values_to(): + """fixture for multiple values_to""" + # https://stackoverflow.com/q/51519101/7175713 + return pl.DataFrame( + { + "City": ["Houston", "Austin", "Hoover"], + "State": ["Texas", "Texas", "Alabama"], + "Name": ["Aria", "Penelope", "Niko"], + "Mango": [4, 10, 90], + "Orange": [10, 8, 14], + "Watermelon": [40, 99, 43], + "Gin": [16, 200, 34], + "Vodka": [20, 33, 18], + }, + ) + + +def test_output_values_to_seq(multiple_values_to): + """Test output when values_to is a list/tuple.""" + + expected = multiple_values_to.janitor.pivot_longer( + index=["City", "State"], + column_names=cs.numeric(), + names_to=("Fruit"), + values_to=("Pounds",), + names_pattern=[r"M|O|W"], + ).sort(by=cs.all()) + + actual = [ + {"City": "Houston", "State": "Texas", "Fruit": "Mango", "Pounds": 4}, + {"City": "Austin", "State": "Texas", "Fruit": "Mango", "Pounds": 10}, + {"City": "Hoover", "State": "Alabama", "Fruit": "Mango", "Pounds": 90}, + {"City": "Houston", "State": "Texas", "Fruit": "Orange", "Pounds": 10}, + {"City": "Austin", "State": "Texas", "Fruit": "Orange", "Pounds": 8}, + { + "City": "Hoover", + "State": "Alabama", + "Fruit": "Orange", + "Pounds": 14, + }, + { + "City": "Houston", + "State": "Texas", + "Fruit": "Watermelon", + "Pounds": 40, + }, + { + "City": "Austin", + "State": "Texas", + "Fruit": "Watermelon", + "Pounds": 99, + }, + { + "City": "Hoover", + "State": "Alabama", + "Fruit": "Watermelon", + "Pounds": 43, + }, + ] + + actual = pl.DataFrame(actual).sort(by=pl.all()) + + assert_frame_equal(expected, actual) + + +def test_output_values_to_seq1(multiple_values_to): + """Test output when values_to is a list/tuple.""" + # https://stackoverflow.com/a/51520155/7175713 + expected = ( + multiple_values_to.janitor.pivot_longer( + index=["City", "State"], + column_names=cs.numeric(), + names_to=("Fruit", "Drink"), + values_to=("Pounds", "Ounces"), + names_pattern=[r"M|O|W", r"G|V"], + ) + .with_columns(pl.col("Ounces").cast(float)) + .sort(by=pl.all()) + ) + + actual = { + "City": [ + "Houston", + "Austin", + "Hoover", + "Houston", + "Austin", + "Hoover", + "Houston", 
+ "Austin", + "Hoover", + ], + "State": [ + "Texas", + "Texas", + "Alabama", + "Texas", + "Texas", + "Alabama", + "Texas", + "Texas", + "Alabama", + ], + "Fruit": [ + "Mango", + "Mango", + "Mango", + "Orange", + "Orange", + "Orange", + "Watermelon", + "Watermelon", + "Watermelon", + ], + "Pounds": [4, 10, 90, 10, 8, 14, 40, 99, 43], + "Drink": [ + "Gin", + "Gin", + "Gin", + "Vodka", + "Vodka", + "Vodka", + None, + None, + None, + ], + "Ounces": [16.0, 200.0, 34.0, 20.0, 33.0, 18.0, None, None, None], + } + + actual = pl.DataFrame(actual).sort(by=pl.all()) + + assert_frame_equal(expected, actual) From 959b08295a1a01337693ee741fef119006180590 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 10:46:39 +1000 Subject: [PATCH 32/46] changelog --- CHANGELOG.md | 2 +- janitor/polars/__init__.py | 14 +-- janitor/polars/pivot_longer.py | 98 +++++++++---------- .../functions/test_pivot_longer_polars.py | 22 ++--- 4 files changed, 65 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5717193d6..8d95acbb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog ## [Unreleased] -- [ENH] Added a `clean_names` method for polars - it can be used to clean the column names, or clean column values . Issue #1343 +- [ENH] Added a `pivot_longer` method for polars - Issue #1352 ## [v0.27.0] - 2024-03-21 diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 59b15ff72..c44cd635d 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -1,6 +1,6 @@ -from typing import Any, Optional, Sequence, Union +from typing import Any, Iterable, Optional, Union -from polars.type_aliases import ColumnNameOrSelector +from polars.type_aliases import IntoExpr from janitor.utils import import_message @@ -24,12 +24,8 @@ def __init__(self, df: pl.DataFrame) -> pl.DataFrame: def pivot_longer( self, - index: Union[ - ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None - ] = None, - column_names: Union[ - ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None - ] = None, + index: Union[IntoExpr, Iterable[IntoExpr], None] = None, + column_names: Union[IntoExpr, Iterable[IntoExpr], None] = None, names_to: Optional[Union[list, tuple, str]] = "variable", values_to: Optional[Union[list, tuple, str]] = "value", names_sep: Optional[Union[str, None]] = None, @@ -317,8 +313,6 @@ def pivot_longer( It takes the same specification as polars' `str.extract_groups` method. `names_pattern` can also be a list/tuple of regular expressions. - It can also be a list/tuple of strings; - the strings will be treated as regular expressions. Under the hood it is processed with polars' `str.contains` function. 
For a list/tuple of regular expressions, `names_to` must also be a list/tuple and the lengths of both diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py index 9bb94000b..ceb94903b 100644 --- a/janitor/polars/pivot_longer.py +++ b/janitor/polars/pivot_longer.py @@ -2,7 +2,7 @@ from collections import defaultdict from itertools import chain -from typing import Any, Mapping, Optional, Pattern, Sequence, Union +from typing import Any, Iterable, Optional, Union from janitor.utils import check, import_message @@ -10,7 +10,7 @@ import polars as pl import polars.selectors as cs from polars.datatypes.classes import DataTypeClass - from polars.type_aliases import ColumnNameOrSelector, PolarsDataType + from polars.type_aliases import IntoExpr, PolarsDataType except ImportError: import_message( submodule="polars", @@ -22,16 +22,17 @@ def _pivot_longer( df: pl.DataFrame, - index: Union[ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None], - column_names: Union[ - ColumnNameOrSelector, Sequence[ColumnNameOrSelector], None - ], + index: Union[IntoExpr, Iterable[IntoExpr], None], + column_names: Union[IntoExpr, Iterable[IntoExpr], None], names_to: Optional[Union[list, str]], values_to: Optional[str], - names_sep: Optional[Union[str, Pattern, None]], - names_pattern: Optional[Union[list, tuple, str, Pattern, None]], + names_sep: Optional[Union[str, None]], + names_pattern: Optional[Union[list, tuple, str, None]], names_transform: Optional[Union[PolarsDataType, dict]], ) -> pl.DataFrame: + """ + Unpivots a DataFrame to long form. + """ ( df, @@ -64,6 +65,11 @@ def _pivot_longer( value_name=values_to, ) + # the core idea is to do the transformation on the columns + # before flipping into long form + # typically less work is done this way + # compared to flipping and then processing the columns + if names_sep is not None: return _pivot_longer_names_sep( df=df, @@ -75,7 +81,7 @@ def _pivot_longer( names_transform=names_transform, ) - if isinstance(names_pattern, (str, Pattern)): + if isinstance(names_pattern, str): return _pivot_longer_names_pattern_str( df=df, index=index, @@ -107,15 +113,15 @@ def _pivot_longer( def _pivot_longer_names_sep( df: pl.DataFrame, - index: Sequence, - column_names: Sequence, - names_to: Sequence, + index: Iterable, + column_names: Iterable, + names_to: Iterable, names_sep: str, values_to: str, names_transform: dict, ) -> pl.DataFrame: """ - This takes care of pivoting scenarios where + This takes care of unpivoting scenarios where names_sep is provided. """ @@ -167,15 +173,15 @@ def _pivot_longer_names_sep( def _pivot_longer_names_pattern_str( df: pl.DataFrame, - index: Union[Sequence, None], - column_names: Union[Sequence, None], - names_to: Sequence, + index: Iterable, + column_names: Iterable, + names_to: Iterable, names_pattern: str, values_to: str, names_transform: dict, ) -> pl.DataFrame: """ - This takes care of pivoting scenarios where + This takes care of unpivoting scenarios where names_pattern is a string. 
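+
+    For example (illustrative): a column label like `new_sp_m5564`,
+    matched against `names_pattern=r"new_?(.+)_(.)(\d+)"`, is split by
+    polars' `str.extract_groups` into the fields `("sp", "m", "5564")`,
+    which are then paired with the entries of `names_to` before the
+    frame is flipped into long form.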
""" @@ -222,15 +228,15 @@ def _pivot_longer_names_pattern_str( def _pivot_longer_values_to_sequence( df: pl.DataFrame, - index: Union[Sequence, None], - column_names: Union[Sequence, None], - names_to: Sequence, - names_pattern: Sequence, - values_to: Sequence, + index: Iterable, + column_names: Iterable, + names_to: Iterable, + names_pattern: Iterable, + values_to: Iterable, names_transform: dict, ) -> pl.DataFrame: """ - This takes care of pivoting scenarios where + This takes care of unpivoting scenarios where values_to is a list/tuple. """ columns = df.select(column_names).columns @@ -299,13 +305,13 @@ def _pivot_longer_values_to_sequence( def _pivot_longer_names_pattern_sequence( df: pl.DataFrame, - index: Union[Sequence, None], - column_names: Union[Sequence, None], - names_to: Sequence, - names_pattern: Sequence, + index: Iterable, + column_names: Iterable, + names_to: Iterable, + names_pattern: Iterable, ) -> pl.DataFrame: """ - This takes care of pivoting scenarios where + This takes care of unpivoting scenarios where names_pattern is a list/tuple. """ columns = df.select(column_names).columns @@ -358,19 +364,17 @@ def _pivot_longer_names_pattern_sequence( def _pivot_longer_no_dot_value( df: pl.DataFrame, - outcome: Mapping, - names_to: Sequence, + outcome: pl.Series, + names_to: Iterable, values_to: str, - index: Sequence, - columns: Sequence, + index: Iterable, + columns: Iterable, names_transform: dict, -): +) -> pl.DataFrame: """ Reshape the data for scenarios where .value is not present in names_to, or names_to is not a list/tuple. - - Returns a DataFrame. """ contents = [] for col_name, mapping in zip(columns, outcome): @@ -395,17 +399,15 @@ def _pivot_longer_no_dot_value( def _pivot_longer_dot_value( df: pl.DataFrame, - names_to: Sequence, - outcome: pl.DataFrame, - index: Sequence, - columns: Sequence, + names_to: Iterable, + outcome: pl.Series, + index: Iterable, + columns: Iterable, names_transform: Union[PolarsDataType, dict], ) -> pl.DataFrame: """ Pivots the dataframe into the final form, for scenarios where .value is in names_to. - - Returns a DataFrame. """ booleans = outcome.struct.unnest().select(pl.all().is_null().any()) for position in range(len(names_to)): @@ -477,16 +479,14 @@ def _pivot_longer_dot_value( def _pivot_longer_dot_value_only( df: pl.DataFrame, - names_to: Sequence, - outcome: pl.DataFrame, - index: Sequence, - columns: Sequence, + names_to: Iterable, + outcome: pl.Series, + index: Iterable, + columns: Iterable, ) -> pl.DataFrame: """ Pivots the dataframe into the final form, for scenarios where only '.value' is present in names_to. - - Returns a DataFrame. """ if names_to.count(".value") > 1: @@ -551,8 +551,8 @@ def _check_type_single(entry): raise TypeError( f"The argument passed to the {arg_name} parameter " "should be a string type, a ColumnSelector, " - "or a list/tuple that contains " - "a string and/or a ColumnSelector." + "an expression or a list/tuple that contains " + "a string and/or a ColumnSelector and/or an expression." 
) if isinstance(arg_value, (list, tuple)): diff --git a/tests/polars/functions/test_pivot_longer_polars.py b/tests/polars/functions/test_pivot_longer_polars.py index 86d2636c8..ee3b59b60 100644 --- a/tests/polars/functions/test_pivot_longer_polars.py +++ b/tests/polars/functions/test_pivot_longer_polars.py @@ -309,7 +309,7 @@ def test_names_pat_str(df_checks): names_to=(".value", "age"), names_pattern="(.+)(.)", names_transform={"age": pl.Int64}, - ).sort(by=cs.all()) + ).sort(by=pl.all()) actual = [ {"famid": 1, "birth": 1, "age": 1, "ht": 2.8}, @@ -331,7 +331,7 @@ def test_names_pat_str(df_checks): {"famid": 3, "birth": 3, "age": 1, "ht": 2.1}, {"famid": 3, "birth": 3, "age": 2, "ht": 2.9}, ] - actual = pl.DataFrame(actual).sort(by=cs.all()) + actual = pl.DataFrame(actual).sort(by=pl.all()) assert_frame_equal(result, actual, check_dtype=False) @@ -342,7 +342,7 @@ def test_no_column_names(df_checks): are assigned to the index parameter. """ assert_frame_equal( - df_checks.janitor.pivot_longer(index=cs.all()), + df_checks.janitor.pivot_longer(index=pl.all()), df_checks, ) @@ -452,10 +452,10 @@ def test_names_pattern_str(test_df): """Test output for names_pattern and .value.""" result = test_df.janitor.pivot_longer( - column_names=cs.all(), + column_names=pl.all(), names_to=["set", ".value"], names_pattern="(.+)_(.+)", - ).sort(by=cs.all()) + ).sort(by=pl.all()) assert_frame_equal(result, actual) @@ -463,10 +463,10 @@ def test_names_sep_str(test_df): """Test output for names_pattern and .value.""" result = test_df.janitor.pivot_longer( - column_names=cs.all(), + column_names=pl.all(), names_to=["set", ".value"], names_sep="_", - ).sort(by=cs.all()) + ).sort(by=pl.all()) assert_frame_equal(result, actual) @@ -599,7 +599,7 @@ def test_not_dot_value_pattern(not_dot_value): names_to=("event", "year"), names_pattern=r"(.+)_(.+)", values_to="score", - ).sort(by=cs.all()) + ).sort(by=pl.all()) assert_frame_equal(result, actual2) @@ -625,7 +625,7 @@ def test_multiple_dot_value(): names_to=(".value", "time", ".value"), names_pattern=r"(x|y)_([0-9])(_mean|_sd)", names_transform={"time": pl.Int64}, - ).sort(by=cs.all()) + ).sort(by=pl.all()) actual = { "unit": [1, 2, 3, 4, 1, 2, 3, 4], @@ -636,7 +636,7 @@ def test_multiple_dot_value(): "y_sd": [0.0, 1.0, 1.0, 1.0, -0.525, 0.623, -0.705, 0.662], } - actual = pl.DataFrame(actual).sort(by=cs.all()) + actual = pl.DataFrame(actual).sort(by=pl.all()) assert_frame_equal(result, actual) @@ -805,7 +805,7 @@ def test_output_values_to_seq(multiple_values_to): names_to=("Fruit"), values_to=("Pounds",), names_pattern=[r"M|O|W"], - ).sort(by=cs.all()) + ).sort(by=pl.all()) actual = [ {"City": "Houston", "State": "Texas", "Fruit": "Mango", "Pounds": 4}, From 317750379e3d24c5d6d79f6241ceafa0388e2b4a Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 10:53:57 +1000 Subject: [PATCH 33/46] keep changes related only to pivot_longer --- janitor/functions/clean_names.py | 137 ++++++++++++++----------------- janitor/functions/utils.py | 87 +------------------- janitor/spark/functions.py | 2 +- 3 files changed, 64 insertions(+), 162 deletions(-) diff --git a/janitor/functions/clean_names.py b/janitor/functions/clean_names.py index a38753fa8..71735a7fc 100644 --- a/janitor/functions/clean_names.py +++ b/janitor/functions/clean_names.py @@ -1,9 +1,7 @@ -"""Functions for cleaning columns/index names and/or column values.""" - -from __future__ import annotations +"""Functions for cleaning columns names.""" import unicodedata -from typing import Optional, 
Union +from typing import Hashable, Optional, Union import pandas as pd import pandas_flavor as pf @@ -79,9 +77,8 @@ def clean_names( Column selection is possible using the [`select`][janitor.functions.select.select] syntax. strip_underscores: Removes the outer underscores from all - column names/values. Default None keeps outer underscores. - Values can be either 'left', 'right' or 'both' - or the respective shorthand 'l', + column names. Default None keeps outer underscores. Values can be + either 'left', 'right' or 'both' or the respective shorthand 'l', 'r' and True. case_type: Whether to make columns lower or uppercase. Current case may be preserved with 'preserve', @@ -91,17 +88,15 @@ def clean_names( remove_special: Remove special characters from columns. Only letters, numbers and underscores are preserved. strip_accents: Whether or not to remove accents from - columns names/values. + columns names. preserve_original_labels: Preserve original names. This is later retrievable using `df.original_labels`. Applies if `axis` is not None. - enforce_string: Whether or not to convert all - column names/values to string type. - Defaults to True, but can be turned off. + enforce_string: Whether or not to convert all column names + to string type. Defaults to True, but can be turned off. Columns with >1 levels will not be converted by default. - truncate_limit: Truncates formatted column names/values - to the specified length. - Default None does not truncate. + truncate_limit: Truncates formatted column names to + the specified length. Default None does not truncate. Raises: ValueError: If `axis=None` and `column_names=None`. @@ -121,7 +116,7 @@ def clean_names( column_names = [column_names] df = df.copy() for column_name in column_names: - df[column_name] = _clean_names( + df[column_name] = _clean_names_single_object( obj=df[column_name], enforce_string=enforce_string, case_type=case_type, @@ -141,7 +136,7 @@ def clean_names( for number in range(target_axis.nlevels) ] target_axis = [ - _clean_names( + _clean_names_single_object( obj=obj, enforce_string=enforce_string, case_type=case_type, @@ -153,7 +148,7 @@ def clean_names( for obj in target_axis ] else: - target_axis = _clean_names( + target_axis = _clean_names_single_object( obj=target_axis, enforce_string=enforce_string, case_type=case_type, @@ -169,108 +164,100 @@ def clean_names( return df -def _clean_names( +def _clean_names_single_object( obj: Union[pd.Index, pd.Series], - strip_underscores: Optional[Union[str, bool]] = None, - case_type: str = "lower", - remove_special: bool = False, - strip_accents: bool = False, - enforce_string: bool = False, - truncate_limit: int = None, -) -> Union[pd.Index, pd.Series]: + enforce_string, + case_type, + remove_special, + strip_accents, + strip_underscores, + truncate_limit, +): """ - Generic function to clean labels in a pandas object. + Apply _clean_names on a single pandas object. 
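+
+    For example (illustrative), a label like `Sepal.Length` becomes
+    `sepal_length` with `case_type="lower"`, since spaces, periods and
+    similar characters are normalized to underscores.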
""" - if enforce_string and not _is_str_or_cat(obj): + if enforce_string and not (_is_str_or_cat(obj)): obj = obj.astype(str) - obj = _change_case(obj=obj, case_type=case_type) - obj = _normalize_1(obj=obj) + obj = _change_case(obj, case_type) + obj = _normalize_1(obj) if remove_special: - obj = obj.str.replace( - pat="[^A-Za-z_\\d]", repl="", regex=True - ).str.strip() + obj = obj.map(_remove_special) if strip_accents: - obj = _strip_accents(obj=obj) + obj = obj.map(_strip_accents) obj = obj.str.replace(pat="_+", repl="_", regex=True) - obj = _strip_underscores_func( - obj, - strip_underscores=strip_underscores, - ) + obj = _strip_underscores_func(obj, strip_underscores=strip_underscores) if truncate_limit: obj = obj.str[:truncate_limit] return obj -def _change_case( - obj: Union[pd.Index, pd.Series], - case_type: str, -) -> Union[pd.Index, pd.Series]: - """Change case of labels in obj.""" +def _change_case(col: Union[pd.Index, pd.Series], case_type: str) -> str: + """Change case of labels in pandas object.""" case_types = {"preserve", "upper", "lower", "snake"} case_type = case_type.lower() if case_type not in case_types: raise JanitorError(f"case_type must be one of: {case_types}") - if case_type == "preserve": - return obj + return col if case_type == "upper": - return obj.str.upper() + return col.str.upper() if case_type == "lower": - return obj.str.lower() + return col.str.lower() # Implementation taken from: https://gist.github.com/jaytaylor/3660565 # by @jtaylor return ( - obj.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) + col.str.replace(pat=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", regex=True) .str.replace(pat=r"([a-z0-9])([A-Z])", repl=r"\1_\2", regex=True) .str.lower() ) -def _normalize_1( - obj: Union[pd.Index, pd.Series] -) -> Union[pd.Index, pd.Series]: - """Perform normalization of labels in obj.""" +def _remove_special(label: Hashable) -> str: + """Remove special characters from label.""" + return "".join( + [item for item in str(label) if item.isalnum() or "_" in item] + ) + + +def _normalize_1(col: Union[pd.Index, pd.Series]) -> str: + """Perform normalization of labels in pandas object.""" FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] for search, replace in FIXES: - obj = obj.str.replace(pat=search, repl=replace, regex=True) - - return obj + col = col.str.replace(pat=search, repl=replace, regex=True) + return col -def _strip_accents( - obj: Union[pd.Index, pd.Series], -) -> Union[pd.Index, pd.Series]: +def _strip_accents(label: Hashable) -> str: """Remove accents from a label. Inspired from [StackOverflow][so]. 
[so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin """ # noqa: E501 - return obj.map( - lambda f: "".join( - [ - letter - for letter in unicodedata.normalize("NFD", str(f)) - if not unicodedata.combining(letter) - ] - ) + + return "".join( + [ + letter + for letter in unicodedata.normalize("NFD", str(label)) + if not unicodedata.combining(letter) + ] ) def _strip_underscores_func( - obj: Union[pd.Index, pd.Series], - strip_underscores: Union[str, bool] = None, -) -> Union[pd.Index, pd.Series]: - """Strip underscores.""" + col: Union[pd.Index, pd.Series], strip_underscores: Union[str, bool] = None +) -> pd.DataFrame: + """Strip underscores from a pandas object.""" underscore_options = {None, "left", "right", "both", "l", "r", True} if strip_underscores not in underscore_options: raise JanitorError( f"strip_underscores must be one of: {underscore_options}" ) - if strip_underscores in {"left", "l"}: - return obj.str.lstrip("_") - if strip_underscores in {"right", "r"}: - return obj.str.rstrip("_") + + if strip_underscores in ["left", "l"]: + return col.str.lstrip("_") + if strip_underscores in ["right", "r"]: + return col.str.rstrip("_") if strip_underscores in {True, "both"}: - return obj.str.strip("_") - return obj + return col.str.strip("_") + return col diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py index 4e1f443ee..8aa4d346b 100644 --- a/janitor/functions/utils.py +++ b/janitor/functions/utils.py @@ -5,7 +5,6 @@ import fnmatch import inspect import re -import unicodedata import warnings from collections.abc import Callable as dispatch_callable from dataclasses import dataclass @@ -37,13 +36,7 @@ from pandas.core.common import is_bool_indexer from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy -from janitor.errors import JanitorError -from janitor.utils import ( - _expand_grid, - check, - check_column, - find_stack_level, -) +from janitor.utils import _expand_grid, check, check_column, find_stack_level warnings.simplefilter("always", DeprecationWarning) @@ -1140,81 +1133,3 @@ def __eq__(self, other): """ self.join_args = (self.cols, other.cols, "==") return self - - -def _change_case( - obj: str, - case_type: str, -) -> str: - """Change case of obj.""" - case_types = {"preserve", "upper", "lower", "snake"} - case_type = case_type.lower() - if case_type not in case_types: - raise JanitorError(f"type must be one of: {case_types}") - - if case_type == "preserve": - return obj - if case_type == "upper": - return obj.upper() - if case_type == "lower": - return obj.lower() - # Implementation adapted from: https://gist.github.com/jaytaylor/3660565 - # by @jtaylor - obj = re.sub(pattern=r"(.)([A-Z][a-z]+)", repl=r"\1_\2", string=obj) - obj = re.sub(pattern=r"([a-z0-9])([A-Z])", repl=r"\1_\2", string=obj) - return obj.lower() - - -def _normalize_1(obj: str) -> str: - """Perform normalization of obj.""" - FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")] - for search, replace in FIXES: - obj = re.sub(pattern=search, repl=replace, string=obj) - - return obj - - -def _remove_special( - obj: str, -) -> str: - """Remove special characters from obj.""" - obj = [item for item in obj if item.isalnum() or (item == "_")] - return "".join(obj) - - -def _strip_accents( - obj: str, -) -> str: - """Remove accents from obj. - - Inspired from [StackOverflow][so]. 
- - [so]: https://stackoverflow.com/questions/517923/what-is-the-best-way-to-remove-accents-in-a-python-unicode-strin - """ # noqa: E501 - - obj = [ - letter - for letter in unicodedata.normalize("NFD", obj) - if not unicodedata.combining(letter) - ] - return "".join(obj) - - -def _strip_underscores_func( - obj: str, - strip_underscores: Union[str, bool] = None, -) -> str: - """Strip underscores from obj.""" - underscore_options = {None, "left", "right", "both", "l", "r", True} - if strip_underscores not in underscore_options: - raise JanitorError( - f"strip_underscores must be one of: {underscore_options}" - ) - - if strip_underscores in {"left", "l"}: - return obj.lstrip("_") - if strip_underscores in {"right", "r"}: - return obj.rstrip("_") - if strip_underscores in {True, "both"}: - return obj.strip("_") - return obj diff --git a/janitor/spark/functions.py b/janitor/spark/functions.py index 57abd1824..a43f7338d 100644 --- a/janitor/spark/functions.py +++ b/janitor/spark/functions.py @@ -4,7 +4,7 @@ from typing import Union from janitor import utils as janitor_utils -from janitor.functions.utils import ( +from janitor.functions.clean_names import ( _change_case, _normalize_1, _remove_special, From ee899b2404da373e15b840d8666f55769b6da662 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 10:58:23 +1000 Subject: [PATCH 34/46] pd -> pl --- janitor/polars/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index c44cd635d..d22e0b581 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -49,7 +49,7 @@ def pivot_longer( >>> import polars as pl >>> import polars.selectors as cs >>> import janitor.polars - >>> df = pd.DataFrame( + >>> df = pl.DataFrame( ... { ... "Sepal.Length": [5.1, 5.9], ... 
"Sepal.Width": [3.5, 3.0], diff --git a/pyproject.toml b/pyproject.toml index 0a697589f..e1faf6275 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 55 +fail-under = 10 ignore-init-method = true ignore-init-module = true ignore-module = false From 8ea9b712c83f592ebfcf582063023f2fbd1b9000 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 10:58:30 +1000 Subject: [PATCH 35/46] pd -> pl --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e1faf6275..0a697589f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 10 +fail-under = 55 ignore-init-method = true ignore-init-module = true ignore-module = false From d12ae1aa00329be2699cdc2c3beab9a037a50ebf Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 11:03:12 +1000 Subject: [PATCH 36/46] df.pivot_longer -> df.janitor.pivot_longer --- janitor/polars/__init__.py | 14 +++++++------- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index d22e0b581..ba0930584 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -224,7 +224,7 @@ def pivot_longer( │ 514 ┆ 545 ┆ Red Sox ┆ 2007 ┆ 2008 │ │ 573 ┆ 526 ┆ Yankees ┆ 2007 ┆ 2008 │ └─────┴─────┴─────────┴───────┴───────┘ - >>> df.pivot_longer( + >>> df.janitor.pivot_longer( ... index = 'team', ... names_to = ['year', 'hr'], ... names_pattern = ['year', 'hr'] @@ -265,12 +265,12 @@ def pivot_longer( │ Hoover ┆ Alabama ┆ Niko ┆ 90 ┆ 14 ┆ 43 ┆ 34 ┆ 18 │ └─────────┴─────────┴──────────┴───────┴────────┴────────────┴─────┴───────┘ - >>> df.pivot_longer( - ... index=["City", "State"], - ... column_names=cs.numeric(), - ... names_to=("Fruit", "Drink"), - ... values_to=("Pounds", "Ounces"), - ... names_pattern=["M|O|W", "G|V"], + >>> df.janitor.pivot_longer( + ... index=["City", "State"], + ... column_names=cs.numeric(), + ... names_to=("Fruit", "Drink"), + ... values_to=("Pounds", "Ounces"), + ... names_pattern=["M|O|W", "G|V"], ... 
) shape: (9, 6) ┌─────────┬─────────┬────────────┬────────┬───────┬────────┐ diff --git a/pyproject.toml b/pyproject.toml index 0a697589f..e1faf6275 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 55 +fail-under = 10 ignore-init-method = true ignore-init-module = true ignore-module = false From 652f3e3ffe78f361464ddb50569f12531b4cf6b2 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 11:03:18 +1000 Subject: [PATCH 37/46] df.pivot_longer -> df.janitor.pivot_longer --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e1faf6275..0a697589f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 10 +fail-under = 55 ignore-init-method = true ignore-init-module = true ignore-module = false From 9b9c1a940ba1ba833d4d5e755921f47c4c40e65a Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 11:07:51 +1000 Subject: [PATCH 38/46] pd -> pl --- janitor/polars/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index ba0930584..19f344386 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -242,7 +242,7 @@ def pivot_longer( └─────────┴─────┴──────┘ Multiple `values_to`: - >>> df = pd.DataFrame( + >>> df = pl.DataFrame( ... { ... "City": ["Houston", "Austin", "Hoover"], ... "State": ["Texas", "Texas", "Alabama"], diff --git a/pyproject.toml b/pyproject.toml index 0a697589f..e1faf6275 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 55 +fail-under = 10 ignore-init-method = true ignore-init-module = true ignore-module = false From 69c273fb08bdf616de158730348970239bf4a160 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 11:08:01 +1000 Subject: [PATCH 39/46] pd -> pl --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e1faf6275..0a697589f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 10 +fail-under = 55 ignore-init-method = true ignore-init-module = true ignore-module = false From b3391e8d5e166aa2a3b8f081d2bc8cf52c39db9b Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 11:12:50 +1000 Subject: [PATCH 40/46] add >>> df --- janitor/polars/__init__.py | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py index 19f344386..5c94ea870 100644 --- a/janitor/polars/__init__.py +++ b/janitor/polars/__init__.py @@ -254,6 +254,7 @@ def pivot_longer( ... "Vodka": [20, 33, 18], ... }, ... 
) + >>> df shape: (3, 8) ┌─────────┬─────────┬──────────┬───────┬────────┬────────────┬─────┬───────┐ │ City ┆ State ┆ Name ┆ Mango ┆ Orange ┆ Watermelon ┆ Gin ┆ Vodka │ diff --git a/pyproject.toml b/pyproject.toml index 0a697589f..e1faf6275 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 55 +fail-under = 10 ignore-init-method = true ignore-init-module = true ignore-module = false From 4ffaac5060ebc76a5f5599ffc8c4f863f034963d Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 11:13:02 +1000 Subject: [PATCH 41/46] add >>> df --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e1faf6275..0a697589f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ target-version = ['py36', 'py37', 'py38'] [tool.interrogate] exclude = ["setup.py", "docs", "nbconvert_config.py"] -fail-under = 10 +fail-under = 55 ignore-init-method = true ignore-init-module = true ignore-module = false From 1de57bbfc44324ae1c56b2c2bb06cd42418f6a6c Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Tue, 30 Apr 2024 20:08:18 +1000 Subject: [PATCH 42/46] keep changes related only to polars pivot_longer --- janitor/functions/pivot.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py index 51bc78419..7efeba45b 100644 --- a/janitor/functions/pivot.py +++ b/janitor/functions/pivot.py @@ -98,7 +98,7 @@ def pivot_longer( 6 setosa Petal.Width 0.2 7 virginica Petal.Width 1.8 - Split the column labels into individual columns: + Split the column labels into parts: >>> df.pivot_longer( ... index = 'Species', ... names_to = ('part', 'dimension'), @@ -167,7 +167,7 @@ def pivot_longer( value int64 dtype: object - Use multiple `.value` to reshape the dataframe: + Use multiple `.value` to reshape dataframe: >>> df = pd.DataFrame( ... [ ... { @@ -265,6 +265,16 @@ def pivot_longer( ... "Gin": [16, 200, 34], ... "Vodka": [20, 33, 18], ... }, + ... columns=[ + ... "City", + ... "State", + ... "Name", + ... "Mango", + ... "Orange", + ... "Watermelon", + ... "Gin", + ... "Vodka", + ... ], ... 
)
    >>> df
          City    State      Name  Mango  Orange  Watermelon  Gin  Vodka

From e4957908d1f9877ad8c8cd4b07e1a3a78d8da161 Mon Sep 17 00:00:00 2001
From: "samuel.oranyeli"
Date: Wed, 1 May 2024 20:16:31 +1000
Subject: [PATCH 43/46] add polars support to read_commandline

---
 CHANGELOG.md                   |   2 +-
 janitor/io.py                  |  28 +-
 janitor/polars/__init__.py     | 342 -----------------
 janitor/polars/pivot_longer.py | 669 ---------------------------------
 4 files changed, 23 insertions(+), 1018 deletions(-)
 delete mode 100644 janitor/polars/__init__.py
 delete mode 100644 janitor/polars/pivot_longer.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8d95acbb5..9aea6a879 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
 # Changelog

 ## [Unreleased]
-- [ENH] Added a `pivot_longer` method for polars - Issue #1352
+- [ENH] `read_commandline` function now supports polars - Issue #1352

 ## [v0.27.0] - 2024-03-21

diff --git a/janitor/io.py b/janitor/io.py
index 1912afe8c..4741cb4d2 100644
--- a/janitor/io.py
+++ b/janitor/io.py
@@ -8,7 +8,7 @@
 from glob import glob
 from io import StringIO
 from itertools import chain
-from typing import IO, TYPE_CHECKING, Any, Iterable, Union
+from typing import IO, TYPE_CHECKING, Any, Iterable, Mapping, Union

 import pandas as pd

@@ -93,7 +93,9 @@ def read_csvs(
     return dfs_dict


-def read_commandline(cmd: str, **kwargs: Any) -> pd.DataFrame:
+def read_commandline(
+    cmd: str, engine: str = "pandas", **kwargs: Any
+) -> Mapping:
     """Read a CSV file based on a command-line command.

     For example, you may wish to run the following command on `sep-quarter.csv`
@@ -111,26 +111,42 @@ def read_commandline(cmd: str, **kwargs: Any) -> pd.DataFrame:
     ```

     This function assumes that your command line command will return
-    an output that is parsable using `pandas.read_csv` and StringIO.
-    We default to using `pd.read_csv` underneath the hood.
+    an output that is parsable using the relevant engine and StringIO.
+    This function defaults to using `pd.read_csv` under the hood.
     Keyword arguments are passed through to read_csv.

     Args:
         cmd: Shell command to preprocess a file on disk.
+        engine: DataFrame engine to process the output of the shell command.
+            Currently supports both pandas and polars.
         **kwargs: Keyword arguments that are passed through to
-            `pd.read_csv()`.
+            the engine's csv reader.
+
     Returns:
-        A pandas DataFrame parsed from the stdout of the underlying
+        A DataFrame parsed from the stdout of the underlying
         shell.
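+
+    A quick polars sketch (assuming `sep-quarter.csv` exists on disk
+    and is readable by `cat`):
+
+    ```python
+    from janitor.io import read_commandline
+
+    # hypothetical usage: parse the shell output with polars
+    df = read_commandline("cat sep-quarter.csv", engine="polars")
+    ```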
""" check("cmd", cmd, [str]) + if engine not in {"pandas", "polars"}: + raise ValueError("engine should be either pandas or polars.") # adding check=True ensures that an explicit, clear error # is raised, so that the user can see the reason for the failure outcome = subprocess.run( cmd, shell=True, capture_output=True, text=True, check=True ) + if engine == "polars": + try: + import polars as pl + except ImportError: + import_message( + submodule="polars", + package="polars", + conda_channel="conda-forge", + pip_install=True, + ) + return pl.read_csv(StringIO(outcome.stdout), **kwargs) return pd.read_csv(StringIO(outcome.stdout), **kwargs) diff --git a/janitor/polars/__init__.py b/janitor/polars/__init__.py deleted file mode 100644 index 5c94ea870..000000000 --- a/janitor/polars/__init__.py +++ /dev/null @@ -1,342 +0,0 @@ -from typing import Any, Iterable, Optional, Union - -from polars.type_aliases import IntoExpr - -from janitor.utils import import_message - -from .pivot_longer import _pivot_longer - -try: - import polars as pl -except ImportError: - import_message( - submodule="polars", - package="polars", - conda_channel="conda-forge", - pip_install=True, - ) - - -@pl.api.register_dataframe_namespace("janitor") -class PolarsFrame: - def __init__(self, df: pl.DataFrame) -> pl.DataFrame: - self._df = df - - def pivot_longer( - self, - index: Union[IntoExpr, Iterable[IntoExpr], None] = None, - column_names: Union[IntoExpr, Iterable[IntoExpr], None] = None, - names_to: Optional[Union[list, tuple, str]] = "variable", - values_to: Optional[Union[list, tuple, str]] = "value", - names_sep: Optional[Union[str, None]] = None, - names_pattern: Optional[Union[list, tuple, str, None]] = None, - names_transform: Optional[Any] = pl.Utf8, - ) -> pl.DataFrame: - """ - Unpivots a DataFrame from *wide* to *long* format. - - It is modeled after the `pivot_longer` function in R's tidyr package, - and also takes inspiration from the `melt` function in R's data.table package. - - This function is useful to massage a DataFrame into a format where - one or more columns are considered measured variables, and all other - columns are considered as identifier variables. - - All measured variables are *unpivoted* (and typically duplicated) along the - row axis. - - Examples: - >>> import polars as pl - >>> import polars.selectors as cs - >>> import janitor.polars - >>> df = pl.DataFrame( - ... { - ... "Sepal.Length": [5.1, 5.9], - ... "Sepal.Width": [3.5, 3.0], - ... "Petal.Length": [1.4, 5.1], - ... "Petal.Width": [0.2, 1.8], - ... "Species": ["setosa", "virginica"], - ... } - ... 
) - >>> df - shape: (2, 5) - ┌──────────────┬─────────────┬──────────────┬─────────────┬───────────┐ - │ Sepal.Length ┆ Sepal.Width ┆ Petal.Length ┆ Petal.Width ┆ Species │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │ - ╞══════════════╪═════════════╪══════════════╪═════════════╪═══════════╡ - │ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ setosa │ - │ 5.9 ┆ 3.0 ┆ 5.1 ┆ 1.8 ┆ virginica │ - └──────────────┴─────────────┴──────────────┴─────────────┴───────────┘ - - Replicate polars' [melt](https://docs.pola.rs/py-polars/html/reference/dataframe/api/polars.DataFrame.melt.html#polars-dataframe-melt): - >>> df.janitor.pivot_longer(index = 'Species') - shape: (8, 3) - ┌───────────┬──────────────┬───────┐ - │ Species ┆ variable ┆ value │ - │ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ f64 │ - ╞═══════════╪══════════════╪═══════╡ - │ setosa ┆ Sepal.Length ┆ 5.1 │ - │ virginica ┆ Sepal.Length ┆ 5.9 │ - │ setosa ┆ Sepal.Width ┆ 3.5 │ - │ virginica ┆ Sepal.Width ┆ 3.0 │ - │ setosa ┆ Petal.Length ┆ 1.4 │ - │ virginica ┆ Petal.Length ┆ 5.1 │ - │ setosa ┆ Petal.Width ┆ 0.2 │ - │ virginica ┆ Petal.Width ┆ 1.8 │ - └───────────┴──────────────┴───────┘ - - Split the column labels into individual columns: - >>> df.janitor.pivot_longer( - ... index = 'Species', - ... names_to = ('part', 'dimension'), - ... names_sep = '.', - ... ) - shape: (8, 4) - ┌───────────┬───────┬───────────┬───────┐ - │ Species ┆ part ┆ dimension ┆ value │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ f64 │ - ╞═══════════╪═══════╪═══════════╪═══════╡ - │ setosa ┆ Sepal ┆ Length ┆ 5.1 │ - │ virginica ┆ Sepal ┆ Length ┆ 5.9 │ - │ setosa ┆ Sepal ┆ Width ┆ 3.5 │ - │ virginica ┆ Sepal ┆ Width ┆ 3.0 │ - │ setosa ┆ Petal ┆ Length ┆ 1.4 │ - │ virginica ┆ Petal ┆ Length ┆ 5.1 │ - │ setosa ┆ Petal ┆ Width ┆ 0.2 │ - │ virginica ┆ Petal ┆ Width ┆ 1.8 │ - └───────────┴───────┴───────────┴───────┘ - - Retain parts of the column names as headers: - >>> df.janitor.pivot_longer( - ... index = 'Species', - ... names_to = ('part', '.value'), - ... names_sep = '.', - ... ) - shape: (4, 4) - ┌───────────┬───────┬────────┬───────┐ - │ Species ┆ part ┆ Length ┆ Width │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ f64 ┆ f64 │ - ╞═══════════╪═══════╪════════╪═══════╡ - │ setosa ┆ Sepal ┆ 5.1 ┆ 3.5 │ - │ virginica ┆ Sepal ┆ 5.9 ┆ 3.0 │ - │ setosa ┆ Petal ┆ 1.4 ┆ 0.2 │ - │ virginica ┆ Petal ┆ 5.1 ┆ 1.8 │ - └───────────┴───────┴────────┴───────┘ - - Split the column labels based on regex: - >>> df = pl.DataFrame({"id": [1], "new_sp_m5564": [2], "newrel_f65": [3]}) - >>> df - shape: (1, 3) - ┌─────┬──────────────┬────────────┐ - │ id ┆ new_sp_m5564 ┆ newrel_f65 │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪══════════════╪════════════╡ - │ 1 ┆ 2 ┆ 3 │ - └─────┴──────────────┴────────────┘ - >>> df.janitor.pivot_longer( - ... index = 'id', - ... names_to = ('diagnosis', 'gender', 'age'), - ... names_pattern = r"new_?(.+)_(.)(\\d+)", - ... ) - shape: (2, 5) - ┌─────┬───────────┬────────┬──────┬───────┐ - │ id ┆ diagnosis ┆ gender ┆ age ┆ value │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ str ┆ str ┆ str ┆ i64 │ - ╞═════╪═══════════╪════════╪══════╪═══════╡ - │ 1 ┆ sp ┆ m ┆ 5564 ┆ 2 │ - │ 1 ┆ rel ┆ f ┆ 65 ┆ 3 │ - └─────┴───────────┴────────┴──────┴───────┘ - - Convert the dtypes of specific columns with `names_transform`: - >>> ( - ... df.janitor.pivot_longer( - ... index="id", - ... names_to=("diagnosis", "gender", "age"), - ... names_pattern=r"new_?(.+)_(.)(\\d+)", - ... names_transform={"age": pl.Int32}, - ... ) - ... 
) - shape: (2, 5) - ┌─────┬───────────┬────────┬──────┬───────┐ - │ id ┆ diagnosis ┆ gender ┆ age ┆ value │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ str ┆ str ┆ i32 ┆ i64 │ - ╞═════╪═══════════╪════════╪══════╪═══════╡ - │ 1 ┆ sp ┆ m ┆ 5564 ┆ 2 │ - │ 1 ┆ rel ┆ f ┆ 65 ┆ 3 │ - └─────┴───────────┴────────┴──────┴───────┘ - - Use multiple `.value` to reshape the dataframe: - >>> df = pl.DataFrame( - ... [ - ... { - ... "x_1_mean": 10, - ... "x_2_mean": 20, - ... "y_1_mean": 30, - ... "y_2_mean": 40, - ... "unit": 50, - ... } - ... ] - ... ) - >>> df - shape: (1, 5) - ┌──────────┬──────────┬──────────┬──────────┬──────┐ - │ x_1_mean ┆ x_2_mean ┆ y_1_mean ┆ y_2_mean ┆ unit │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞══════════╪══════════╪══════════╪══════════╪══════╡ - │ 10 ┆ 20 ┆ 30 ┆ 40 ┆ 50 │ - └──────────┴──────────┴──────────┴──────────┴──────┘ - >>> df.janitor.pivot_longer( - ... index="unit", - ... names_to=(".value", "time", ".value"), - ... names_pattern=r"(x|y)_([0-9])(_mean)", - ... ) - shape: (2, 4) - ┌──────┬──────┬────────┬────────┐ - │ unit ┆ time ┆ x_mean ┆ y_mean │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ str ┆ i64 ┆ i64 │ - ╞══════╪══════╪════════╪════════╡ - │ 50 ┆ 1 ┆ 10 ┆ 30 │ - │ 50 ┆ 2 ┆ 20 ┆ 40 │ - └──────┴──────┴────────┴────────┘ - - Reshape the dataframe by passing a sequence to `names_pattern`: - >>> df = pl.DataFrame({'hr1': [514, 573], - ... 'hr2': [545, 526], - ... 'team': ['Red Sox', 'Yankees'], - ... 'year1': [2007, 2007], - ... 'year2': [2008, 2008]}) - >>> df - shape: (2, 5) - ┌─────┬─────┬─────────┬───────┬───────┐ - │ hr1 ┆ hr2 ┆ team ┆ year1 ┆ year2 │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ str ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════════╪═══════╪═══════╡ - │ 514 ┆ 545 ┆ Red Sox ┆ 2007 ┆ 2008 │ - │ 573 ┆ 526 ┆ Yankees ┆ 2007 ┆ 2008 │ - └─────┴─────┴─────────┴───────┴───────┘ - >>> df.janitor.pivot_longer( - ... index = 'team', - ... names_to = ['year', 'hr'], - ... names_pattern = ['year', 'hr'] - ... ) - shape: (4, 3) - ┌─────────┬─────┬──────┐ - │ team ┆ hr ┆ year │ - │ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 │ - ╞═════════╪═════╪══════╡ - │ Red Sox ┆ 514 ┆ 2007 │ - │ Yankees ┆ 573 ┆ 2007 │ - │ Red Sox ┆ 545 ┆ 2008 │ - │ Yankees ┆ 526 ┆ 2008 │ - └─────────┴─────┴──────┘ - - Multiple `values_to`: - >>> df = pl.DataFrame( - ... { - ... "City": ["Houston", "Austin", "Hoover"], - ... "State": ["Texas", "Texas", "Alabama"], - ... "Name": ["Aria", "Penelope", "Niko"], - ... "Mango": [4, 10, 90], - ... "Orange": [10, 8, 14], - ... "Watermelon": [40, 99, 43], - ... "Gin": [16, 200, 34], - ... "Vodka": [20, 33, 18], - ... }, - ... ) - >>> df - shape: (3, 8) - ┌─────────┬─────────┬──────────┬───────┬────────┬────────────┬─────┬───────┐ - │ City ┆ State ┆ Name ┆ Mango ┆ Orange ┆ Watermelon ┆ Gin ┆ Vodka │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════════╪═════════╪══════════╪═══════╪════════╪════════════╪═════╪═══════╡ - │ Houston ┆ Texas ┆ Aria ┆ 4 ┆ 10 ┆ 40 ┆ 16 ┆ 20 │ - │ Austin ┆ Texas ┆ Penelope ┆ 10 ┆ 8 ┆ 99 ┆ 200 ┆ 33 │ - │ Hoover ┆ Alabama ┆ Niko ┆ 90 ┆ 14 ┆ 43 ┆ 34 ┆ 18 │ - └─────────┴─────────┴──────────┴───────┴────────┴────────────┴─────┴───────┘ - - >>> df.janitor.pivot_longer( - ... index=["City", "State"], - ... column_names=cs.numeric(), - ... names_to=("Fruit", "Drink"), - ... values_to=("Pounds", "Ounces"), - ... names_pattern=["M|O|W", "G|V"], - ... 
) - shape: (9, 6) - ┌─────────┬─────────┬────────────┬────────┬───────┬────────┐ - │ City ┆ State ┆ Fruit ┆ Pounds ┆ Drink ┆ Ounces │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ str ┆ str ┆ i64 ┆ str ┆ i64 │ - ╞═════════╪═════════╪════════════╪════════╪═══════╪════════╡ - │ Houston ┆ Texas ┆ Mango ┆ 4 ┆ Gin ┆ 16 │ - │ Austin ┆ Texas ┆ Mango ┆ 10 ┆ Gin ┆ 200 │ - │ Hoover ┆ Alabama ┆ Mango ┆ 90 ┆ Gin ┆ 34 │ - │ Houston ┆ Texas ┆ Orange ┆ 10 ┆ Vodka ┆ 20 │ - │ Austin ┆ Texas ┆ Orange ┆ 8 ┆ Vodka ┆ 33 │ - │ Hoover ┆ Alabama ┆ Orange ┆ 14 ┆ Vodka ┆ 18 │ - │ Houston ┆ Texas ┆ Watermelon ┆ 40 ┆ null ┆ null │ - │ Austin ┆ Texas ┆ Watermelon ┆ 99 ┆ null ┆ null │ - │ Hoover ┆ Alabama ┆ Watermelon ┆ 43 ┆ null ┆ null │ - └─────────┴─────────┴────────────┴────────┴───────┴────────┘ - - !!! info "New in version 0.28.0" - - Args: - index: Column(s) or selector(s) to use as identifier variables. - column_names: Column(s) or selector(s) to unpivot. - names_to: Name of new column as a string that will contain - what were previously the column names in `column_names`. - The default is `variable` if no value is provided. It can - also be a list/tuple of strings that will serve as new column - names, if `name_sep` or `names_pattern` is provided. - If `.value` is in `names_to`, new column names will be extracted - from part of the existing column names and overrides `values_to`. - values_to: Name of new column as a string that will contain what - were previously the values of the columns in `column_names`. - `values_to` can also be a list/tuple - and requires that `names_pattern` is also a list/tuple. - names_sep: Determines how the column name is broken up, if - `names_to` contains multiple values. It takes the same - specification as polars' `str.split` method. - names_pattern: Determines how the column name is broken up. - It can be a regular expression containing matching groups. - It takes the same - specification as polars' `str.extract_groups` method. - `names_pattern` can also be a list/tuple of regular expressions. - Under the hood it is processed with polars' `str.contains` function. - For a list/tuple of regular expressions, - `names_to` must also be a list/tuple and the lengths of both - arguments must match. - names_transform: Use this option to change the types of columns that - have been transformed to rows. - This does not applies to the values' columns. - It can be a single valid polars dtype, - or a dictionary pairing the new column names - with a valid polars dtype. - Applicable only if one of names_sep - or names_pattern is provided. - Returns: - A polars DataFrame that has been unpivoted from wide to long - format. 
- """ # noqa: E501 - return _pivot_longer( - df=self._df, - index=index, - column_names=column_names, - names_pattern=names_pattern, - names_sep=names_sep, - names_to=names_to, - values_to=values_to, - names_transform=names_transform, - ) diff --git a/janitor/polars/pivot_longer.py b/janitor/polars/pivot_longer.py deleted file mode 100644 index ceb94903b..000000000 --- a/janitor/polars/pivot_longer.py +++ /dev/null @@ -1,669 +0,0 @@ -"""pivot_longer implementation for polars.""" - -from collections import defaultdict -from itertools import chain -from typing import Any, Iterable, Optional, Union - -from janitor.utils import check, import_message - -try: - import polars as pl - import polars.selectors as cs - from polars.datatypes.classes import DataTypeClass - from polars.type_aliases import IntoExpr, PolarsDataType -except ImportError: - import_message( - submodule="polars", - package="polars", - conda_channel="conda-forge", - pip_install=True, - ) - - -def _pivot_longer( - df: pl.DataFrame, - index: Union[IntoExpr, Iterable[IntoExpr], None], - column_names: Union[IntoExpr, Iterable[IntoExpr], None], - names_to: Optional[Union[list, str]], - values_to: Optional[str], - names_sep: Optional[Union[str, None]], - names_pattern: Optional[Union[list, tuple, str, None]], - names_transform: Optional[Union[PolarsDataType, dict]], -) -> pl.DataFrame: - """ - Unpivots a DataFrame to long form. - """ - - ( - df, - index, - column_names, - names_to, - values_to, - names_sep, - names_pattern, - names_transform, - ) = _data_checks_pivot_longer( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - values_to=values_to, - names_sep=names_sep, - names_pattern=names_pattern, - names_transform=names_transform, - ) - - if not column_names: - return df - - if all((names_pattern is None, names_sep is None)): - return df.melt( - id_vars=index, - value_vars=column_names, - variable_name=names_to[0], - value_name=values_to, - ) - - # the core idea is to do the transformation on the columns - # before flipping into long form - # typically less work is done this way - # compared to flipping and then processing the columns - - if names_sep is not None: - return _pivot_longer_names_sep( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - names_sep=names_sep, - values_to=values_to, - names_transform=names_transform, - ) - - if isinstance(names_pattern, str): - return _pivot_longer_names_pattern_str( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - names_pattern=names_pattern, - values_to=values_to, - names_transform=names_transform, - ) - if isinstance(values_to, (list, tuple)): - return _pivot_longer_values_to_sequence( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - names_pattern=names_pattern, - values_to=values_to, - names_transform=names_transform, - ) - - return _pivot_longer_names_pattern_sequence( - df=df, - index=index, - column_names=column_names, - names_to=names_to, - names_pattern=names_pattern, - ) - - -def _pivot_longer_names_sep( - df: pl.DataFrame, - index: Iterable, - column_names: Iterable, - names_to: Iterable, - names_sep: str, - values_to: str, - names_transform: dict, -) -> pl.DataFrame: - """ - This takes care of unpivoting scenarios where - names_sep is provided. 
- """ - - columns = df.select(column_names).columns - outcome = ( - pl.Series(columns) - .str.split(by=names_sep) - .list.to_struct(n_field_strategy="max_width") - ) - len_outcome = len(outcome.struct.fields) - len_names_to = len(names_to) - if len_names_to != len_outcome: - raise ValueError( - "The length of names_to does not match " - "the number of fields extracted. " - f"The length of names_to is {len_names_to} " - "while the number of fields extracted is " - f"{len_outcome}." - ) - - if ".value" not in names_to: - outcome = outcome.struct.rename_fields(names_to) - return _pivot_longer_no_dot_value( - df=df, - outcome=outcome, - values_to=values_to, - index=index, - columns=columns, - names_to=names_to, - names_transform=names_transform, - ) - if all(label == ".value" for label in names_to): - return _pivot_longer_dot_value_only( - df=df, - names_to=names_to, - columns=columns, - index=index, - outcome=outcome, - ) - return _pivot_longer_dot_value( - df=df, - names_to=names_to, - columns=columns, - index=index, - outcome=outcome, - names_transform=names_transform, - ) - - -def _pivot_longer_names_pattern_str( - df: pl.DataFrame, - index: Iterable, - column_names: Iterable, - names_to: Iterable, - names_pattern: str, - values_to: str, - names_transform: dict, -) -> pl.DataFrame: - """ - This takes care of unpivoting scenarios where - names_pattern is a string. - """ - - columns = df.select(column_names).columns - outcome = pl.Series(columns).str.extract_groups(names_pattern) - len_outcome = len(outcome.struct.fields) - len_names_to = len(names_to) - if len_names_to != len_outcome: - raise ValueError( - f"The length of names_to does not match " - "the number of fields extracted. " - f"The length of names_to is {len_names_to} " - "while the number of fields extracted is " - f"{len_outcome}." - ) - if ".value" not in names_to: - outcome = outcome.struct.rename_fields(names_to) - return _pivot_longer_no_dot_value( - df=df, - outcome=outcome, - values_to=values_to, - index=index, - columns=columns, - names_to=names_to, - names_transform=names_transform, - ) - if all(label == ".value" for label in names_to): - return _pivot_longer_dot_value_only( - df=df, - names_to=names_to, - columns=columns, - index=index, - outcome=outcome, - ) - return _pivot_longer_dot_value( - df=df, - names_to=names_to, - columns=columns, - index=index, - outcome=outcome, - names_transform=names_transform, - ) - - -def _pivot_longer_values_to_sequence( - df: pl.DataFrame, - index: Iterable, - column_names: Iterable, - names_to: Iterable, - names_pattern: Iterable, - values_to: Iterable, - names_transform: dict, -) -> pl.DataFrame: - """ - This takes care of unpivoting scenarios where - values_to is a list/tuple. - """ - columns = df.select(column_names).columns - outcome = pl.DataFrame({"cols": columns}) - expressions = [ - pl.col("cols").str.contains(pattern).alias(f"cols{num}") - for num, pattern in enumerate(names_pattern) - ] - outcome = outcome.with_columns(expressions) - booleans = outcome.select(pl.exclude("cols").any()) - for position in range(len(names_pattern)): - if not booleans.to_series(position).item(): - raise ValueError( - "No match was returned for the regex " - f"at position {position} -> {names_pattern[position]}." 
- ) - names_booleans = pl - values_booleans = pl - for boolean, repl_name, repl_value in zip( - booleans.columns, names_to, values_to - ): - names_booleans = names_booleans.when(pl.col(boolean)).then( - pl.lit(repl_name) - ) - values_booleans = values_booleans.when(pl.col(boolean)).then( - pl.lit(repl_value) - ) - names_booleans = names_booleans.alias("value") - values_booleans = values_booleans.alias(".value") - filter_expr = pl.col(".value").is_not_null() - cum_expr = pl.col(".value").cum_count().over(".value").sub(1).alias("idx") - outcome = ( - outcome.select(names_booleans, values_booleans, pl.col("cols")) - .filter(filter_expr) - .with_columns(cum_expr) - ) - headers_dict = defaultdict(list) - non_headers_dict = defaultdict(list) - for num, col_name, value_header, name_header in zip( - outcome.get_column("idx"), - outcome.get_column("cols"), - outcome.get_column(".value"), - outcome.get_column("value"), - ): - non_headers_dict[num].append((col_name, name_header)) - headers_dict[num].append((col_name, value_header)) - contents = [] - for key, value in headers_dict.items(): - expression = [] if index is None else [pl.col(index)] - columns_to_select = [ - pl.col(col_name).alias(repl_name) for col_name, repl_name in value - ] - expression.extend(columns_to_select) - columns_to_append = [ - pl.lit(col_name, dtype=names_transform[repl_name]).alias(repl_name) - for col_name, repl_name in non_headers_dict[key] - ] - - contents.append(df.select(expression).with_columns(columns_to_append)) - columns_to_select = [] if not index else list(index) - columns_to_select.extend(chain.from_iterable(zip(names_to, values_to))) - return pl.concat(contents, how="diagonal_relaxed").select( - columns_to_select - ) - - -def _pivot_longer_names_pattern_sequence( - df: pl.DataFrame, - index: Iterable, - column_names: Iterable, - names_to: Iterable, - names_pattern: Iterable, -) -> pl.DataFrame: - """ - This takes care of unpivoting scenarios where - names_pattern is a list/tuple. - """ - columns = df.select(column_names).columns - outcome = pl.DataFrame({"cols": columns}) - expressions = [ - pl.col("cols").str.contains(pattern).alias(f"cols{num}") - for num, pattern in enumerate(names_pattern) - ] - outcome = outcome.with_columns(expressions) - booleans = outcome.select(pl.exclude("cols").any()) - for position in range(len(names_pattern)): - if not booleans.to_series(position).item(): - raise ValueError( - "No match was returned for the regex " - f"at position {position} -> {names_pattern[position]}." 
- ) - names_booleans = pl - for boolean, repl_name in zip(booleans.columns, names_to): - names_booleans = names_booleans.when(pl.col(boolean)).then( - pl.lit(repl_name) - ) - - names_booleans = names_booleans.alias(".value") - filter_expr = pl.col(".value").is_not_null() - cum_expr = pl.col(".value").cum_count().over(".value").sub(1).alias("idx") - outcome = ( - outcome.select(names_booleans, pl.col("cols")) - .filter(filter_expr) - .with_columns(cum_expr) - ) - headers_dict = defaultdict(list) - for num, col_name, name_header in zip( - outcome.get_column("idx"), - outcome.get_column("cols"), - outcome.get_column(".value"), - ): - headers_dict[num].append((col_name, name_header)) - - contents = [] - for _, value in headers_dict.items(): - expression = [] if index is None else [pl.col(index)] - columns_to_select = [ - pl.col(col_name).alias(repl_name) for col_name, repl_name in value - ] - expression.extend(columns_to_select) - - contents.append(df.select(expression)) - return pl.concat(contents, how="diagonal_relaxed") - - -def _pivot_longer_no_dot_value( - df: pl.DataFrame, - outcome: pl.Series, - names_to: Iterable, - values_to: str, - index: Iterable, - columns: Iterable, - names_transform: dict, -) -> pl.DataFrame: - """ - Reshape the data for scenarios where .value - is not present in names_to, - or names_to is not a list/tuple. - """ - contents = [] - for col_name, mapping in zip(columns, outcome): - expression = ( - [pl.col(col_name)] - if index is None - else [pl.col(index), pl.col(col_name).alias(values_to)] - ) - columns_to_append = [ - pl.lit(label, dtype=names_transform[header]).alias(header) - for header, label in mapping.items() - ] - _frame = df.select(expression).with_columns(columns_to_append) - contents.append(_frame) - columns_to_select = [] if not index else list(index) - columns_to_select.extend(names_to) - columns_to_select.append(values_to) - return pl.concat(contents, how="diagonal_relaxed").select( - pl.col(columns_to_select) - ) - - -def _pivot_longer_dot_value( - df: pl.DataFrame, - names_to: Iterable, - outcome: pl.Series, - index: Iterable, - columns: Iterable, - names_transform: Union[PolarsDataType, dict], -) -> pl.DataFrame: - """ - Pivots the dataframe into the final form, - for scenarios where .value is in names_to. - """ - booleans = outcome.struct.unnest().select(pl.all().is_null().any()) - for position in range(len(names_to)): - if booleans.to_series(position).item(): - raise ValueError( - f"Column labels '{columns[position]}' " - "could not be matched with any of the groups " - "in the provided regex. Kindly provide a regular expression " - "(with the correct groups) that matches all labels in the columns." 
- ) - if names_to.count(".value") > 1: - cols = outcome.struct.fields - dot_value = [ - cols[num] - for num, label in enumerate(names_to) - if label == ".value" - ] - not_dot_value = [ - pl.col(field_name).alias(repl_name) - for field_name, repl_name in zip(cols, names_to) - if field_name not in dot_value - ] - - outcome = outcome.struct.unnest().select( - pl.concat_str(dot_value).alias(".value"), *not_dot_value - ) - else: - outcome = outcome.struct.rename_fields(names_to).struct.unnest() - idx = "".join(names_to) - not_dot_value = [name for name in names_to if name != ".value"] - outcome = outcome.with_row_index(idx).with_columns( - pl.col(idx).first().over(not_dot_value).rank("dense").sub(1), - pl.struct(not_dot_value), - ) - headers_dict = defaultdict(list) - for num, col_name, repl_name in zip( - outcome.get_column(idx), - columns, - outcome.get_column(".value"), - ): - headers_dict[num].append((col_name, repl_name)) - - non_headers_dict = dict() - outcome = outcome.select(idx, not_dot_value[0]).unique() - - for key, value in zip(outcome.to_series(0), outcome.to_series(1)): - value = [ - pl.lit(stub_name, dtype=names_transform[repl_name]).alias( - repl_name - ) - for repl_name, stub_name in value.items() - ] - non_headers_dict[key] = value - contents = [] - for key, value in headers_dict.items(): - expression = [] if index is None else [pl.col(index)] - columns_to_select = [ - pl.col(col_name).alias(repl_name) for col_name, repl_name in value - ] - expression.extend(columns_to_select) - _frame = df.select(expression).with_columns(non_headers_dict[key]) - contents.append(_frame) - columns_to_select = [] if not index else list(index) - columns_to_select.extend(not_dot_value) - return pl.concat(contents, how="diagonal_relaxed").select( - pl.col(columns_to_select), pl.exclude(columns_to_select) - ) - - -def _pivot_longer_dot_value_only( - df: pl.DataFrame, - names_to: Iterable, - outcome: pl.Series, - index: Iterable, - columns: Iterable, -) -> pl.DataFrame: - """ - Pivots the dataframe into the final form, - for scenarios where only '.value' is present in names_to. - """ - - if names_to.count(".value") > 1: - outcome = outcome.struct.unnest().select( - pl.concat_str(pl.all()).alias(".value") - ) - else: - outcome = outcome.struct.rename_fields(names_to).struct.unnest() - outcome = outcome.with_columns( - pl.col(".value").cum_count().over(".value").sub(1).alias("idx") - ) - headers_dict = defaultdict(list) - for num, col_name, repl_name in zip( - outcome.get_column("idx"), - columns, - outcome.get_column(".value"), - ): - headers_dict[num].append((col_name, repl_name)) - - contents = [] - for _, value in headers_dict.items(): - expression = [] if index is None else [pl.col(index)] - columns_to_select = [ - pl.col(col_name).alias(repl_name) for col_name, repl_name in value - ] - expression.extend(columns_to_select) - contents.append(df.select(expression)) - - return pl.concat(contents, how="diagonal_relaxed") - - -def _data_checks_pivot_longer( - df, - index, - column_names, - names_to, - values_to, - names_sep, - names_pattern, - names_transform, -) -> tuple: - """ - This function majorly does type checks on the passed arguments. - - This function is executed before proceeding to the computation phase. - - Type annotations are not provided because this function is where type - checking happens. 
- """ - - def _check_type(arg_name: str, arg_value: Any): - """ - Raise if argument is not a valid type - """ - - def _check_type_single(entry): - if ( - not isinstance(entry, str) - and not cs.is_selector(entry) - and not isinstance(entry, pl.Expr) - ): - raise TypeError( - f"The argument passed to the {arg_name} parameter " - "should be a string type, a ColumnSelector, " - "an expression or a list/tuple that contains " - "a string and/or a ColumnSelector and/or an expression." - ) - - if isinstance(arg_value, (list, tuple)): - for entry in arg_value: - _check_type_single(entry=entry) - else: - _check_type_single(entry=arg_value) - - if (index is None) and (column_names is None): - column_names = cs.expand_selector(df, pl.all()) - index = [] - elif (index is not None) and (column_names is not None): - _check_type(arg_name="index", arg_value=index) - index = cs.expand_selector(df, index) - _check_type(arg_name="column_names", arg_value=column_names) - column_names = cs.expand_selector(df, column_names) - - elif (index is None) and (column_names is not None): - _check_type(arg_name="column_names", arg_value=column_names) - column_names = cs.expand_selector(df, column_names) - index = cs.expand_selector(df, pl.exclude(column_names)) - - elif (index is not None) and (column_names is None): - _check_type(arg_name="index", arg_value=index) - index = cs.expand_selector(df, index) - column_names = cs.expand_selector(df, pl.exclude(index)) - - check("names_to", names_to, [list, tuple, str]) - if isinstance(names_to, (list, tuple)): - uniques = set() - for word in names_to: - check(f"'{word}' in names_to", word, [str]) - if (word in uniques) and (word != ".value"): - raise ValueError(f"'{word}' is duplicated in names_to.") - uniques.add(word) - names_to = [names_to] if isinstance(names_to, str) else names_to - - if names_sep and names_pattern: - raise ValueError( - "Only one of names_pattern or names_sep should be provided." - ) - - if names_sep is not None: - check("names_sep", names_sep, [str]) - - if names_pattern is not None: - check("names_pattern", names_pattern, [str, list, tuple]) - if isinstance(names_pattern, (list, tuple)): - for word in names_pattern: - check(f"'{word}' in names_pattern", word, [str]) - if ".value" in names_to: - raise ValueError( - ".value is not accepted in names_to " - "if names_pattern is a list/tuple." - ) - if len(names_pattern) != len(names_to): - raise ValueError( - f"The length of names_to does not match " - "the number of regexes in names_pattern. " - f"The length of names_to is {len(names_to)} " - f"while the number of regexes is {len(names_pattern)}." - ) - - check("values_to", values_to, [str, list, tuple]) - values_to_is_a_sequence = isinstance(values_to, (list, tuple)) - names_pattern_is_a_sequence = isinstance(names_pattern, (list, tuple)) - if values_to_is_a_sequence: - if not names_pattern_is_a_sequence: - raise TypeError( - "values_to can be a list/tuple only " - "if names_pattern is a list/tuple." - ) - - if len(names_pattern) != len(values_to): - raise ValueError( - f"The length of values_to does not match " - "the number of regexes in names_pattern. " - f"The length of values_to is {len(values_to)} " - f"while the number of regexes is {len(names_pattern)}." 
- ) - uniques = set() - for word in values_to: - check(f"{word} in values_to", word, [str]) - if word in uniques: - raise ValueError(f"'{word}' is duplicated in values_to.") - uniques.add(word) - - columns_to_append = any(label != ".value" for label in names_to) - if values_to_is_a_sequence or columns_to_append: - check("names_transform", names_transform, [DataTypeClass, dict]) - if isinstance(names_transform, dict): - for _, dtype in names_transform.items(): - check( - "dtype in the names_transform mapping", - dtype, - [DataTypeClass], - ) - names_transform = { - label: names_transform.get(label, pl.Utf8) - for label in names_to - } - else: - names_transform = {label: names_transform for label in names_to} - - return ( - df, - index, - column_names, - names_to, - values_to, - names_sep, - names_pattern, - names_transform, - ) From a5c331a6062834cd21ad4b772970bae9b88a9d56 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Wed, 1 May 2024 20:23:27 +1000 Subject: [PATCH 44/46] remove irrelevant files --- .requirements/docs.in | 1 - mkdocs.yml | 1 - mkdocs/api/polars.md | 6 - .../functions/test_pivot_longer_polars.py | 913 ------------------ 4 files changed, 921 deletions(-) delete mode 100644 mkdocs/api/polars.md delete mode 100644 tests/polars/functions/test_pivot_longer_polars.py diff --git a/.requirements/docs.in b/.requirements/docs.in index b23e373aa..f0d4afc29 100644 --- a/.requirements/docs.in +++ b/.requirements/docs.in @@ -1,5 +1,4 @@ mkdocs -polars mkdocs-material mkdocstrings>=0.19.0 mkdocstrings-python diff --git a/mkdocs.yml b/mkdocs.yml index a7545afc5..639d71bea 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -45,7 +45,6 @@ nav: - Machine Learning: api/ml.md - Math: api/math.md # - PySpark: api/pyspark.md # will be added back later - - Polars: api/polars.md - Timeseries: api/timeseries.md - XArray: api/xarray.md - Development Guide: devguide.md diff --git a/mkdocs/api/polars.md b/mkdocs/api/polars.md deleted file mode 100644 index 17a6a87aa..000000000 --- a/mkdocs/api/polars.md +++ /dev/null @@ -1,6 +0,0 @@ -# Polars - -::: janitor.polars - options: - members: - - PolarsFrame diff --git a/tests/polars/functions/test_pivot_longer_polars.py b/tests/polars/functions/test_pivot_longer_polars.py deleted file mode 100644 index ee3b59b60..000000000 --- a/tests/polars/functions/test_pivot_longer_polars.py +++ /dev/null @@ -1,913 +0,0 @@ -import polars as pl -import polars.selectors as cs -import pytest -from polars.testing import assert_frame_equal - -from janitor import polars # noqa: F401 - - -@pytest.fixture -def df_checks(): - """fixture dataframe""" - return pl.DataFrame( - { - "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], - "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], - "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], - } - ) - - -def test_type_index(df_checks): - """Raise TypeError if wrong type is provided for the index.""" - msg = "The argument passed to the index parameter " - msg += "should be a string type, a ColumnSelector.+" - with pytest.raises(TypeError, match=msg): - df_checks.janitor.pivot_longer(index=2007, names_sep="_") - - -def test_type_column_names(df_checks): - """Raise TypeError if wrong type is provided for column_names.""" - msg = "The argument passed to the column_names parameter " - msg += "should be a string type, a ColumnSelector.+" - with pytest.raises(TypeError, match=msg): - df_checks.janitor.pivot_longer(column_names=2007, names_sep="_") - - -def test_type_names_to(df_checks): - """Raise TypeError if wrong 
type is provided for names_to.""" - msg = "names_to should be one of .+" - with pytest.raises(TypeError, match=msg): - df_checks.janitor.pivot_longer(names_to=2007, names_sep="_") - - -def test_subtype_names_to(df_checks): - """ - Raise TypeError if names_to is a sequence - and the wrong type is provided for entries - in names_to. - """ - with pytest.raises(TypeError, match="'1' in names_to.+"): - df_checks.janitor.pivot_longer(names_to=[1], names_sep="_") - - -def test_duplicate_names_to(df_checks): - """Raise error if names_to contains duplicates.""" - with pytest.raises(ValueError, match="'y' is duplicated in names_to."): - df_checks.janitor.pivot_longer( - names_to=["y", "y"], names_pattern="(.+)(.)" - ) - - -def test_both_names_sep_and_pattern(df_checks): - """ - Raise ValueError if both names_sep - and names_pattern is provided. - """ - with pytest.raises( - ValueError, - match="Only one of names_pattern or names_sep should be provided.", - ): - df_checks.janitor.pivot_longer( - names_to=["rar", "bar"], names_sep="-", names_pattern="(.+)(.)" - ) - - -def test_name_pattern_wrong_type(df_checks): - """Raise TypeError if the wrong type is provided for names_pattern.""" - with pytest.raises(TypeError, match="names_pattern should be one of.+"): - df_checks.janitor.pivot_longer( - names_to=["rar", "bar"], names_pattern=2007 - ) - - -def test_names_pattern_wrong_subtype(df_checks): - """ - Raise TypeError if names_pattern is a list/tuple - and wrong subtype is supplied. - """ - with pytest.raises(TypeError, match="'1' in names_pattern.+"): - df_checks.janitor.pivot_longer( - names_to=["ht", "num"], names_pattern=[1, "\\d"] - ) - - -def test_names_pattern_names_to_unequal_length(df_checks): - """ - Raise ValueError if names_pattern is a list/tuple - and wrong number of items in names_to. - """ - with pytest.raises( - ValueError, - match="The length of names_to does not match " - "the number of regexes in names_pattern.+", - ): - df_checks.janitor.pivot_longer( - names_to=["variable"], names_pattern=["^ht", ".+i.+"] - ) - - -def test_names_pattern_names_to_dot_value(df_checks): - """ - Raise Error if names_pattern is a list/tuple and - .value in names_to. - """ - with pytest.raises( - ValueError, - match=".value is not accepted in names_to " - "if names_pattern is a list/tuple.", - ): - df_checks.janitor.pivot_longer( - names_to=["variable", ".value"], names_pattern=["^ht", ".+i.+"] - ) - - -def test_name_sep_wrong_type(df_checks): - """Raise TypeError if the wrong type is provided for names_sep.""" - with pytest.raises(TypeError, match="names_sep should be one of.+"): - df_checks.janitor.pivot_longer( - names_to=[".value", "num"], names_sep=["_"] - ) - - -def test_values_to_wrong_type(df_checks): - """Raise TypeError if the wrong type is provided for `values_to`.""" - with pytest.raises(TypeError, match="values_to should be one of.+"): - df_checks.janitor.pivot_longer(values_to={"salvo"}, names_sep="_") - - -def test_values_to_wrong_type_names_pattern(df_checks): - """ - Raise TypeError if `values_to` is a list, - and names_pattern is not. - """ - with pytest.raises( - TypeError, - match="values_to can be a list/tuple only " - "if names_pattern is a list/tuple.", - ): - df_checks.janitor.pivot_longer( - values_to=["salvo"], names_pattern=r"(.)" - ) - - -def test_values_to_names_pattern_unequal_length(df_checks): - """ - Raise ValueError if `values_to` is a list, - and the length of names_pattern - does not match the length of values_to. 
- """ - with pytest.raises( - ValueError, - match="The length of values_to does not match " - "the number of regexes in names_pattern.+", - ): - df_checks.janitor.pivot_longer( - values_to=["salvo"], - names_pattern=["ht", r"\d"], - names_to=["foo", "bar"], - ) - - -def test_sub_values_to(df_checks): - """Raise error if values_to is a sequence, and contains non strings.""" - with pytest.raises(TypeError, match="1 in values_to.+"): - df_checks.janitor.pivot_longer( - names_to=["x", "y"], - names_pattern=[r"ht", r"\d"], - values_to=[1, "salvo"], - ) - - -def test_duplicate_values_to(df_checks): - """Raise error if values_to is a sequence, and contains duplicates.""" - with pytest.raises( - ValueError, match="'salvo' is duplicated in values_to." - ): - df_checks.janitor.pivot_longer( - names_to=["x", "y"], - names_pattern=[r"ht", r"\d"], - values_to=["salvo", "salvo"], - ) - - -def test_names_transform_wrong_type(df_checks): - """Raise TypeError if the wrong type is provided for `names_transform`.""" - with pytest.raises(TypeError, match="names_transform should be one of.+"): - df_checks.janitor.pivot_longer(names_sep="_", names_transform=1) - - -def test_names_transform_wrong_subtype(df_checks): - """ - Raise TypeError if the wrong subtype - is provided for values in the - `names_transform` dictionary. - """ - with pytest.raises( - TypeError, - match="dtype in the names_transform mapping should be one of.+", - ): - df_checks.janitor.pivot_longer( - names_sep="_", names_transform={"rar": 1} - ) - - -def test_names_pattern_list_empty_any(df_checks): - """ - Raise ValueError if names_pattern is a list, - and not all matches are returned. - """ - with pytest.raises( - ValueError, match="No match was returned for the regex.+" - ): - df_checks.janitor.pivot_longer( - index=["famid", "birth"], - names_to=["ht"], - names_pattern=["rar"], - ) - - -def test_names_pattern_no_match(df_checks): - """Raise error if names_pattern is a regex and returns no matches.""" - with pytest.raises( - ValueError, match="Column labels .+ could not be matched with any .+" - ): - df_checks.janitor.pivot_longer( - index="famid", - names_to=[".value", "value"], - names_pattern=r"(rar)(.)", - ) - - -def test_names_pattern_incomplete_match(df_checks): - """ - Raise error if names_pattern is a regex - and returns incomplete matches. - """ - with pytest.raises( - ValueError, match="Column labels .+ could not be matched with any .+" - ): - df_checks.janitor.pivot_longer( - index="famid", - names_to=[".value", "value"], - names_pattern=r"(ht)(.)", - ) - - -def test_names_sep_len(df_checks): - """ - Raise error if names_sep, - and the number of matches returned - is not equal to the length of names_to. 
- """ - msg = "The length of names_to does not match " - msg += "the number of fields extracted.+ " - with pytest.raises(ValueError, match=msg): - df_checks.janitor.pivot_longer(names_to=".value", names_sep="t") - - -def test_pivot_index_only(df_checks): - """Test output if only index is passed.""" - result = df_checks.janitor.pivot_longer( - index=["famid", "birth"], - names_to="dim", - values_to="num", - ) - - actual = df_checks.melt( - ["famid", "birth"], variable_name="dim", value_name="num" - ) - - assert_frame_equal(result, actual) - - -def test_pivot_column_only(df_checks): - """Test output if only column_names is passed.""" - result = df_checks.janitor.pivot_longer( - column_names=["ht1", "ht2"], - names_to="dim", - values_to="num", - ) - - actual = df_checks.melt( - id_vars=["famid", "birth"], - variable_name="dim", - value_name="num", - ) - - assert_frame_equal(result, actual) - - -def test_names_pat_str(df_checks): - """ - Test output when names_pattern is a string, - and .value is present. - """ - result = df_checks.janitor.pivot_longer( - column_names=cs.starts_with("ht"), - names_to=(".value", "age"), - names_pattern="(.+)(.)", - names_transform={"age": pl.Int64}, - ).sort(by=pl.all()) - - actual = [ - {"famid": 1, "birth": 1, "age": 1, "ht": 2.8}, - {"famid": 1, "birth": 1, "age": 2, "ht": 3.4}, - {"famid": 1, "birth": 2, "age": 1, "ht": 2.9}, - {"famid": 1, "birth": 2, "age": 2, "ht": 3.8}, - {"famid": 1, "birth": 3, "age": 1, "ht": 2.2}, - {"famid": 1, "birth": 3, "age": 2, "ht": 2.9}, - {"famid": 2, "birth": 1, "age": 1, "ht": 2.0}, - {"famid": 2, "birth": 1, "age": 2, "ht": 3.2}, - {"famid": 2, "birth": 2, "age": 1, "ht": 1.8}, - {"famid": 2, "birth": 2, "age": 2, "ht": 2.8}, - {"famid": 2, "birth": 3, "age": 1, "ht": 1.9}, - {"famid": 2, "birth": 3, "age": 2, "ht": 2.4}, - {"famid": 3, "birth": 1, "age": 1, "ht": 2.2}, - {"famid": 3, "birth": 1, "age": 2, "ht": 3.3}, - {"famid": 3, "birth": 2, "age": 1, "ht": 2.3}, - {"famid": 3, "birth": 2, "age": 2, "ht": 3.4}, - {"famid": 3, "birth": 3, "age": 1, "ht": 2.1}, - {"famid": 3, "birth": 3, "age": 2, "ht": 2.9}, - ] - actual = pl.DataFrame(actual).sort(by=pl.all()) - - assert_frame_equal(result, actual, check_dtype=False) - - -def test_no_column_names(df_checks): - """ - Test output if all the columns - are assigned to the index parameter. 
- """ - assert_frame_equal( - df_checks.janitor.pivot_longer(index=pl.all()), - df_checks, - ) - - -@pytest.fixture -def test_df(): - """Fixture DataFrame""" - return pl.DataFrame( - { - "off_loc": ["A", "B", "C", "D", "E", "F"], - "pt_loc": ["G", "H", "I", "J", "K", "L"], - "pt_lat": [ - 100.07548220000001, - 75.191326, - 122.65134479999999, - 124.13553329999999, - 124.13553329999999, - 124.01028909999998, - ], - "off_lat": [ - 121.271083, - 75.93845266, - 135.043791, - 134.51128400000002, - 134.484374, - 137.962195, - ], - "pt_long": [ - 4.472089953, - -144.387785, - -40.45611048, - -46.07156181, - -46.07156181, - -46.01594293, - ], - "off_long": [ - -7.188632000000001, - -143.2288569, - 21.242563, - 40.937416999999996, - 40.78472, - 22.905889000000002, - ], - } - ) - - -actual = [ - { - "set": "off", - "loc": "A", - "lat": 121.271083, - "long": -7.188632000000001, - }, - {"set": "off", "loc": "B", "lat": 75.93845266, "long": -143.2288569}, - {"set": "off", "loc": "C", "lat": 135.043791, "long": 21.242563}, - { - "set": "off", - "loc": "D", - "lat": 134.51128400000002, - "long": 40.937416999999996, - }, - {"set": "off", "loc": "E", "lat": 134.484374, "long": 40.78472}, - { - "set": "off", - "loc": "F", - "lat": 137.962195, - "long": 22.905889000000002, - }, - { - "set": "pt", - "loc": "G", - "lat": 100.07548220000001, - "long": 4.472089953, - }, - {"set": "pt", "loc": "H", "lat": 75.191326, "long": -144.387785}, - { - "set": "pt", - "loc": "I", - "lat": 122.65134479999999, - "long": -40.45611048, - }, - { - "set": "pt", - "loc": "J", - "lat": 124.13553329999999, - "long": -46.07156181, - }, - { - "set": "pt", - "loc": "K", - "lat": 124.13553329999999, - "long": -46.07156181, - }, - { - "set": "pt", - "loc": "L", - "lat": 124.01028909999998, - "long": -46.01594293, - }, -] - -actual = pl.DataFrame(actual).sort(by=pl.all()) - - -def test_names_pattern_str(test_df): - """Test output for names_pattern and .value.""" - - result = test_df.janitor.pivot_longer( - column_names=pl.all(), - names_to=["set", ".value"], - names_pattern="(.+)_(.+)", - ).sort(by=pl.all()) - assert_frame_equal(result, actual) - - -def test_names_sep_str(test_df): - """Test output for names_pattern and .value.""" - - result = test_df.janitor.pivot_longer( - column_names=pl.all(), - names_to=["set", ".value"], - names_sep="_", - ).sort(by=pl.all()) - assert_frame_equal(result, actual) - - -def test_names_pattern_list(): - """Test output if names_pattern is a list/tuple.""" - - df = pl.DataFrame( - { - "Activity": ["P1", "P2"], - "General": ["AA", "BB"], - "m1": ["A1", "B1"], - "t1": ["TA1", "TB1"], - "m2": ["A2", "B2"], - "t2": ["TA2", "TB2"], - "m3": ["A3", "B3"], - "t3": ["TA3", "TB3"], - } - ) - - result = ( - df.janitor.pivot_longer( - index=["Activity", "General"], - names_pattern=["^m", "^t"], - names_to=["M", "Task"], - ) - .select(["Activity", "General", "Task", "M"]) - .sort(by=pl.all()) - ) - - actual = [ - {"Activity": "P1", "General": "AA", "Task": "TA1", "M": "A1"}, - {"Activity": "P1", "General": "AA", "Task": "TA2", "M": "A2"}, - {"Activity": "P1", "General": "AA", "Task": "TA3", "M": "A3"}, - {"Activity": "P2", "General": "BB", "Task": "TB1", "M": "B1"}, - {"Activity": "P2", "General": "BB", "Task": "TB2", "M": "B2"}, - {"Activity": "P2", "General": "BB", "Task": "TB3", "M": "B3"}, - ] - - actual = pl.DataFrame(actual).sort(by=pl.all()) - - assert_frame_equal(result, actual) - - -@pytest.fixture -def not_dot_value(): - """Fixture DataFrame""" - return pl.DataFrame( - { - "country": ["United States", 
"Russia", "China"], - "vault_2012": [48.1, 46.4, 44.3], - "floor_2012": [45.4, 41.6, 40.8], - "vault_2016": [46.9, 45.7, 44.3], - "floor_2016": [46.0, 42.0, 42.1], - } - ) - - -actual2 = [ - {"country": "China", "event": "floor", "year": "2012", "score": 40.8}, - {"country": "China", "event": "floor", "year": "2016", "score": 42.1}, - {"country": "China", "event": "vault", "year": "2012", "score": 44.3}, - {"country": "China", "event": "vault", "year": "2016", "score": 44.3}, - {"country": "Russia", "event": "floor", "year": "2012", "score": 41.6}, - {"country": "Russia", "event": "floor", "year": "2016", "score": 42.0}, - {"country": "Russia", "event": "vault", "year": "2012", "score": 46.4}, - {"country": "Russia", "event": "vault", "year": "2016", "score": 45.7}, - { - "country": "United States", - "event": "floor", - "year": "2012", - "score": 45.4, - }, - { - "country": "United States", - "event": "floor", - "year": "2016", - "score": 46.0, - }, - { - "country": "United States", - "event": "vault", - "year": "2012", - "score": 48.1, - }, - { - "country": "United States", - "event": "vault", - "year": "2016", - "score": 46.9, - }, -] -actual2 = pl.DataFrame(actual2).sort(by=pl.all()) - - -def test_not_dot_value_sep(not_dot_value): - """Test output when names_sep and no dot_value""" - - result = not_dot_value.janitor.pivot_longer( - "country", - names_to=("event", "year"), - names_sep="_", - values_to="score", - ).sort(by=pl.all()) - - assert_frame_equal(result, actual2) - - -def test_not_dot_value_sep2(not_dot_value): - """Test output when names_sep and no dot_value""" - - result = not_dot_value.janitor.pivot_longer( - "country", - names_to="event", - names_sep="/", - values_to="score", - ) - - actual = not_dot_value.melt( - "country", variable_name="event", value_name="score" - ) - - assert_frame_equal(result, actual) - - -def test_not_dot_value_pattern(not_dot_value): - """Test output when names_pattern is a string and no dot_value""" - - result = not_dot_value.janitor.pivot_longer( - index="country", - names_to=("event", "year"), - names_pattern=r"(.+)_(.+)", - values_to="score", - ).sort(by=pl.all()) - - assert_frame_equal(result, actual2) - - -def test_multiple_dot_value(): - """Test output for multiple .value.""" - df = pl.DataFrame( - { - "x_1_mean": [1, 2, 3, 4], - "x_2_mean": [1, 1, 0, 0], - "x_1_sd": [0, 1, 1, 1], - "x_2_sd": [0.739, 0.219, 1.46, 0.918], - "y_1_mean": [1, 2, 3, 4], - "y_2_mean": [1, 1, 0, 0], - "y_1_sd": [0, 1, 1, 1], - "y_2_sd": [-0.525, 0.623, -0.705, 0.662], - "unit": [1, 2, 3, 4], - } - ) - - result = df.janitor.pivot_longer( - index="unit", - names_to=(".value", "time", ".value"), - names_pattern=r"(x|y)_([0-9])(_mean|_sd)", - names_transform={"time": pl.Int64}, - ).sort(by=pl.all()) - - actual = { - "unit": [1, 2, 3, 4, 1, 2, 3, 4], - "time": [1, 1, 1, 1, 2, 2, 2, 2], - "x_mean": [1, 2, 3, 4, 1, 1, 0, 0], - "x_sd": [0.0, 1.0, 1.0, 1.0, 0.739, 0.219, 1.46, 0.918], - "y_mean": [1, 2, 3, 4, 1, 1, 0, 0], - "y_sd": [0.0, 1.0, 1.0, 1.0, -0.525, 0.623, -0.705, 0.662], - } - - actual = pl.DataFrame(actual).sort(by=pl.all()) - - assert_frame_equal(result, actual) - - -@pytest.fixture -def single_val(): - """fixture dataframe""" - return pl.DataFrame( - { - "id": [1, 2, 3], - "x1": [4, 5, 6], - "x2": [5, 6, 7], - } - ) - - -def test_multiple_dot_value2(single_val): - """Test output for multiple .value.""" - - result = single_val.janitor.pivot_longer( - index="id", names_to=(".value", ".value"), names_pattern="(.)(.)" - ) - - assert_frame_equal(result, single_val) 
- - -actual3 = [ - {"id": 1, "x": 4}, - {"id": 2, "x": 5}, - {"id": 3, "x": 6}, - {"id": 1, "x": 5}, - {"id": 2, "x": 6}, - {"id": 3, "x": 7}, -] - -actual3 = pl.DataFrame(actual3) - - -def test_names_pattern_sequence_single_unique_column(single_val): - """ - Test output if names_pattern is a sequence of length 1. - """ - - result = single_val.janitor.pivot_longer( - "id", names_to=["x"], names_pattern=("x",) - ) - - assert_frame_equal(result, actual3) - - -def test_names_pattern_single_column(single_val): - """ - Test output if names_to is only '.value'. - """ - - result = single_val.janitor.pivot_longer( - "id", names_to=".value", names_pattern="(.)." - ) - - assert_frame_equal(result, actual3) - - -def test_names_pattern_single_column_not_dot_value(single_val): - """ - Test output if names_to is not '.value'. - """ - result = single_val.janitor.pivot_longer( - index="id", column_names="x1", names_to="yA", names_pattern="(.+)" - ) - - assert_frame_equal( - result, - single_val.melt(id_vars="id", value_vars="x1", variable_name="yA"), - ) - - -def test_names_pattern_single_column_not_dot_value1(single_val): - """ - Test output if names_to is not '.value'. - """ - result = single_val.select("x1").janitor.pivot_longer( - names_to="yA", names_pattern="(.+)" - ) - - assert_frame_equal( - result, single_val.select("x1").melt(variable_name="yA") - ) - - -@pytest.fixture -def df_null(): - "Dataframe with nulls." - return pl.DataFrame( - { - "family": [1, 2, 3, 4, 5], - "dob_child1": [ - "1998-11-26", - "1996-06-22", - "2002-07-11", - "2004-10-10", - "2000-12-05", - ], - "dob_child2": [ - "2000-01-29", - None, - "2004-04-05", - "2009-08-27", - "2005-02-28", - ], - "gender_child1": [1, 2, 2, 1, 2], - "gender_child2": [2.0, None, 2.0, 1.0, 1.0], - } - ) - - -def test_names_pattern_nulls_in_data(df_null): - """Test output if nulls are present in data.""" - result = df_null.janitor.pivot_longer( - index="family", - names_to=[".value", "child"], - names_pattern=r"(.+)_(.+)", - ).sort(by=pl.all()) - - actual = [ - {"family": 1, "child": "child1", "dob": "1998-11-26", "gender": 1.0}, - {"family": 2, "child": "child1", "dob": "1996-06-22", "gender": 2.0}, - {"family": 3, "child": "child1", "dob": "2002-07-11", "gender": 2.0}, - {"family": 4, "child": "child1", "dob": "2004-10-10", "gender": 1.0}, - {"family": 5, "child": "child1", "dob": "2000-12-05", "gender": 2.0}, - {"family": 1, "child": "child2", "dob": "2000-01-29", "gender": 2.0}, - {"family": 2, "child": "child2", "dob": None, "gender": None}, - {"family": 3, "child": "child2", "dob": "2004-04-05", "gender": 2.0}, - {"family": 4, "child": "child2", "dob": "2009-08-27", "gender": 1.0}, - {"family": 5, "child": "child2", "dob": "2005-02-28", "gender": 1.0}, - ] - - actual = pl.DataFrame(actual).sort(by=pl.all()) - - assert_frame_equal(result, actual) - - -@pytest.fixture -def multiple_values_to(): - """fixture for multiple values_to""" - # https://stackoverflow.com/q/51519101/7175713 - return pl.DataFrame( - { - "City": ["Houston", "Austin", "Hoover"], - "State": ["Texas", "Texas", "Alabama"], - "Name": ["Aria", "Penelope", "Niko"], - "Mango": [4, 10, 90], - "Orange": [10, 8, 14], - "Watermelon": [40, 99, 43], - "Gin": [16, 200, 34], - "Vodka": [20, 33, 18], - }, - ) - - -def test_output_values_to_seq(multiple_values_to): - """Test output when values_to is a list/tuple.""" - - expected = multiple_values_to.janitor.pivot_longer( - index=["City", "State"], - column_names=cs.numeric(), - names_to=("Fruit"), - values_to=("Pounds",), - 
names_pattern=[r"M|O|W"], - ).sort(by=pl.all()) - - actual = [ - {"City": "Houston", "State": "Texas", "Fruit": "Mango", "Pounds": 4}, - {"City": "Austin", "State": "Texas", "Fruit": "Mango", "Pounds": 10}, - {"City": "Hoover", "State": "Alabama", "Fruit": "Mango", "Pounds": 90}, - {"City": "Houston", "State": "Texas", "Fruit": "Orange", "Pounds": 10}, - {"City": "Austin", "State": "Texas", "Fruit": "Orange", "Pounds": 8}, - { - "City": "Hoover", - "State": "Alabama", - "Fruit": "Orange", - "Pounds": 14, - }, - { - "City": "Houston", - "State": "Texas", - "Fruit": "Watermelon", - "Pounds": 40, - }, - { - "City": "Austin", - "State": "Texas", - "Fruit": "Watermelon", - "Pounds": 99, - }, - { - "City": "Hoover", - "State": "Alabama", - "Fruit": "Watermelon", - "Pounds": 43, - }, - ] - - actual = pl.DataFrame(actual).sort(by=pl.all()) - - assert_frame_equal(expected, actual) - - -def test_output_values_to_seq1(multiple_values_to): - """Test output when values_to is a list/tuple.""" - # https://stackoverflow.com/a/51520155/7175713 - expected = ( - multiple_values_to.janitor.pivot_longer( - index=["City", "State"], - column_names=cs.numeric(), - names_to=("Fruit", "Drink"), - values_to=("Pounds", "Ounces"), - names_pattern=[r"M|O|W", r"G|V"], - ) - .with_columns(pl.col("Ounces").cast(float)) - .sort(by=pl.all()) - ) - - actual = { - "City": [ - "Houston", - "Austin", - "Hoover", - "Houston", - "Austin", - "Hoover", - "Houston", - "Austin", - "Hoover", - ], - "State": [ - "Texas", - "Texas", - "Alabama", - "Texas", - "Texas", - "Alabama", - "Texas", - "Texas", - "Alabama", - ], - "Fruit": [ - "Mango", - "Mango", - "Mango", - "Orange", - "Orange", - "Orange", - "Watermelon", - "Watermelon", - "Watermelon", - ], - "Pounds": [4, 10, 90, 10, 8, 14, 40, 99, 43], - "Drink": [ - "Gin", - "Gin", - "Gin", - "Vodka", - "Vodka", - "Vodka", - None, - None, - None, - ], - "Ounces": [16.0, 200.0, 34.0, 20.0, 33.0, 18.0, None, None, None], - } - - actual = pl.DataFrame(actual).sort(by=pl.all()) - - assert_frame_equal(expected, actual) From 4d9c35feff182ac99bb760ed47bfd73472c0b0b1 Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Wed, 1 May 2024 20:30:39 +1000 Subject: [PATCH 45/46] minor edit to docs --- janitor/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/janitor/io.py b/janitor/io.py index 4741cb4d2..4522be258 100644 --- a/janitor/io.py +++ b/janitor/io.py @@ -113,7 +113,7 @@ def read_commandline(cmd: str, engine="pandas", **kwargs: Any) -> Mapping: This function assumes that your command line command will return an output that is parsable using the relevant engine and StringIO. This function defaults to using `pd.read_csv` underneath the hood. - Keyword arguments are passed through to read_csv. + Keyword arguments are passed through as-is. Args: cmd: Shell command to preprocess a file on disk. 
From 3b781c1a8e01848cc81a5341ad6b31914341a2bb Mon Sep 17 00:00:00 2001 From: "samuel.oranyeli" Date: Wed, 1 May 2024 21:42:09 +1000 Subject: [PATCH 46/46] xlsx_table now supports polars --- .requirements/docs.in | 1 + CHANGELOG.md | 2 +- janitor/io.py | 76 +++++++++++++++++++++++++++---------------- mkdocs.yml | 1 + 4 files changed, 51 insertions(+), 29 deletions(-) diff --git a/.requirements/docs.in b/.requirements/docs.in index f0d4afc29..b23e373aa 100644 --- a/.requirements/docs.in +++ b/.requirements/docs.in @@ -1,4 +1,5 @@ mkdocs +polars mkdocs-material mkdocstrings>=0.19.0 mkdocstrings-python diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aea6a879..7e0651811 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog ## [Unreleased] -- [ENH] `read_commandline` function now supports polars - Issue #1352 +- [ENH] `xlsx_table` function now supports polars - Issue #1352 ## [v0.27.0] - 2024-03-21 diff --git a/janitor/io.py b/janitor/io.py index 4522be258..4829b3e1c 100644 --- a/janitor/io.py +++ b/janitor/io.py @@ -93,7 +93,7 @@ def read_csvs( return dfs_dict -def read_commandline(cmd: str, engine="pandas", **kwargs: Any) -> Mapping: +def read_commandline(cmd: str, **kwargs: Any) -> pd.DataFrame: """Read a CSV file based on a command-line command. For example, you may wish to run the following command on `sep-quarter.csv` @@ -111,42 +111,26 @@ def read_commandline(cmd: str, engine="pandas", **kwargs: Any) -> Mapping: ``` This function assumes that your command line command will return - an output that is parsable using the relevant engine and StringIO. - This function defaults to using `pd.read_csv` underneath the hood. - Keyword arguments are passed through as-is. + an output that is parsable using `pandas.read_csv` and StringIO. + We default to using `pd.read_csv` underneath the hood. + Keyword arguments are passed through to read_csv. Args: cmd: Shell command to preprocess a file on disk. - engine: DataFrame engine to process the output of the shell command. - Currently supports both pandas and polars. **kwargs: Keyword arguments that are passed through to - the engine's csv reader. - + `pd.read_csv()`. Returns: - A DataFrame parsed from the stdout of the underlying + A pandas DataFrame parsed from the stdout of the underlying shell. """ check("cmd", cmd, [str]) - if engine not in {"pandas", "polars"}: - raise ValueError("engine should be either pandas or polars.") # adding check=True ensures that an explicit, clear error # is raised, so that the user can see the reason for the failure outcome = subprocess.run( cmd, shell=True, capture_output=True, text=True, check=True ) - if engine == "polars": - try: - import polars as pl - except ImportError: - import_message( - submodule="polars", - package="polars", - conda_channel="conda-forge", - pip_install=True, - ) - return pl.read_csv(StringIO(outcome.stdout), **kwargs) return pd.read_csv(StringIO(outcome.stdout), **kwargs) @@ -158,14 +142,15 @@ def xlsx_table( path: Union[str, IO, Workbook], sheetname: str = None, table: Union[str, list, tuple] = None, -) -> Union[pd.DataFrame, dict]: + engine: str = "pandas", +) -> Mapping: """Returns a DataFrame of values in a table in the Excel file. This applies to an Excel file, where the data range is explicitly specified as a Microsoft Excel table. 
If there is a single table in the sheet, or a string is provided
-    as an argument to the `table` parameter, a pandas DataFrame is returned;
+    as an argument to the `table` parameter, a DataFrame is returned;
     if there is more than one table in the sheet,
     and the `table` argument is `None`,
     or a list/tuple of names,
     a dictionary of DataFrames is returned,
     where the keys of the dictionary
     are the table names.

     Examples:
         >>> import pandas as pd
+        >>> import polars as pl
         >>> from janitor import xlsx_table
         >>> filename="../pyjanitor/tests/test_data/016-MSPTDA-Excel.xlsx"

         >>> xlsx_table(filename, table='dCategory')
            CategoryID       Category
         0           1       Beginner
         1           2       Advanced
         2           3      Freestyle
         3           4    Competition
         4           5  Long Distance

+        >>> xlsx_table(filename, table='dCategory', engine='polars')
+        shape: (5, 2)
+        ┌────────────┬───────────────┐
+        │ CategoryID ┆ Category      │
+        │ ---        ┆ ---           │
+        │ i64        ┆ str           │
+        ╞════════════╪═══════════════╡
+        │ 1          ┆ Beginner      │
+        │ 2          ┆ Advanced      │
+        │ 3          ┆ Freestyle     │
+        │ 4          ┆ Competition   │
+        │ 5          ┆ Long Distance │
+        └────────────┴───────────────┘
+
         Multiple tables:

         >>> out=xlsx_table(filename, table=["dCategory", "dSalesReps"])

     Args:
         path: Path to the Excel File. It can also be an openpyxl Workbook.
         table: Name of a table, or list of tables in the sheet.
+        engine: DataFrame engine. Should be either pandas or polars.
+            Defaults to pandas.

     Raises:
         AttributeError: If a workbook is provided, and is a ReadOnlyWorksheet.

         KeyError: If the provided table does not exist in the sheet.

     Returns:
-        A pandas DataFrame, or a dictionary of DataFrames,
+        A DataFrame, or a dictionary of DataFrames,
         if there are multiple arguments for the `table` parameter,
         or the argument to `table` is `None`.
     """  # noqa : E501

             DeprecationWarning,
             stacklevel=find_stack_level(),
         )
+    if engine not in {"pandas", "polars"}:
+        raise ValueError("engine should be one of pandas or polars.")
+    base_engine = pd
+    if engine == "polars":
+        try:
+            import polars as pl
+
+            base_engine = pl
+        except ImportError:
+            import_message(
+                submodule="polars",
+                package="polars",
+                conda_channel="conda-forge",
+                pip_install=True,
+            )
+
     if table is not None:
         check("table", table, [str, list, tuple])
         if isinstance(table, (list, tuple)):

 def _create_dataframe_or_dictionary_from_table(
             header_exist = contents.headerRowCount
             coordinates = contents.ref
             data = worksheet[coordinates]
-            data = [[entry.value for entry in cell] for cell in data]
             if header_exist:
                 header, *data = data
+                header = [cell.value for cell in header]
             else:
                 header = [f"C{num}" for num in range(len(data[0]))]
-            data = pd.DataFrame(data, columns=header)
-            dictionary[table_name] = data
+            data = zip(*data)
+            data = ([entry.value for entry in cell] for cell in data)
+            data = dict(zip(header, data))
+            dictionary[table_name] = base_engine.DataFrame(data)
         return dictionary

     worksheets = [worksheet for worksheet in ws if worksheet.tables.items()]
diff --git a/mkdocs.yml b/mkdocs.yml
index 639d71bea..a7545afc5 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -45,6 +45,7 @@ nav:
   - Machine Learning: api/ml.md
   - Math: api/math.md
 # - PySpark: api/pyspark.md # will be added back later
+  - Polars: api/polars.md
   - Timeseries: api/timeseries.md
   - XArray: api/xarray.md
 - Development Guide: devguide.md
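
Taken together, the final patch makes the output frame type a single keyword switch. A short usage sketch mirroring the docstring example above (the test-data path comes from that docstring and assumes the pyjanitor repository layout; polars must be installed for the second call):

```python
from janitor import xlsx_table

filename = "../pyjanitor/tests/test_data/016-MSPTDA-Excel.xlsx"

# Default engine: the Excel table comes back as a pandas DataFrame.
categories_pd = xlsx_table(filename, table="dCategory")

# engine="polars": the same table, as a polars DataFrame.
categories_pl = xlsx_table(filename, table="dCategory", engine="polars")
```

Note the design choice in `_create_dataframe_or_dictionary_from_table`: the cell values are transposed into a `dict` mapping column names to value sequences, which both `pd.DataFrame` and `pl.DataFrame` accept, so `base_engine.DataFrame(data)` works unchanged for either engine.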