Add get_feature_names_out

MaxHalford · Sep 7, 2024 · 81b7a13 · 81b7a13
1 parent 0828ee6
commit 81b7a13
Show file tree

Hide file tree

Showing 5 changed files with 145 additions and 1 deletion.
diff --git a/docs/config.toml b/docs/config.toml
@@ -8,7 +8,7 @@ theme = 'hugo-bearblog'
 # Basic metadata configuration for your blog.
 title = "Prince"
 author = "Max Halford"
-copyright = "Copyright © 2023, Max Halford."
+copyright = "Copyright © 2024, Max Halford."
 languageCode = "en-US"
 
 # Generate a nice robots.txt for SEO

diff --git a/docs/content/faq.ipynb b/docs/content/faq.ipynb
@@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "+++\n",
+    "title = \"Frequently Asked Questions\"\n",
+    "menu = \"main\"\n",
+    "weight = 7\n",
+    "toc = true\n",
+    "aliases = [\"faq\"]\n",
+    "+++"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**How to use Prince with sklearn pipelines?**\n",
+    "\n",
+    "Prince estimators consume and produce pandas DataFrames. If you want to use them in a sklearn pipeline, you can [sklearn's `set_output` API](https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_set_output.html). This way, you can tell sklearn that the pipeline should exchange DataFrames instead of numpy arrays between the steps."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>component</th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-2.264703</td>\n",
+       "      <td>0.480027</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>-2.080961</td>\n",
+       "      <td>-0.674134</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>-2.364229</td>\n",
+       "      <td>-0.341908</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>-2.299384</td>\n",
+       "      <td>-0.597395</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>-2.389842</td>\n",
+       "      <td>0.646835</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "component         0         1\n",
+       "0         -2.264703  0.480027\n",
+       "1         -2.080961 -0.674134\n",
+       "2         -2.364229 -0.341908\n",
+       "3         -2.299384 -0.597395\n",
+       "4         -2.389842  0.646835"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import prince\n",
+    "from sklearn import datasets\n",
+    "from sklearn import impute\n",
+    "from sklearn import pipeline\n",
+    "\n",
+    "pipe = pipeline.make_pipeline(\n",
+    "    impute.SimpleImputer(),\n",
+    "    prince.PCA()\n",
+    ")\n",
+    "pipe.set_output(transform='pandas')\n",
+    "dataset = datasets.load_iris()\n",
+    "pipe.fit_transform(dataset.data).head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "prince-NQ1O93Uh-py3.11",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/prince/mca.py b/prince/mca.py
@@ -37,6 +37,9 @@ def _prepare(self, X):
             X = pd.get_dummies(X, columns=X.columns)
         return X
 
+    def get_feature_names_out(self, input_features=None):
+        return np.arange(self.n_components_)
+
     @utils.check_is_dataframe_input
     def fit(self, X, y=None):
         """Fit the MCA for the dataframe X.

diff --git a/prince/pca.py b/prince/pca.py
@@ -67,6 +67,9 @@ def _check_input(self, X):
         if self.check_input:
             sklearn.utils.check_array(X)
 
+    def get_feature_names_out(self, input_features=None):
+        return np.arange(self.n_components_)
+
     @utils.check_is_dataframe_input
     def fit(self, X, y=None, supplementary_columns=None):
         self._check_input(X)

diff --git a/tests/test_mca.py b/tests/test_mca.py
@@ -138,6 +138,8 @@ def test_issue_131():
 def test_issue_171():
     """
 
+    https://github.com/MaxHalford/prince/issues/171
+
     >>> from sklearn import impute
     >>> from sklearn import pipeline