Skip to content

Commit

Permalink
Add get_feature_names_out
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxHalford committed Sep 7, 2024
1 parent 0828ee6 commit 81b7a13
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ theme = 'hugo-bearblog'
# Basic metadata configuration for your blog.
title = "Prince"
author = "Max Halford"
copyright = "Copyright © 2023, Max Halford."
copyright = "Copyright © 2024, Max Halford."
languageCode = "en-US"

# Generate a nice robots.txt for SEO
Expand Down
136 changes: 136 additions & 0 deletions docs/content/faq.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"+++\n",
"title = \"Frequently Asked Questions\"\n",
"menu = \"main\"\n",
"weight = 7\n",
"toc = true\n",
"aliases = [\"faq\"]\n",
"+++"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**How to use Prince with sklearn pipelines?**\n",
"\n",
"Prince estimators consume and produce pandas DataFrames. If you want to use them in a sklearn pipeline, you can [sklearn's `set_output` API](https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_set_output.html). This way, you can tell sklearn that the pipeline should exchange DataFrames instead of numpy arrays between the steps."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>component</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-2.264703</td>\n",
" <td>0.480027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-2.080961</td>\n",
" <td>-0.674134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-2.364229</td>\n",
" <td>-0.341908</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-2.299384</td>\n",
" <td>-0.597395</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-2.389842</td>\n",
" <td>0.646835</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"component 0 1\n",
"0 -2.264703 0.480027\n",
"1 -2.080961 -0.674134\n",
"2 -2.364229 -0.341908\n",
"3 -2.299384 -0.597395\n",
"4 -2.389842 0.646835"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import prince\n",
"from sklearn import datasets\n",
"from sklearn import impute\n",
"from sklearn import pipeline\n",
"\n",
"pipe = pipeline.make_pipeline(\n",
" impute.SimpleImputer(),\n",
" prince.PCA()\n",
")\n",
"pipe.set_output(transform='pandas')\n",
"dataset = datasets.load_iris()\n",
"pipe.fit_transform(dataset.data).head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "prince-NQ1O93Uh-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
3 changes: 3 additions & 0 deletions prince/mca.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def _prepare(self, X):
X = pd.get_dummies(X, columns=X.columns)
return X

def get_feature_names_out(self, input_features=None):
    """Return one integer label per fitted component.

    The labels are the consecutive integers ``0 .. n_components_ - 1``,
    packed into a NumPy array.

    Parameters
    ----------
    input_features
        Accepted but not used; the result depends only on
        ``self.n_components_``.
        NOTE(review): presumably kept for compatibility with scikit-learn's
        ``get_feature_names_out`` signature -- confirm.

    Returns
    -------
    numpy.ndarray
        Array produced by ``np.arange(self.n_components_)``.
    """
    component_count = self.n_components_
    return np.arange(component_count)

@utils.check_is_dataframe_input
def fit(self, X, y=None):
"""Fit the MCA for the dataframe X.
Expand Down
3 changes: 3 additions & 0 deletions prince/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def _check_input(self, X):
if self.check_input:
sklearn.utils.check_array(X)

def get_feature_names_out(self, input_features=None):
    """Return the output labels: the integers ``0 .. n_components_ - 1``.

    Parameters
    ----------
    input_features
        Ignored by this implementation; only ``self.n_components_`` is read.
        NOTE(review): presumably present to match scikit-learn's
        ``get_feature_names_out`` signature -- confirm.

    Returns
    -------
    numpy.ndarray
        ``np.arange(self.n_components_)``, i.e. one integer label per
        component.
    """
    n_labels = self.n_components_
    labels = np.arange(n_labels)
    return labels

@utils.check_is_dataframe_input
def fit(self, X, y=None, supplementary_columns=None):
self._check_input(X)
Expand Down
2 changes: 2 additions & 0 deletions tests/test_mca.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def test_issue_131():
def test_issue_171():
"""
https://github.com/MaxHalford/prince/issues/171
>>> from sklearn import impute
>>> from sklearn import pipeline
Expand Down

0 comments on commit 81b7a13

Please sign in to comment.