Commit dc422c5: Merge remote-tracking branch 'upstream/main' into dark

DeaMariaLeon committed Feb 17, 2025
2 parents adec057 + 033a46c
Showing 14 changed files with 57 additions and 16 deletions.
6 changes: 3 additions & 3 deletions .binder/postBuild
100644 → 100755 (file mode: now executable)

@@ -6,9 +6,9 @@ set -e
 # inside a git checkout of the scikit-learn/scikit-learn repo. This script is
 # generating notebooks from the scikit-learn python examples.
 
-if [[ ! -f /.dockerenv ]]; then
-    echo "This script was written for repo2docker and is supposed to run inside a docker container."
-    echo "Exiting because this script can delete data if run outside of a docker container."
+if [[ -z "${REPO_DIR}" ]]; then
+    echo "This script was written for repo2docker and the REPO_DIR environment variable is supposed to be set."
+    echo "Exiting because this script can delete data if run outside of a repo2docker context."
     exit 1
 fi
5 changes: 5 additions & 0 deletions doc/modules/clustering.rst

@@ -140,6 +140,11 @@ model with equal covariance per component.
 :term:`inductive` clustering methods) are not designed to be applied to new,
 unseen data.
 
+.. rubric:: Examples
+
+* :ref:`sphx_glr_auto_examples_cluster_plot_inductive_clustering.py`: An example
+  of an inductive clustering model for handling new data.
+
 .. _k_means:
 
 K-means
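The example referenced above makes a non-inductive clusterer handle unseen data by training a classifier on the cluster labels. A minimal sketch of that pattern (the estimator choices below are illustrative, not taken from the example itself):

# Inductive clustering sketch: fit a clusterer, then train a classifier
# on its labels so new points can be assigned to clusters.
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)
labels = AgglomerativeClustering(n_clusters=3).fit_predict(X)

# The classifier generalizes the clustering to data it has never seen.
inductive = RandomForestClassifier(random_state=0).fit(X, labels)
print(inductive.predict(np.array([[0.0, 0.0], [5.0, 5.0]])))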
9 changes: 6 additions & 3 deletions sklearn/cluster/_affinity_propagation.py

@@ -398,9 +398,6 @@ class AffinityPropagation(ClusterMixin, BaseEstimator):
     Notes
     -----
-    For an example usage,
-    see :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`.
-
     The algorithmic complexity of affinity propagation is quadratic
     in the number of points.

@@ -442,6 +439,12 @@ class AffinityPropagation(ClusterMixin, BaseEstimator):
     >>> clustering.cluster_centers_
     array([[1, 2],
            [4, 2]])
+
+    For an example usage,
+    see :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`.
+
+    For a comparison of Affinity Propagation with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

3 changes: 3 additions & 0 deletions sklearn/cluster/_agglomerative.py

@@ -925,6 +925,9 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator):
     AgglomerativeClustering()
     >>> clustering.labels_
     array([1, 1, 1, 0, 0, 0])
+
+    For a comparison of Agglomerative clustering with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

3 changes: 3 additions & 0 deletions sklearn/cluster/_birch.py

@@ -483,6 +483,9 @@ class Birch(
     Birch(n_clusters=None)
     >>> brc.predict(X)
     array([0, 0, 0, 1, 1, 1])
+
+    For a comparison of the BIRCH clustering algorithm with other clustering algorithms,
+    see :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

9 changes: 6 additions & 3 deletions sklearn/cluster/_dbscan.py

@@ -277,9 +277,6 @@ class DBSCAN(ClusterMixin, BaseEstimator):
     Notes
     -----
-    For an example, see
-    :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py`.
-
     This implementation bulk-computes all neighborhood queries, which increases
     the memory complexity to O(n.d) where d is the average number of neighbors,
     while original DBSCAN had memory complexity O(n). It may attract a higher

@@ -322,6 +319,12 @@ class DBSCAN(ClusterMixin, BaseEstimator):
     array([ 0,  0,  0,  1,  1, -1])
     >>> clustering
     DBSCAN(eps=3, min_samples=2)
+
+    For an example, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py`.
+
+    For a comparison of DBSCAN with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

4 changes: 0 additions & 4 deletions sklearn/cluster/_hdbscan/hdbscan.py

@@ -427,10 +427,6 @@ class HDBSCAN(ClusterMixin, BaseEstimator):
     :class:`~sklearn.cluster.DBSCAN`), and be more robust to parameter selection.
 
     Read more in the :ref:`User Guide <hdbscan>`.
 
-    For an example of how to use HDBSCAN, as well as a comparison to
-    :class:`~sklearn.cluster.DBSCAN`, please see the :ref:`plotting demo
-    <sphx_glr_auto_examples_cluster_plot_hdbscan.py>`.
-
     .. versionadded:: 1.3
 
     Parameters
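For readers following the link this hunk removes, a minimal HDBSCAN usage sketch (data and parameters here are illustrative; assumes scikit-learn >= 1.3, where HDBSCAN was added):

from sklearn.cluster import HDBSCAN
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=200, centers=3, random_state=0)
# min_cluster_size is the main tuning knob; noise points get label -1.
labels = HDBSCAN(min_cluster_size=10).fit_predict(X)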
3 changes: 3 additions & 0 deletions sklearn/cluster/_kmeans.py

@@ -1873,6 +1873,9 @@ class MiniBatchKMeans(_BaseKMeans):
            [1.06896552, 1.        ]])
     >>> kmeans.predict([[0, 0], [4, 4]])
     array([1, 0], dtype=int32)
+
+    For a comparison of Mini-Batch K-Means clustering with other clustering algorithms,
+    see :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

3 changes: 3 additions & 0 deletions sklearn/cluster/_mean_shift.py

@@ -432,6 +432,9 @@ class MeanShift(ClusterMixin, BaseEstimator):
     array([1, 0])
     >>> clustering
     MeanShift(bandwidth=2)
+
+    For a comparison of Mean Shift clustering with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

3 changes: 3 additions & 0 deletions sklearn/cluster/_optics.py

@@ -234,6 +234,9 @@ class OPTICS(ClusterMixin, BaseEstimator):
     For a more detailed example see
     :ref:`sphx_glr_auto_examples_cluster_plot_optics.py`.
+
+    For a comparison of OPTICS with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

3 changes: 3 additions & 0 deletions sklearn/cluster/_spectral.py

@@ -601,6 +601,9 @@ class SpectralClustering(ClusterMixin, BaseEstimator):
     >>> clustering
     SpectralClustering(assign_labels='discretize', n_clusters=2,
         random_state=0)
+
+    For a comparison of Spectral clustering with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

4 changes: 2 additions & 2 deletions sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py

@@ -1492,7 +1492,7 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):
         ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin
         is always reserved for missing values. Must be no larger than 255.
 
     categorical_features : array-like of {bool, int, str} of shape (n_features) \
-            or shape (n_categorical_features,), default=None
+            or shape (n_categorical_features,), default='from_dtype'
         Indicates the categorical features.
 
         - None : no feature will be considered categorical.

@@ -1880,7 +1880,7 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting):
         ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin
         is always reserved for missing values. Must be no larger than 255.
 
     categorical_features : array-like of {bool, int, str} of shape (n_features) \
-            or shape (n_categorical_features,), default=None
+            or shape (n_categorical_features,), default='from_dtype'
         Indicates the categorical features.
 
         - None : no feature will be considered categorical.
15 changes: 14 additions & 1 deletion sklearn/metrics/tests/test_common.py

@@ -1611,7 +1611,7 @@ def test_multiclass_sample_weight_invariance(name):
 @pytest.mark.parametrize(
     "name",
     sorted(
-        (MULTILABELS_METRICS | THRESHOLDED_MULTILABEL_METRICS | MULTIOUTPUT_METRICS)
+        (MULTILABELS_METRICS | THRESHOLDED_MULTILABEL_METRICS)
         - METRICS_WITHOUT_SAMPLE_WEIGHT
     ),
 )

@@ -1638,6 +1638,19 @@ def test_multilabel_sample_weight_invariance(name):
     check_sample_weight_invariance(name, metric, y_true, y_pred)
 
 
+@pytest.mark.parametrize(
+    "name",
+    sorted(MULTIOUTPUT_METRICS - METRICS_WITHOUT_SAMPLE_WEIGHT),
+)
+def test_multioutput_sample_weight_invariance(name):
+    random_state = check_random_state(0)
+    y_true = random_state.uniform(0, 2, size=(20, 5))
+    y_pred = random_state.uniform(0, 2, size=(20, 5))
+
+    metric = ALL_METRICS[name]
+    check_sample_weight_invariance(name, metric, y_true, y_pred)
+
+
 def test_no_averaging_labels():
     # test labels argument when not using averaging
     # in multi-class and multi-label cases
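The property exercised by check_sample_weight_invariance is that integer weights behave like sample repetition; a self-contained illustration for one multioutput metric (the metric choice here is illustrative, not taken from the test):

import numpy as np
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
y_true = rng.uniform(0, 2, size=(20, 5))
y_pred = rng.uniform(0, 2, size=(20, 5))
w = rng.randint(1, 4, size=20)

# Weighting sample i by the integer w[i] should match repeating it w[i] times.
weighted = mean_squared_error(y_true, y_pred, sample_weight=w)
repeated = mean_squared_error(
    np.repeat(y_true, w, axis=0), np.repeat(y_pred, w, axis=0)
)
assert np.isclose(weighted, repeated)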
3 changes: 3 additions & 0 deletions sklearn/mixture/_gaussian_mixture.py

@@ -693,6 +693,9 @@ class GaussianMixture(BaseMixture):
            [ 1.,  2.]])
     >>> gm.predict([[0, 0], [12, 3]])
     array([1, 0])
+
+    For a comparison of Gaussian Mixture with other clustering algorithms, see
+    :ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
     """

     _parameter_constraints: dict = {

