From 31a1ba07ceb0b9fe3cdeebbe3df594b7e0592901 Mon Sep 17 00:00:00 2001
From: Brian Hie
Date: Sun, 3 May 2020 14:15:37 -0400
Subject: [PATCH] remove custom tsne

---
 scanorama/__init__.py     |   2 +-
 scanorama/scanorama.py    |  10 +-
 scanorama/t_sne_approx.py | 886 --------------------------------------
 setup.py                  |   4 +-
 4 files changed, 8 insertions(+), 894 deletions(-)
 delete mode 100644 scanorama/t_sne_approx.py

diff --git a/scanorama/__init__.py b/scanorama/__init__.py
index e4a76be..c17ffa1 100644
--- a/scanorama/__init__.py
+++ b/scanorama/__init__.py
@@ -1,3 +1,3 @@
 from .scanorama import *
 
-__version__ = '1.5.2'
+__version__ = '1.6'
diff --git a/scanorama/scanorama.py b/scanorama/scanorama.py
index 6c86b79..dad5139 100644
--- a/scanorama/scanorama.py
+++ b/scanorama/scanorama.py
@@ -13,7 +13,6 @@
 import sys
 import warnings
 
-from .t_sne_approx import TSNEApprox
 from .utils import plt, dispersion, reduce_dimensionality
 from .utils import visualize_cluster, visualize_expr, visualize_dropout
 from .utils import handle_zeros_in_scale
@@ -421,14 +420,15 @@ def visualize(assembled, labels, namespace, data_names,
               n_iter=N_ITER, perplexity=PERPLEXITY, verbose=VERBOSE,
               learn_rate=200., early_exag=12., embedding=None,
               shuffle_ds=False, size=1, multicore_tsne=True,
-              image_suffix='.svg', viz_cluster=False, colors=None):
+              image_suffix='.svg', viz_cluster=False, colors=None,
+              random_state=None,):
     # Fit t-SNE.
     if embedding is None:
         try:
             from MulticoreTSNE import MulticoreTSNE
             tsne = MulticoreTSNE(
                 n_iter=n_iter, perplexity=perplexity,
-                verbose=verbose, random_state=69,
+                verbose=verbose, random_state=random_state,
                 learning_rate=learn_rate,
                 early_exaggeration=early_exag,
                 n_jobs=40
@@ -437,9 +437,9 @@ def visualize(assembled, labels, namespace, data_names,
             multicore_tsne = False
 
         if not multicore_tsne:
-            tsne = TSNEApprox(
+            tsne = TSNE(
                 n_iter=n_iter, perplexity=perplexity,
-                verbose=verbose, random_state=69,
+                verbose=verbose, random_state=random_state,
                 learning_rate=learn_rate,
                 early_exaggeration=early_exag
             )
diff --git a/scanorama/t_sne_approx.py b/scanorama/t_sne_approx.py
deleted file mode 100644
index 78e21a7..0000000
--- a/scanorama/t_sne_approx.py
+++ /dev/null
@@ -1,886 +0,0 @@
-# Modified by Brian Hie to use an approximate nearest
-# neighbors search.
-# Original source code available at:
-# https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/manifold/t_sne.py
-
-# Author: Alexander Fabisch  --
-# Author: Christopher Moody
-# Author: Nick Travers
-# License: BSD 3 clause (C) 2014
-
-# This is the exact and Barnes-Hut t-SNE implementation. There are other
-# modifications of the algorithm:
-# * Fast Optimization for t-SNE:
-#   http://cseweb.ucsd.edu/~lvdmaaten/workshops/nips2010/papers/vandermaaten.pdf
-
-from time import time
-import numpy as np
-from scipy import linalg
-import scipy.sparse as sp
-from scipy.spatial.distance import pdist
-from scipy.spatial.distance import squareform
-from scipy.sparse import csr_matrix
-from sklearn.neighbors import NearestNeighbors
-from sklearn.base import BaseEstimator
-from sklearn.utils import check_array
-from sklearn.utils import check_random_state
-from sklearn.decomposition import PCA
-from sklearn.metrics.pairwise import pairwise_distances
-from sklearn.manifold import _utils
-from sklearn.manifold import _barnes_hut_tsne
-from sklearn.externals.six import string_types
-from sklearn.utils import deprecated
-
-from annoy import AnnoyIndex
-
-MACHINE_EPSILON = np.finfo(np.double).eps
-
-
-def _joint_probabilities(distances, desired_perplexity, verbose):
-    """Compute joint probabilities p_ij from distances.
-
-    Parameters
-    ----------
-    distances : array, shape (n_samples * (n_samples-1) / 2,)
-        Distances of samples are stored as condensed matrices, i.e.
-        we omit the diagonal and duplicate entries and store everything
-        in a one-dimensional array.
-
-    desired_perplexity : float
-        Desired perplexity of the joint probability distributions.
-
-    verbose : int
-        Verbosity level.
-
-    Returns
-    -------
-    P : array, shape (n_samples * (n_samples-1) / 2,)
-        Condensed joint probability matrix.
-    """
-    # Compute conditional probabilities such that they approximately match
-    # the desired perplexity
-    distances = distances.astype(np.float32, copy=False)
-    conditional_P = _utils._binary_search_perplexity(
-        distances, None, desired_perplexity, verbose)
-    P = conditional_P + conditional_P.T
-    sum_P = np.maximum(np.sum(P), MACHINE_EPSILON)
-    P = np.maximum(squareform(P) / sum_P, MACHINE_EPSILON)
-    return P
-
-
-def _joint_probabilities_nn(distances, neighbors, desired_perplexity, verbose):
-    """Compute joint probabilities p_ij from distances using just nearest
-    neighbors.
-
-    This method is approximately equal to _joint_probabilities. The latter
-    is O(N^2), but limiting the joint probability to nearest neighbors improves
-    this substantially to O(uN).
-
-    Parameters
-    ----------
-    distances : array, shape (n_samples, k)
-        Distances of samples to their k nearest neighbors.
-
-    neighbors : array, shape (n_samples, k)
-        Indices of the k nearest-neighbors for each sample.
-
-    desired_perplexity : float
-        Desired perplexity of the joint probability distributions.
-
-    verbose : int
-        Verbosity level.
-
-    Returns
-    -------
-    P : csr sparse matrix, shape (n_samples, n_samples)
-        Condensed joint probability matrix with only nearest neighbors.
- """ - t0 = time() - # Compute conditional probabilities such that they approximately match - # the desired perplexity - n_samples, k = neighbors.shape - distances = distances.astype(np.float32, copy=False) - neighbors = neighbors.astype(np.int64, copy=False) - conditional_P = _utils._binary_search_perplexity( - distances, neighbors, desired_perplexity, verbose) - assert np.all(np.isfinite(conditional_P)), \ - "All probabilities should be finite" - - # Symmetrize the joint probability distribution using sparse operations - P = csr_matrix((conditional_P.ravel(), neighbors.ravel(), - range(0, n_samples * k + 1, k)), - shape=(n_samples, n_samples)) - P = P + P.T - - # Normalize the joint probability distribution - sum_P = np.maximum(P.sum(), MACHINE_EPSILON) - P /= sum_P - - assert np.all(np.abs(P.data) <= 1.0) - if verbose >= 2: - duration = time() - t0 - print("[t-SNE] Computed conditional probabilities in {:.3f}s" - .format(duration)) - return P - - -def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components, - skip_num_points=0): - """t-SNE objective function: gradient of the KL divergence - of p_ijs and q_ijs and the absolute error. - - Parameters - ---------- - params : array, shape (n_params,) - Unraveled embedding. - - P : array, shape (n_samples * (n_samples-1) / 2,) - Condensed joint probability matrix. - - degrees_of_freedom : float - Degrees of freedom of the Student's-t distribution. - - n_samples : int - Number of samples. - - n_components : int - Dimension of the embedded space. - - skip_num_points : int (optional, default:0) - This does not compute the gradient for points with indices below - `skip_num_points`. This is useful when computing transforms of new - data where you'd like to keep the old data fixed. - - Returns - ------- - kl_divergence : float - Kullback-Leibler divergence of p_ij and q_ij. - - grad : array, shape (n_params,) - Unraveled gradient of the Kullback-Leibler divergence with respect to - the embedding. - """ - X_embedded = params.reshape(n_samples, n_components) - - # Q is a heavy-tailed distribution: Student's t-distribution - dist = pdist(X_embedded, "sqeuclidean") - dist += 1. - dist /= degrees_of_freedom - dist **= (degrees_of_freedom + 1.0) / -2.0 - Q = np.maximum(dist / (2.0 * np.sum(dist)), MACHINE_EPSILON) - - # Optimization trick below: np.dot(x, y) is faster than - # np.sum(x * y) because it calls BLAS - - # Objective: C (Kullback-Leibler divergence of P and Q) - kl_divergence = 2.0 * np.dot(P, np.log(np.maximum(P, MACHINE_EPSILON) / Q)) - - # Gradient: dC/dY - # pdist always returns double precision distances. Thus we need to take - grad = np.ndarray((n_samples, n_components), dtype=params.dtype) - PQd = squareform((P - Q) * dist) - for i in range(skip_num_points, n_samples): - grad[i] = np.dot(np.ravel(PQd[i], order='K'), - X_embedded[i] - X_embedded) - grad = grad.ravel() - c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom - grad *= c - - return kl_divergence, grad - - -def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components, - angle=0.5, skip_num_points=0, verbose=False): - """t-SNE objective function: KL divergence of p_ijs and q_ijs. - - Uses Barnes-Hut tree methods to calculate the gradient that - runs in O(NlogN) instead of O(N^2) - - Parameters - ---------- - params : array, shape (n_params,) - Unraveled embedding. - - P : csr sparse matrix, shape (n_samples, n_sample) - Sparse approximate joint probability matrix, computed only for the - k nearest-neighbors and symmetrized. 
-
-    degrees_of_freedom : float
-        Degrees of freedom of the Student's-t distribution.
-
-    n_samples : int
-        Number of samples.
-
-    n_components : int
-        Dimension of the embedded space.
-
-    angle : float (default: 0.5)
-        This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
-        'angle' is the angular size (referred to as theta in [3]) of a distant
-        node as measured from a point. If this size is below 'angle' then it is
-        used as a summary node of all points contained within it.
-        This method is not very sensitive to changes in this parameter
-        in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
-        computation time and angle greater 0.8 has quickly increasing error.
-
-    skip_num_points : int (optional, default:0)
-        This does not compute the gradient for points with indices below
-        `skip_num_points`. This is useful when computing transforms of new
-        data where you'd like to keep the old data fixed.
-
-    verbose : int
-        Verbosity level.
-
-    Returns
-    -------
-    kl_divergence : float
-        Kullback-Leibler divergence of p_ij and q_ij.
-
-    grad : array, shape (n_params,)
-        Unraveled gradient of the Kullback-Leibler divergence with respect to
-        the embedding.
-    """
-    params = params.astype(np.float32, copy=False)
-    X_embedded = params.reshape(n_samples, n_components)
-
-    val_P = P.data.astype(np.float32, copy=False)
-    neighbors = P.indices.astype(np.int64, copy=False)
-    indptr = P.indptr.astype(np.int64, copy=False)
-
-    grad = np.zeros(X_embedded.shape, dtype=np.float32)
-    error = _barnes_hut_tsne.gradient(val_P, X_embedded, neighbors, indptr,
-                                      grad, angle, n_components, verbose,
-                                      dof=degrees_of_freedom)
-    c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
-    grad = grad.ravel()
-    grad *= c
-
-    return error, grad
-
-
-def _gradient_descent(objective, p0, it, n_iter,
-                      n_iter_check=1, n_iter_without_progress=300,
-                      momentum=0.8, learning_rate=200.0, min_gain=0.01,
-                      min_grad_norm=1e-7, verbose=0, args=None, kwargs=None):
-    """Batch gradient descent with momentum and individual gains.
-
-    Parameters
-    ----------
-    objective : function or callable
-        Should return a tuple of cost and gradient for a given parameter
-        vector. When expensive to compute, the cost can optionally
-        be None and can be computed every n_iter_check steps using
-        the objective_error function.
-
-    p0 : array-like, shape (n_params,)
-        Initial parameter vector.
-
-    it : int
-        Current number of iterations (this function will be called more than
-        once during the optimization).
-
-    n_iter : int
-        Maximum number of gradient descent iterations.
-
-    n_iter_check : int
-        Number of iterations before evaluating the global error. If the error
-        is sufficiently low, we abort the optimization.
-
-    n_iter_without_progress : int, optional (default: 300)
-        Maximum number of iterations without progress before we abort the
-        optimization.
-
-    momentum : float, within (0.0, 1.0), optional (default: 0.8)
-        The momentum generates a weight for previous gradients that decays
-        exponentially.
-
-    learning_rate : float, optional (default: 200.0)
-        The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If
-        the learning rate is too high, the data may look like a 'ball' with any
-        point approximately equidistant from its nearest neighbours. If the
-        learning rate is too low, most points may look compressed in a dense
-        cloud with few outliers.
-
-    min_gain : float, optional (default: 0.01)
-        Minimum individual gain for each parameter.
-
-    min_grad_norm : float, optional (default: 1e-7)
-        If the gradient norm is below this threshold, the optimization will
-        be aborted.
-
-    verbose : int, optional (default: 0)
-        Verbosity level.
-
-    args : sequence
-        Arguments to pass to objective function.
-
-    kwargs : dict
-        Keyword arguments to pass to objective function.
-
-    Returns
-    -------
-    p : array, shape (n_params,)
-        Optimum parameters.
-
-    error : float
-        Optimum.
-
-    i : int
-        Last iteration.
-    """
-    if args is None:
-        args = []
-    if kwargs is None:
-        kwargs = {}
-
-    p = p0.copy().ravel()
-    update = np.zeros_like(p)
-    gains = np.ones_like(p)
-    error = np.finfo(np.float).max
-    best_error = np.finfo(np.float).max
-    best_iter = i = it
-
-    tic = time()
-    for i in range(it, n_iter):
-        error, grad = objective(p, *args, **kwargs)
-        grad_norm = linalg.norm(grad)
-
-        inc = update * grad < 0.0
-        dec = np.invert(inc)
-        gains[inc] += 0.2
-        gains[dec] *= 0.8
-        np.clip(gains, min_gain, np.inf, out=gains)
-        grad *= gains
-        update = momentum * update - learning_rate * grad
-        p += update
-
-        if (i + 1) % n_iter_check == 0:
-            toc = time()
-            duration = toc - tic
-            tic = toc
-
-            if verbose >= 2:
-                print("[t-SNE] Iteration %d: error = %.7f,"
-                      " gradient norm = %.7f"
-                      " (%s iterations in %0.3fs)"
-                      % (i + 1, error, grad_norm, n_iter_check, duration))
-
-            if error < best_error:
-                best_error = error
-                best_iter = i
-            elif i - best_iter > n_iter_without_progress:
-                if verbose >= 2:
-                    print("[t-SNE] Iteration %d: did not make any progress "
-                          "during the last %d episodes. Finished."
-                          % (i + 1, n_iter_without_progress))
-                break
-            if grad_norm <= min_grad_norm:
-                if verbose >= 2:
-                    print("[t-SNE] Iteration %d: gradient norm %f. Finished."
-                          % (i + 1, grad_norm))
-                break
-
-    return p, error, i
-
-
-def trustworthiness(X, X_embedded, n_neighbors=5, precomputed=False):
-    """Expresses to what extent the local structure is retained.
-
-    The trustworthiness is within [0, 1]. It is defined as
-
-    .. math::
-
-        T(k) = 1 - \frac{2}{nk (2n - 3k - 1)} \sum^n_{i=1}
-            \sum_{j \in U^{(k)}_i} (r(i, j) - k)
-
-    where :math:`r(i, j)` is the rank of the embedded datapoint j
-    according to the pairwise distances between the embedded datapoints,
-    :math:`U^{(k)}_i` is the set of points that are in the k nearest
-    neighbors in the embedded space but not in the original space.
-
-    * "Neighborhood Preservation in Nonlinear Projection Methods: An
-      Experimental Study"
-      J. Venna, S. Kaski
-    * "Learning a Parametric Embedding by Preserving Local Structure"
-      L.J.P. van der Maaten
-
-    Parameters
-    ----------
-    X : array, shape (n_samples, n_features) or (n_samples, n_samples)
-        If the metric is 'precomputed' X must be a square distance
-        matrix. Otherwise it contains a sample per row.
-
-    X_embedded : array, shape (n_samples, n_components)
-        Embedding of the training data in low-dimensional space.
-
-    n_neighbors : int, optional (default: 5)
-        Number of neighbors k that will be considered.
-
-    precomputed : bool, optional (default: False)
-        Set this flag if X is a precomputed square distance matrix.
-
-    Returns
-    -------
-    trustworthiness : float
-        Trustworthiness of the low-dimensional embedding.
- """ - if precomputed: - dist_X = X - else: - dist_X = pairwise_distances(X, squared=True) - dist_X_embedded = pairwise_distances(X_embedded, squared=True) - ind_X = np.argsort(dist_X, axis=1) - ind_X_embedded = np.argsort(dist_X_embedded, axis=1)[:, 1:n_neighbors + 1] - - n_samples = X.shape[0] - t = 0.0 - ranks = np.zeros(n_neighbors) - for i in range(n_samples): - for j in range(n_neighbors): - ranks[j] = np.where(ind_X[i] == ind_X_embedded[i, j])[0][0] - ranks -= n_neighbors - t += np.sum(ranks[ranks > 0]) - t = 1.0 - t * (2.0 / (n_samples * n_neighbors * - (2.0 * n_samples - 3.0 * n_neighbors - 1.0))) - return t - - -class TSNEApprox(BaseEstimator): - """t-distributed Stochastic Neighbor Embedding. - - t-SNE [1] is a tool to visualize high-dimensional data. It converts - similarities between data points to joint probabilities and tries - to minimize the Kullback-Leibler divergence between the joint - probabilities of the low-dimensional embedding and the - high-dimensional data. t-SNE has a cost function that is not convex, - i.e. with different initializations we can get different results. - - It is highly recommended to use another dimensionality reduction - method (e.g. PCA for dense data or TruncatedSVD for sparse data) - to reduce the number of dimensions to a reasonable amount (e.g. 50) - if the number of features is very high. This will suppress some - noise and speed up the computation of pairwise distances between - samples. For more tips see Laurens van der Maaten's FAQ [2]. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - n_components : int, optional (default: 2) - Dimension of the embedded space. - - perplexity : float, optional (default: 30) - The perplexity is related to the number of nearest neighbors that - is used in other manifold learning algorithms. Larger datasets - usually require a larger perplexity. Consider selecting a value - between 5 and 50. The choice is not extremely critical since t-SNE - is quite insensitive to this parameter. - - early_exaggeration : float, optional (default: 12.0) - Controls how tight natural clusters in the original space are in - the embedded space and how much space will be between them. For - larger values, the space between natural clusters will be larger - in the embedded space. Again, the choice of this parameter is not - very critical. If the cost function increases during initial - optimization, the early exaggeration factor or the learning rate - might be too high. - - learning_rate : float, optional (default: 200.0) - The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If - the learning rate is too high, the data may look like a 'ball' with any - point approximately equidistant from its nearest neighbours. If the - learning rate is too low, most points may look compressed in a dense - cloud with few outliers. If the cost function gets stuck in a bad local - minimum increasing the learning rate may help. - - n_iter : int, optional (default: 1000) - Maximum number of iterations for the optimization. Should be at - least 250. - - n_iter_without_progress : int, optional (default: 300) - Maximum number of iterations without progress before we abort the - optimization, used after 250 initial iterations with early - exaggeration. Note that progress is only checked every 50 iterations so - this value is rounded to the next multiple of 50. - - .. versionadded:: 0.17 - parameter *n_iter_without_progress* to control stopping criteria. 
-
-    min_grad_norm : float, optional (default: 1e-7)
-        If the gradient norm is below this threshold, the optimization will
-        be stopped.
-
-    metric : string or callable, optional
-        The metric to use when calculating distance between instances in a
-        feature array. If metric is a string, it must be one of the options
-        allowed by scipy.spatial.distance.pdist for its metric parameter, or
-        a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.
-        If metric is "precomputed", X is assumed to be a distance matrix.
-        Alternatively, if metric is a callable function, it is called on each
-        pair of instances (rows) and the resulting value recorded. The callable
-        should take two arrays from X as input and return a value indicating
-        the distance between them. The default is "euclidean" which is
-        interpreted as squared euclidean distance.
-
-    init : string or numpy array, optional (default: "random")
-        Initialization of embedding. Possible options are 'random', 'pca',
-        and a numpy array of shape (n_samples, n_components).
-        PCA initialization cannot be used with precomputed distances and is
-        usually more globally stable than random initialization.
-
-    verbose : int, optional (default: 0)
-        Verbosity level.
-
-    random_state : int, RandomState instance or None, optional (default: None)
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by `np.random`. Note that different initializations might result in
-        different local minima of the cost function.
-
-    method : string (default: 'barnes_hut')
-        By default the gradient calculation algorithm uses Barnes-Hut
-        approximation running in O(NlogN) time. method='exact'
-        will run on the slower, but exact, algorithm in O(N^2) time. The
-        exact algorithm should be used when nearest-neighbor errors need
-        to be better than 3%. However, the exact method cannot scale to
-        millions of examples.
-
-        .. versionadded:: 0.17
-           Approximate optimization *method* via the Barnes-Hut.
-
-    angle : float (default: 0.5)
-        Only used if method='barnes_hut'
-        This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
-        'angle' is the angular size (referred to as theta in [3]) of a distant
-        node as measured from a point. If this size is below 'angle' then it is
-        used as a summary node of all points contained within it.
-        This method is not very sensitive to changes in this parameter
-        in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
-        computation time and angle greater 0.8 has quickly increasing error.
-
-    Attributes
-    ----------
-    embedding_ : array-like, shape (n_samples, n_components)
-        Stores the embedding vectors.
-
-    kl_divergence_ : float
-        Kullback-Leibler divergence after optimization.
-
-    n_iter_ : int
-        Number of iterations run.
-
-    Examples
-    --------
-
-    >>> import numpy as np
-    >>> from sklearn.manifold import TSNE
-    >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
-    >>> X_embedded = TSNE(n_components=2).fit_transform(X)
-    >>> X_embedded.shape
-    (4, 2)
-
-    References
-    ----------
-
-    [1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data
-        Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.
-
-    [2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding
-        http://homepage.tudelft.nl/19j49/t-SNE.html
-
-    [3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.
-        Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
-        http://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf
-    """
-    # Control the number of exploration iterations with early_exaggeration on
-    _EXPLORATION_N_ITER = 250
-
-    # Control the number of iterations between progress checks
-    _N_ITER_CHECK = 50
-
-    def __init__(self, n_components=2, perplexity=30.0,
-                 early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,
-                 n_iter_without_progress=300, min_grad_norm=1e-7,
-                 metric="euclidean", init="random", verbose=0,
-                 random_state=None, method='barnes_hut', angle=0.5):
-        self.n_components = n_components
-        self.perplexity = perplexity
-        self.early_exaggeration = early_exaggeration
-        self.learning_rate = learning_rate
-        self.n_iter = n_iter
-        self.n_iter_without_progress = n_iter_without_progress
-        self.min_grad_norm = min_grad_norm
-        self.metric = metric
-        self.init = init
-        self.verbose = verbose
-        self.random_state = random_state
-        self.method = method
-        self.angle = angle
-
-    def _fit(self, X, skip_num_points=0):
-        """Fit the model using X as training data.
-
-        Note that sparse arrays can only be handled by method='exact'.
-        It is recommended that you convert your sparse array to dense
-        (e.g. `X.toarray()`) if it fits in memory, or otherwise using a
-        dimensionality reduction technique (e.g. TruncatedSVD).
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
-            If the metric is 'precomputed' X must be a square distance
-            matrix. Otherwise it contains a sample per row. Note that
-            when method='barnes_hut', X cannot be a sparse array and if need be
-            will be converted to a 32 bit float array. Method='exact' allows
-            sparse arrays and 64bit floating point inputs.
-
-        skip_num_points : int (optional, default:0)
-            This does not compute the gradient for points with indices below
-            `skip_num_points`. This is useful when computing transforms of new
-            data where you'd like to keep the old data fixed.
-        """
-        if self.method not in ['barnes_hut', 'exact']:
-            raise ValueError("'method' must be 'barnes_hut' or 'exact'")
-        if self.angle < 0.0 or self.angle > 1.0:
-            raise ValueError("'angle' must be between 0.0 - 1.0")
-        if self.metric == "precomputed":
-            if isinstance(self.init, string_types) and self.init == 'pca':
-                raise ValueError("The parameter init=\"pca\" cannot be "
-                                 "used with metric=\"precomputed\".")
-            if X.shape[0] != X.shape[1]:
-                raise ValueError("X should be a square distance matrix")
-            if np.any(X < 0):
-                raise ValueError("All distances should be positive, the "
-                                 "precomputed distances given as X is not "
-                                 "correct")
-        if self.method == 'barnes_hut' and sp.issparse(X):
-            raise TypeError('A sparse matrix was passed, but dense '
-                            'data is required for method="barnes_hut". Use '
-                            'X.toarray() to convert to a dense numpy array if '
-                            'the array is small enough for it to fit in '
-                            'memory. Otherwise consider dimensionality '
-                            'reduction techniques (e.g. TruncatedSVD)')
-        else:
-            X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
-                            dtype=[np.float32, np.float64])
-        if self.method == 'barnes_hut' and self.n_components > 3:
-            raise ValueError("'n_components' should be inferior to 4 for the "
-                             "barnes_hut algorithm as it relies on "
-                             "quad-tree or oct-tree.")
-        random_state = check_random_state(self.random_state)
-
-        if self.early_exaggeration < 1.0:
-            raise ValueError("early_exaggeration must be at least 1, but is {}"
-                             .format(self.early_exaggeration))
-
-        if self.n_iter < 250:
-            raise ValueError("n_iter should be at least 250")
-
-        n_samples = X.shape[0]
-
-        neighbors_nn = None
-        if self.method == "exact":
-            # Retrieve the distance matrix, either using the precomputed one or
-            # computing it.
-            if self.metric == "precomputed":
-                distances = X
-            else:
-                if self.verbose:
-                    print("[t-SNE] Computing pairwise distances...")
-
-                if self.metric == "euclidean":
-                    distances = pairwise_distances(X, metric=self.metric,
-                                                   squared=True)
-                else:
-                    distances = pairwise_distances(X, metric=self.metric)
-
-                if np.any(distances < 0):
-                    raise ValueError("All distances should be positive, the "
-                                     "metric given is not correct")
-
-            # compute the joint probability distribution for the input space
-            P = _joint_probabilities(distances, self.perplexity, self.verbose)
-            assert np.all(np.isfinite(P)), "All probabilities should be finite"
-            assert np.all(P >= 0), "All probabilities should be non-negative"
-            assert np.all(P <= 1), ("All probabilities should be less "
-                                    "than or equal to one")
-
-        else:
-            # Compute the number of nearest neighbors to find.
-            # LvdM uses 3 * perplexity as the number of neighbors.
-            # In the event that we have very small # of points
-            # set the neighbors to n - 1.
-            k = min(n_samples - 1, int(3. * self.perplexity + 1))
-
-            if self.verbose:
-                print("[t-SNE] Computing {} nearest neighbors...".format(k))
-
-            # Find the nearest neighbors for every point
-            neighbors_method = 'ball_tree'
-            if (self.metric == 'precomputed'):
-                neighbors_method = 'brute'
-            knn = AnnoyIndex(X.shape[1], metric='euclidean')
-            t0 = time()
-            for i in range(n_samples):
-                knn.add_item(i, X[i, :])
-            knn.build(50)
-            duration = time() - t0
-            if self.verbose:
-                print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
-                    n_samples, duration))
-
-            t0 = time()
-            neighbors_nn = np.zeros((n_samples, k), dtype=int)
-            distances_nn = np.zeros((n_samples, k))
-            for i in range(n_samples):
-                (neighbors_nn[i, :], distances_nn[i, :]) = knn.get_nns_by_vector(
-                    X[i, :], k, include_distances=True
-                )
-            duration = time() - t0
-            if self.verbose:
-                print("[t-SNE] Computed neighbors for {} samples in {:.3f}s..."
-                      .format(n_samples, duration))
-
-            # Free the memory used by the Annoy index
-            del knn
-
-            if self.metric == "euclidean":
-                # knn returns the euclidean distance but we need it squared
-                # to be consistent with the 'exact' method. Note that the
-                # method was derived using the euclidean method as in the
-                # input space. Not sure of the implication of using a different
-                # metric.
-                distances_nn **= 2
-
-            # compute the joint probability distribution for the input space
-            P = _joint_probabilities_nn(distances_nn, neighbors_nn,
-                                        self.perplexity, self.verbose)
-
-        if isinstance(self.init, np.ndarray):
-            X_embedded = self.init
-        elif self.init == 'pca':
-            pca = PCA(n_components=self.n_components, svd_solver='randomized',
-                      random_state=random_state)
-            X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)
-        elif self.init == 'random':
-            # The embedding is initialized with iid samples from Gaussians with
-            # standard deviation 1e-4.
-            X_embedded = 1e-4 * random_state.randn(
-                n_samples, self.n_components).astype(np.float32)
-        else:
-            raise ValueError("'init' must be 'pca', 'random', or "
-                             "a numpy array")
-
-        # Degrees of freedom of the Student's t-distribution. The suggestion
-        # degrees_of_freedom = n_components - 1 comes from
-        # "Learning a Parametric Embedding by Preserving Local Structure"
-        # Laurens van der Maaten, 2009.
-        degrees_of_freedom = max(self.n_components - 1.0, 1)
-
-        return self._tsne(P, degrees_of_freedom, n_samples, random_state,
-                          X_embedded=X_embedded,
-                          neighbors=neighbors_nn,
-                          skip_num_points=skip_num_points)
-
-    @property
-    @deprecated("Attribute n_iter_final was deprecated in version 0.19 and "
-                "will be removed in 0.21. Use ``n_iter_`` instead")
-    def n_iter_final(self):
-        return self.n_iter_
-
-    def _tsne(self, P, degrees_of_freedom, n_samples, random_state, X_embedded,
-              neighbors=None, skip_num_points=0):
-        """Runs t-SNE."""
-        # t-SNE minimizes the Kullback-Leibler divergence of the Gaussians P
-        # and the Student's t-distributions Q. The optimization algorithm that
-        # we use is batch gradient descent with two stages:
-        # * initial optimization with early exaggeration and momentum at 0.5
-        # * final optimization with momentum at 0.8
-        params = X_embedded.ravel()
-
-        opt_args = {
-            "it": 0,
-            "n_iter_check": self._N_ITER_CHECK,
-            "min_grad_norm": self.min_grad_norm,
-            "learning_rate": self.learning_rate,
-            "verbose": self.verbose,
-            "kwargs": dict(skip_num_points=skip_num_points),
-            "args": [P, degrees_of_freedom, n_samples, self.n_components],
-            "n_iter_without_progress": self._EXPLORATION_N_ITER,
-            "n_iter": self._EXPLORATION_N_ITER,
-            "momentum": 0.5,
-        }
-        if self.method == 'barnes_hut':
-            obj_func = _kl_divergence_bh
-            opt_args['kwargs']['angle'] = self.angle
-            # Repeat verbose argument for _kl_divergence_bh
-            opt_args['kwargs']['verbose'] = self.verbose
-        else:
-            obj_func = _kl_divergence
-
-        # Learning schedule (part 1): do 250 iterations with lower momentum but
-        # higher learning rate controlled via the early exaggeration parameter
-        P *= self.early_exaggeration
-        params, kl_divergence, it = _gradient_descent(obj_func, params,
-                                                      **opt_args)
-        if self.verbose:
-            print("[t-SNE] KL divergence after %d iterations with early "
-                  "exaggeration: %f" % (it + 1, kl_divergence))
-
-        # Learning schedule (part 2): disable early exaggeration and finish
-        # optimization with a higher momentum at 0.8
-        P /= self.early_exaggeration
-        remaining = self.n_iter - self._EXPLORATION_N_ITER
-        if it < self._EXPLORATION_N_ITER or remaining > 0:
-            opt_args['n_iter'] = self.n_iter
-            opt_args['it'] = it + 1
-            opt_args['momentum'] = 0.8
-            opt_args['n_iter_without_progress'] = self.n_iter_without_progress
-            params, kl_divergence, it = _gradient_descent(obj_func, params,
-                                                          **opt_args)
-
-        # Save the final number of iterations
-        self.n_iter_ = it
-
-        if self.verbose:
-            print("[t-SNE] Error after %d iterations: %f"
-                  % (it + 1, kl_divergence))
-
-        X_embedded = params.reshape(n_samples, self.n_components)
-        self.kl_divergence_ = kl_divergence
-
-        return X_embedded
-
-    def fit_transform(self, X, y=None):
-        """Fit X into an embedded space and return that transformed
-        output.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
-            If the metric is 'precomputed' X must be a square distance
-            matrix. Otherwise it contains a sample per row.
-
-        Returns
-        -------
-        X_new : array, shape (n_samples, n_components)
-            Embedding of the training data in low-dimensional space.
-        """
-        embedding = self._fit(X)
-        self.embedding_ = embedding
-        return self.embedding_
-
-    def fit(self, X, y=None):
-        """Fit X into an embedded space.
-
-        Parameters
-        ----------
-        X : array, shape (n_samples, n_features) or (n_samples, n_samples)
-            If the metric is 'precomputed' X must be a square distance
-            matrix. Otherwise it contains a sample per row. If the method
-            is 'exact', X may be a sparse matrix of type 'csr', 'csc'
-            or 'coo'.
-        """
-        self.fit_transform(X)
-        return self
diff --git a/setup.py b/setup.py
index 9b6158d..cf5d9d2 100644
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,10 @@
 
 setup(
     name='scanorama',
-    version='1.5.2',
+    version='1.6',
     description='Panoramic stitching of heterogeneous single cell transcriptomic data',
     url='https://github.com/brianhie/scanorama',
-    download_url='https://github.com/brianhie/scanorama/archive/v1.5.2.tar.gz',
+    download_url='https://github.com/brianhie/scanorama/archive/v1.6.tar.gz',
    packages=find_packages(exclude=['bin', 'conf', 'data', 'target']),
     install_requires=[
         'annoy>=1.11.5',
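
For reference, a minimal sketch of how the patched visualize() entry point might be called after this change. The random_state keyword added above replaces the previously hardcoded seed of 69, so passing an integer restores a reproducible t-SNE layout. The toy arrays, dataset names, and output namespace below are illustrative assumptions, not values this commit defines.

    import numpy as np
    from scanorama import visualize  # visualize() is defined in scanorama/scanorama.py

    # Stand-in "assembled" embeddings and per-cell integer labels; real
    # callers would pass the output of Scanorama integration here.
    assembled = [np.random.rand(100, 20), np.random.rand(150, 20)]
    labels = np.concatenate([np.zeros(100, dtype=int),
                             np.ones(150, dtype=int)])

    # random_state is the parameter introduced by this commit: an int gives
    # a reproducible layout, None (the new default) leaves seeding to the
    # backend. Per the hunk above, multicore_tsne=False selects the sklearn
    # TSNE path rather than MulticoreTSNE.
    visualize(assembled, labels, 'example_viz', ['batch1', 'batch2'],
              multicore_tsne=False, random_state=42)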