diff --git a/polyfuzz/models/_utils.py b/polyfuzz/models/_utils.py
index c155287..686dcf8 100644
--- a/polyfuzz/models/_utils.py
+++ b/polyfuzz/models/_utils.py
@@ -1,5 +1,3 @@
-import importlib.util
-
 import numpy as np
 import pandas as pd
 from scipy.sparse import csr_matrix
@@ -7,10 +5,8 @@ from sklearn.neighbors import NearestNeighbors
 from sklearn.metrics.pairwise import cosine_similarity as scikit_cosine_similarity
 
 
-_HAVE_SPARSE_DOT = importlib.util.find_spec("sparse_dot_topn") is not None
-if _HAVE_SPARSE_DOT:
-    from sparse_dot_topn import sp_matmul_topn
 
+from polyfuzz.models._utils_sdtn import _HAVE_SPARSE_DOT, sp_matmul_topn
 
 def cosine_similarity(from_vector: np.ndarray,
                       to_vector: np.ndarray,
diff --git a/polyfuzz/models/_utils_sdtn.py b/polyfuzz/models/_utils_sdtn.py
new file mode 100644
index 0000000..32a2f9b
--- /dev/null
+++ b/polyfuzz/models/_utils_sdtn.py
@@ -0,0 +1,65 @@
+"""Compatibility shim around the optional ``sparse_dot_topn`` dependency.
+
+Exposes ``_HAVE_SPARSE_DOT`` (whether the package can be found) and a
+``sp_matmul_topn`` callable. Both names are always bound, so importing them
+never fails when ``sparse_dot_topn`` is not installed.
+"""
+import importlib.util
+from typing import Optional
+
+from scipy.sparse import csr_matrix
+
+__all__ = ["_HAVE_SPARSE_DOT", "sp_matmul_topn"]
+
+_HAVE_SPARSE_DOT = importlib.util.find_spec("sparse_dot_topn") is not None
+
+if _HAVE_SPARSE_DOT:
+    try:
+        # Select the API by what the installed package actually provides
+        # rather than by interpreter version.
+        from sparse_dot_topn import sp_matmul_topn
+    except ImportError:
+        from sparse_dot_topn import awesome_cossim_topn
+
+        def sp_matmul_topn(
+            A: csr_matrix,
+            B: csr_matrix,
+            top_n: int,
+            threshold: float,
+            sort: bool = True,
+            n_threads: Optional[int] = None,
+        ):
+            """Adapt ``awesome_cossim_topn`` to the ``sp_matmul_topn`` signature.
+
+            Args:
+                A: Left-hand sparse matrix.
+                B: Right-hand sparse matrix; it is transposed before the call.
+                top_n: Requested number of results per row; at least 2 is
+                    always requested from ``awesome_cossim_topn``.
+                threshold: Forwarded as ``lower_bound``.
+                sort: Accepted for signature compatibility only; not forwarded.
+                n_threads: Forwarded as ``n_jobs``; ``None`` or ``0`` means 1.
+                    Threading is enabled only when this is greater than 1.
+
+            Returns:
+                Whatever ``awesome_cossim_topn`` returns for these arguments.
+            """
+            # NOTE(review): ``B`` is transposed here, so callers must pass the
+            # untransposed matrix -- confirm this matches the call site.
+            n_threads = n_threads or 1
+            return awesome_cossim_topn(
+                A,
+                B.T,
+                ntop=max(top_n, 2),
+                lower_bound=threshold,
+                use_threads=n_threads > 1,
+                n_jobs=n_threads,
+            )
+
+else:
+
+    def sp_matmul_topn(*args, **kwargs):
+        """Fail with a clear message when ``sparse_dot_topn`` is missing."""
+        raise ImportError(
+            "sparse_dot_topn is not installed, so sp_matmul_topn is unavailable"
+        )
diff --git a/setup.py b/setup.py
index aa021b9..55dd828 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,10 @@
     "sentence-transformers>=0.4.1"
 ]
 
-fast_cosine = ["sparse_dot_topn>=1.1.1"]
+fast_cosine = [
+    "sparse_dot_topn<1.0; python_version < '3.8'",
+    "sparse_dot_topn>=1.1.1; python_version >= '3.8'",
+]
 
 embeddings_packages = [
     "torch>=1.4.0",