Prepare for first release
Intron7 committed Jun 10, 2022
1 parent cef4934 commit 0b51861
Showing 9 changed files with 290 additions and 266 deletions.
35 changes: 32 additions & 3 deletions .gitignore
@@ -20,7 +20,6 @@ parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
@@ -50,6 +49,7 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
@@ -72,6 +72,7 @@ instance/
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
@@ -82,7 +83,9 @@ profile_default/
ipython_config.py

# pyenv
.python-version
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
@@ -91,7 +94,22 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
@@ -127,3 +145,14 @@ dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# MacOS
.DS_Store
._.*
._*
16 changes: 9 additions & 7 deletions README.md
@@ -6,14 +6,14 @@ The functions are analogous versions of functions that can be found within [scan

## Requirements

To run the code in this repository you need a conda environment with rapids and scanpy installed. To use the full functionality of this repo please use `rapids-22.02`. You also need an Nvidia GPU.
To run the code in this repository you need a conda environment with rapids and scanpy installed. To use the full functionality of this repo please use `rapids-22.04`. You also need an Nvidia GPU.
```
conda create -n rapids-22.02_sc -c rapidsai -c nvidia -c conda-forge -c bioconda \
rapids=22.02 python=3.9 cudatoolkit=11.5 cudnn cutensor cusparselt \
leidenalg louvain fa2
conda activate rapids-22.02_sc
pip install scanpy gdown
ipython kernel install --user --name=rapids-22.02_sc
conda create -n rapids_singelcell -f conda/rapids_singecell.yml
ipython kernel install --user --name=rapids_singelcell
```
After you set up the environment you can install this package from this wheel into the environment. The wheel doesn't install any dependencies.
```
pip install https://github.com/Intron7/rapids_singlecell/releases/download/v0.1.0/rapids_singlecell-0.1.0-py3-none-any.whl
```

With this environment, you should be able to run the notebooks. So far I have only tested these notebooks on a Quadro RTX 6000 and an RTX 3090.
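
A minimal usage sketch of the installed package, based on what this commit adds: the top-level package exposes the `cunnData` and `scanpy_gpu_funcs` modules (see `rapids_singlecell/__init__.py` below), and `cunnData` objects have a `scale(max_value=10)` method (see `cunnData.py` below). The constructor arguments and the input file are assumptions for illustration, not part of this commit.
```
import scanpy as sc
import rapids_singlecell as rsc  # exposes the cunnData and scanpy_gpu_funcs modules

adata = sc.read_h5ad("pbmc.h5ad")       # hypothetical input file
cudata = rsc.cunnData.cunnData(adata)   # constructor arguments are an assumption
cudata.scale(max_value=10)              # scale() signature is taken from cunnData.py
```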
@@ -47,6 +47,7 @@ Please have a look at the notebooks to assess the functionality. I tried to write
* TSNE
* Kmeans Clustering
* Diffusion Maps
* Force Atlas 2 (draw_graph)
* rank_genes_groups with logistic regression
* some plotting functions for cunnData objects

@@ -74,5 +75,6 @@ Here are some benchmarks. I ran the notebook on the CPU with as many cores as we
|Logistic_Regression | 66 s | 3.7 s | 94 s | 8 s |
|Diffusion Map | 612 ms | 358 ms | 1 s | 1.9 s |


It seems like Turing-based GPUs are a lot slower at running the eigenvector calculations on sparse matrices needed for Diffusion Maps than Ampere-based ones.
I also observed that the first GPU run in a new environment is slower than subsequent runs (with a restarted kernel) (RTX 6000).
19 changes: 19 additions & 0 deletions conda/rapids_singecell.yml
@@ -0,0 +1,19 @@
channels:
- rapidsai
- nvidia
- conda-forge
- bioconda
dependencies:
- rapids=22.04
- python=3.9
- cudatoolkit=11.5
- cudnn
- cutensor
- cusparselt
- leidenalg
- louvain
- fa2
- pip
- pip:
- scanpy
- gdown
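
After creating the environment from this file, a quick sanity check could look like the sketch below; it assumes the listed packages (the RAPIDS stack plus scanpy) installed correctly and that a GPU is visible.
```
# Sanity-check sketch: verify that the RAPIDS stack and scanpy import and that a GPU is visible.
import cupy
import cudf, cuml, cugraph   # pulled in via the rapids meta-package
import scanpy                # installed through pip in this environment file

print(cupy.cuda.runtime.getDeviceCount())  # should report at least one Nvidia GPU
```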
196 changes: 94 additions & 102 deletions notebooks/demo_cpu.ipynb

Large diffs are not rendered by default.

231 changes: 109 additions & 122 deletions notebooks/demo_gpu.ipynb

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions pyproject.toml
@@ -0,0 +1,22 @@
[build-system]
build-backend = "flit_core.buildapi"
requires = [
"flit_core >=3.4,<4",
"setuptools_scm",
"importlib_metadata>=0.7; python_version < '3.10'",
]

[project]
name = "rapids_singlecell"
description = "running single cell analysis on Nvidia GPUs"
requires-python = ">=3.8"
license = {file = "LICENSE"}
authors = [{name = "Severin Dicks"}]
readme = {file = "README.md", content-type="text/markdown"}

version = "0.1.0"


[project.urls]
Source = "https://github.com/Intron7/rapids_singlecell"

2 changes: 2 additions & 0 deletions rapids_singlecell/__init__.py
@@ -0,0 +1,2 @@
from . import cunnData
from . import scanpy_gpu_funcs
29 changes: 2 additions & 27 deletions code/cunnData.py → rapids_singlecell/cunnData.py
@@ -5,8 +5,6 @@

import cupy as cp
import cupyx as cpx
import cudf
import cugraph
import anndata

import numpy as np
@@ -22,7 +20,7 @@
from cupyx.scipy.sparse import issparse as issparse_gpu

from cuml.linear_model import LinearRegression
from cuml.preprocessing import StandardScaler



class cunnData:
@@ -645,31 +643,8 @@ def regress_out(self, keys, verbose=False):
outputs[:, i] = _regress_out_chunk(X, y)
self.X = outputs

def scale(self, max_value=10):
"""
Scales matrix to unit variance and clips values
Parameters
----------
normalized : cupy.ndarray or numpy.ndarray of shape (n_cells, n_genes)
Matrix to scale
max_value : int
After scaling matrix to unit variance,
values will be clipped to this number
of std deviations.
Return
------
updates cunndata object with a scaled cunndata.X
"""
if type(self.X) is not cp._core.core.ndarray:
print("densifying _.X")
self.X = self.X.toarray()
X = StandardScaler().fit_transform(self.X)
self.X = cp.clip(X,a_max=max_value)

def scale_2(self, max_value=10):
def scale(self, max_value=10):
"""
Scales matrix to unit variance and clips values
Parameters
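
The `scale()` method kept in this commit scales the matrix to unit variance and then clips values, as its docstring above states. A rough sketch of that behaviour with plain cupy (assuming a dense matrix and column-wise statistics; an illustration, not the package's implementation):
```
import cupy as cp

# Illustration only: scale each gene (column) to zero mean and unit variance,
# then clip at max_value standard deviations, as the docstring describes.
max_value = 10
X = cp.random.rand(100, 5)
X = (X - X.mean(axis=0)) / X.std(axis=0)
X = cp.clip(X, None, max_value)
```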
@@ -6,14 +6,10 @@
import cupy as cp
import cudf
import cugraph
import anndata
import os

import numpy as np
import pandas as pd
import scipy
import math
from scipy import sparse
import seaborn as sns
import matplotlib.pyplot as plt
from natsort import natsorted
@@ -22,7 +18,7 @@
from cuml.manifold import TSNE
from cuml.cluster import KMeans
from cuml.decomposition import PCA
from cuml.linear_model import LinearRegression



def select_groups(labels, groups_order_subset='all'):

0 comments on commit 0b51861
