diff --git a/docker/Dockerfile b/docker/Dockerfile index 72b9654b..304ce665 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,8 +57,7 @@ RUN apt-get -qq update \ # Required for R gfortran \ # Required for rpy2 - r-base-dev \ - r-base \ + r-cran-rjava \ # Required for R-package devtools (which is required for SJD) libharfbuzz-dev \ libfribidi-dev \ diff --git a/docker/install_bioc.R b/docker/install_bioc.R index e7a25f04..a671613c 100755 --- a/docker/install_bioc.R +++ b/docker/install_bioc.R @@ -1,5 +1,6 @@ #!/usr/bin/env Rscript --vanilla install.packages(c("BiocManager", "devtools"), dependencies=TRUE, repos="http://lib.stat.cmu.edu/R/CRAN/") +BiocManager::install(version = "3.19") # required for R 4.4.0 BiocManager::install(c("genesofeve/projectR", "biomaRt"), ask=FALSE) library(devtools); install_github("CHuanSite/SJD") \ No newline at end of file diff --git a/docker/install_bioc.sh b/docker/install_bioc.sh index 000b4a9d..06df8dec 100755 --- a/docker/install_bioc.sh +++ b/docker/install_bioc.sh @@ -5,7 +5,7 @@ Rver="${Rmaj}.4.0" current_dir=$(pwd) -curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt +curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt || exit 1 cd /opt/${Rver} /opt/${Rver}/configure --with-readline=no --enable-R-shlib --enable-BLAS-shlib --with-x=no || exit 1 make || exit 1 diff --git a/docker/requirements.txt b/docker/requirements.txt index 7f6d6700..6389c8ce 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -20,7 +20,7 @@ more_itertools==9.0.0 mysql-connector-python==8.0.20 numba==0.58.1 numexpr==2.8.4 -numpy==1.26.0 +numpy==1.26.4 opencv-python==4.5.5.64 openpyxl==3.1.5 pandas==2.2.1 @@ -29,7 +29,7 @@ pika==1.3.1 plotly==5.6.0 python-dotenv==0.20.0 requests==2.31.0 -rpy2==3.5.1 # 3.5.2 and up gives errors with rpy2py and py2rpy +rpy2==3.5.16 sanic scanpy==1.10.1 scikit-learn==1.0.2 diff --git a/docs/setup.python.md 
b/docs/setup.python.md index 16c8a262..24f99a68 100644 --- a/docs/setup.python.md +++ b/docs/setup.python.md @@ -60,7 +60,7 @@ Check the requirement.txt file in /docker for the latest packages mysql-connector-python==8.0.20 \ numba==0.58.1 \ numexpr==2.8.4 \ - numpy==1.26.0 \ + numpy==1.26.4 \ opencv-python==4.5.5.64 \ openpyxl==3.1.5 \ pandas==2.2.1 \ diff --git a/services/projectr/Dockerfile b/services/projectr/Dockerfile index ed642cae..92adabcc 100644 --- a/services/projectr/Dockerfile +++ b/services/projectr/Dockerfile @@ -11,8 +11,7 @@ RUN apt-get -qq update \ # Required for R gfortran \ # Required for rpy2 - r-base-dev \ - r-base \ + r-cran-rjava \ # Required for R-package devtools (which is required for SJD) libharfbuzz-dev \ libfribidi-dev \ diff --git a/services/projectr/install_bioc.R b/services/projectr/install_bioc.R index e7a25f04..a671613c 100755 --- a/services/projectr/install_bioc.R +++ b/services/projectr/install_bioc.R @@ -1,5 +1,6 @@ #!/usr/bin/env Rscript --vanilla install.packages(c("BiocManager", "devtools"), dependencies=TRUE, repos="http://lib.stat.cmu.edu/R/CRAN/") +BiocManager::install(version = "3.19") # required for R 4.4.0 BiocManager::install(c("genesofeve/projectR", "biomaRt"), ask=FALSE) library(devtools); install_github("CHuanSite/SJD") \ No newline at end of file diff --git a/services/projectr/install_bioc.sh b/services/projectr/install_bioc.sh index 000b4a9d..06df8dec 100755 --- a/services/projectr/install_bioc.sh +++ b/services/projectr/install_bioc.sh @@ -5,7 +5,7 @@ Rver="${Rmaj}.4.0" current_dir=$(pwd) -curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt +curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt || exit 1 cd /opt/${Rver} /opt/${Rver}/configure --with-readline=no --enable-R-shlib --enable-BLAS-shlib --with-x=no || exit 1 make || exit 1 diff --git a/services/projectr/requirements.txt b/services/projectr/requirements.txt index 
d1315aa9..8605072d 100644 --- a/services/projectr/requirements.txt +++ b/services/projectr/requirements.txt @@ -1,8 +1,6 @@ Flask==3.0.0 gunicorn==20.1.0 -rpy2==3.5.1 # 3.5.2 and up gives errors with rpy2py and py2rpy -#rpy2==3.5.16 -#pandas==2.2.1 +rpy2==3.5.16 +pandas==2.2.1 numpy==1.26.4 # https://stackoverflow.com/a/78641304 -pandas==1.4.1 google-cloud-logging \ No newline at end of file diff --git a/services/projectr/rfuncs.py b/services/projectr/rfuncs.py index 6346c922..c7e15251 100644 --- a/services/projectr/rfuncs.py +++ b/services/projectr/rfuncs.py @@ -16,6 +16,10 @@ from rpy2.robjects.conversion import localconverter from rpy2.robjects.vectors import StrVector +# If running locally, need to ensure that multiple concurrent R calls do not conflict +from rpy2.rinterface_lib import openrlib + + class RError(Exception): """Error based on issues that would manifest in any particular R-language call.""" def __init__(self, message="") -> None: @@ -45,51 +49,54 @@ def run_projectR_cmd(target_df, loading_df, algorithm): Return Pandas dataframe of the projectR output """ - # Convert from pandas dataframe to R data.frame - with localconverter(ro.default_converter + pandas2ri.converter): - target_r_df = ro.conversion.py2rpy(target_df) - loading_r_df = ro.conversion.py2rpy(loading_df) - - # data.frame to matrix (projectR has no data.frame signature) - target_r_matrix = convert_r_df_to_r_matrix(target_r_df) - loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df) - - # Assign Rownames to each matrix - # I don't know why but using ro.StrVector makes rpy2py fail where the output df is an incompatible class - # Guessing that there are some non-strings mixed into the indexes - target_r_matrix.rownames = StrVector(target_df.index) - loading_r_matrix.rownames = StrVector(loading_df.index) - - # The NMF projectR method signature is based on the LinearEmbeddedMatrix class, - # Which has a featureLoadings property. 
That matrix is loaded and the default - # projectR signature is returned and used. So we can just pass the matrix as-is. - # https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html - - # Run project R command. Get projectionPatterns matrix - try: - if algorithm == "nmf": - projectR = importr('projectR') - projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False) - elif algorithm == "fixednmf": - sjd = importr('SJD') - loading_list = ro.ListVector({"genesig": loading_r_matrix}) - - projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list) - projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig") - else: - raise ValueError("Algorithm {} is not supported".format(algorithm)) - except Exception as e: - # print stacktrace with line numbers - traceback.print_exc(file=sys.stderr) - raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e))) - - # matrix back to data.frame - projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix) - - # Convert from R data.frame to pandas dataframe - with localconverter(ro.default_converter + pandas2ri.converter): - projection_patterns_df = ro.conversion.rpy2py(projection_patterns_r_df) - - return projection_patterns_df + # Hold rpy2's R lock so that concurrent calls into R do not conflict when running locally -> https://rpy2.github.io/doc/v3.5.x/html/rinterface.html#multithreading + with openrlib.rlock: + + # Convert from pandas dataframe to R data.frame + with localconverter(ro.default_converter + pandas2ri.converter): + target_r_df = ro.conversion.py2rpy(target_df) + loading_r_df = ro.conversion.py2rpy(loading_df) + + # data.frame to matrix (projectR has no data.frame signature) + target_r_matrix = convert_r_df_to_r_matrix(target_r_df) + loading_r_matrix = convert_r_df_to_r_matrix(loading_r_df) + + # Assign Rownames to each matrix + # I don't know why but using ro.StrVector makes rpy2py fail where the 
output df is an incompatible class + # Guessing that there are some non-strings mixed into the indexes + target_r_matrix.rownames = StrVector(target_df.index) + loading_r_matrix.rownames = StrVector(loading_df.index) + + # The NMF projectR method signature is based on the LinearEmbeddingMatrix class, + # Which has a featureLoadings property. That matrix is loaded and the default + # projectR signature is returned and used. So we can just pass the matrix as-is. + # https://rdrr.io/bioc/SingleCellExperiment/man/LinearEmbeddingMatrix.html + + # Run project R command. Get projectionPatterns matrix + try: + if algorithm == "nmf": + projectR = importr('projectR') + projection_patterns_r_matrix = projectR.projectR(data=target_r_matrix, loadings=loading_r_matrix, full=False) + elif algorithm == "fixednmf": + sjd = importr('SJD') + loading_list = ro.ListVector({"genesig": loading_r_matrix}) + + projection = sjd.projectNMF(proj_dataset=target_r_matrix, proj_group=True, list_component=loading_list) + projection_patterns_r_matrix = projection.rx2("proj_score_list").rx2("genesig") + else: + raise ValueError("Algorithm {} is not supported".format(algorithm)) + except Exception as e: + # print stacktrace with line numbers + traceback.print_exc(file=sys.stderr) + raise RError("Error: Could not run projectR command.\tReason: {}".format(str(e))) + + # matrix back to data.frame + projection_patterns_r_df = convert_r_matrix_to_r_df(projection_patterns_r_matrix) + + # Convert from R data.frame to pandas dataframe + with localconverter(ro.default_converter + pandas2ri.converter): + projection_patterns_df = ro.conversion.rpy2py(projection_patterns_r_df) + + return projection_patterns_df