Commit 7f1c553

Merge branch 'ui-v2' of github.com:IGS/gEAR into ui-v2
jorvis committed Aug 20, 2024
2 parents 030108b + 5e73205 commit 7f1c553
Showing 44 changed files with 1,067 additions and 526 deletions.
2 changes: 1 addition & 1 deletion bin/profile_single_heatmap_run.py
@@ -147,7 +147,7 @@ def get_analysis(analysis, dataset_id, session_id):
ana.type = analysis['type']
except:
user = geardb.get_user_from_session_id(session_id)
-ana.discover_type(current_user_id=user.id)
+ana.discover_type()
else:
ds = geardb.Dataset(id=dataset_id, has_h5ad=1)
h5_path = ds.get_file_path()
2 changes: 1 addition & 1 deletion bin/profile_single_projectr_tsne_run.py
@@ -183,7 +183,7 @@ def get_analysis(analysis, dataset_id, session_id):
ana.type = analysis['type']
except:
user = geardb.get_user_from_session_id(session_id)
-ana.discover_type(current_user_id=user.id)
+ana.discover_type()
else:
ds = geardb.Dataset(id=dataset_id, has_h5ad=1)
h5_path = ds.get_file_path()
47 changes: 47 additions & 0 deletions bin/remove_duplicate_layout_displays.py
@@ -0,0 +1,47 @@
#!/opt/bin/python

# This fixes an issue where some layouts have duplicated display members, which happened if the
# user saved layouts in the layout arranger while the duplication bug was active (https://github.com/IGS/gEAR/issues/768)

import sys

from pathlib import Path
lib_path = Path(__file__).resolve().parents[1].joinpath('lib')

sys.path.append(str(lib_path))

import geardb

conn = geardb.Connection()
cursor = conn.get_cursor()

# print row count
qry = "SELECT COUNT(*) FROM layout_displays"
cursor.execute(qry)
row_count = cursor.fetchone()[0]
print("Row count before deletion: {}".format(row_count))

# https://www.tutorialspoint.com/mysql/mysql-delete-duplicate-records.htm
qry = """
DELETE ld1 FROM layout_displays ld1
INNER JOIN layout_displays ld2
WHERE ld1.layout_id = ld2.layout_id
AND ld1.display_id = ld2.display_id
AND ld1.start_col = ld2.start_col
AND ld1.grid_width = ld2.grid_width
AND ld1.start_row = ld2.start_row
AND ld1.grid_height = ld2.grid_height
AND ld1.id > ld2.id
"""
cursor.execute(qry)

conn.commit()

# print row count
qry = "SELECT COUNT(*) FROM layout_displays"
cursor.execute(qry)
row_count = cursor.fetchone()[0]
print("Row count after deletion: {}".format(row_count))

cursor.close()
conn.close()
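For reference, the duplicate groups can be previewed before running the destructive DELETE above. A minimal dry-run sketch (not part of the commit) that reuses the script's own geardb connection helpers; the GROUP BY column list mirrors the join conditions in the DELETE, and the cursor is assumed to return plain tuples:

import sys
from pathlib import Path

lib_path = Path(__file__).resolve().parents[1].joinpath('lib')
sys.path.append(str(lib_path))

import geardb

conn = geardb.Connection()
cursor = conn.get_cursor()

# One row per duplicated (layout, display, geometry) group; extra_copies is
# the number of rows the de-duplication DELETE would remove for that group.
qry = """
SELECT layout_id, display_id, start_col, grid_width, start_row, grid_height,
       COUNT(*) - 1 AS extra_copies
  FROM layout_displays
 GROUP BY layout_id, display_id, start_col, grid_width, start_row, grid_height
HAVING COUNT(*) > 1
"""
cursor.execute(qry)
dupes = cursor.fetchall()
print("Duplicate groups: {}".format(len(dupes)))
print("Rows the DELETE would remove: {}".format(sum(row[6] for row in dupes)))

cursor.close()
conn.close()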
2 changes: 0 additions & 2 deletions docker/requirements.txt
@@ -9,7 +9,6 @@ Flask==3.0.0
Flask-RESTful==0.3.9
gunicorn
h5py==3.10.0
-itsdangerous==2.1.2 # See -> https://stackoverflow.com/a/71206978
jupyterlab==4.0.5
jupyter==1.0.0
kaleido==0.2.1
@@ -36,6 +35,5 @@ scanpy==1.10.1
scikit-learn==1.0.2
scipy==1.11.04
seaborn==0.13.2
-SQLAlchemy==1.4.32
tables==3.9.2 # Read hdf5 files into pandas
xlrd==1.2.0
2 changes: 0 additions & 2 deletions docs/setup.python.md
@@ -39,7 +39,6 @@ fixed paths have worked fine for decades.
Flask-RESTful==0.3.9 \
gunicorn \
h5py==3.10.0 \
-itsdangerous==2.1.2 \
jupyterlab==4.0.5 \
jupyter==1.0.0 \
kaleido==0.2.1 \
@@ -66,7 +65,6 @@ fixed paths have worked fine for decades.
scikit-learn==1.0.2 \
scipy==1.11.04 \
seaborn==0.13.2 \
-SQLAlchemy==1.4.32 \
tables==3.9.2 \
xlrd==1.2.0
$ sudo mkdir /opt/bin
12 changes: 9 additions & 3 deletions lib/gear/orthology.py
@@ -98,7 +98,8 @@ def filter_organism_by_id(organism_id: str):
Returns:
dict: The organism dictionary corresponding to the given organism ID.
"""
-return next((item for item in organisms if item["id"] == organism_id), None)
+
+return next((item for item in organisms if item.id == organism_id), None)

def get_organism_name_by_id(organism_id: str):
"""Get the organism name corresponding to the given organism ID.
@@ -109,7 +110,11 @@ def get_organism_name_by_id(organism_id: str):
Returns:
str: The organism name corresponding to the given organism ID.
"""
-return filter_organism_by_id(organism_id)["name"]
+organism = filter_organism_by_id(organism_id)
+if organism is not None:
+    return organism.label
+else:
+    return ""

def create_orthology_df(orthomap_file: Path):
"""
@@ -149,7 +154,8 @@ def map_dataframe_genes(orig_df: pd.DataFrame, orthomap_file: Path):

def get_best_match(id1):
# Get the best match for the id2 gene symbol
-sorted_by_best_match = orthomap_df[orthomap_df["id1"] == id1].sort_values("algorithms_match_count", ascending=False)
+best_match_for_id = orthomap_df[orthomap_df["id1"] == id1]
+sorted_by_best_match = best_match_for_id.sort_values(by="algorithms_match_count", ascending=False)
# If no match, return the original id1
if sorted_by_best_match.empty:
return id1
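The refactor above splits one chained pandas expression into a filter step and a sort step. A toy illustration of the same best-match lookup (hypothetical gene IDs and scores; it assumes the orthomap columns id1, id2, and algorithms_match_count, and that the elided tail of get_best_match returns the top candidate's id2):

import pandas as pd

# Hypothetical orthology map: one source gene (id1) can map to several
# candidate orthologs (id2), each scored by how many algorithms agree.
orthomap_df = pd.DataFrame({
    "id1": ["ENSG01", "ENSG01", "ENSG02"],
    "id2": ["ENSMUSG11", "ENSMUSG12", "ENSMUSG21"],
    "algorithms_match_count": [3, 7, 5],
})

def get_best_match(id1):
    # Filter to candidates for this gene, then rank best-supported first.
    best_match_for_id = orthomap_df[orthomap_df["id1"] == id1]
    sorted_by_best_match = best_match_for_id.sort_values(by="algorithms_match_count", ascending=False)
    if sorted_by_best_match.empty:
        return id1  # no ortholog found; keep the original ID
    return sorted_by_best_match.iloc[0]["id2"]

print(get_best_match("ENSG01"))  # ENSMUSG12 (score 7 beats 3)
print(get_best_match("ENSG99"))  # ENSG99 (no match, falls through)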
17 changes: 12 additions & 5 deletions lib/gear/plotting.py
@@ -468,18 +468,25 @@ def generate_plot(df, x=None, y=None, z=None, facet_row=None, facet_col=None,
# TODO: put in function

# Map indexes for subplot ordering. Indexes start at 1 since plotting rows/cols start at 1
-facet_row_groups = category_orders[facet_row] if facet_row and facet_row in category_orders else []
+facet_row_groups = []
+facet_col_groups = []
+
+if facet_row:
+    facet_row_groups = category_orders[facet_row] if facet_row in category_orders else df[facet_row].unique().tolist()
+
+if facet_col:
+    facet_col_groups = category_orders[facet_col] if facet_col in category_orders else df[facet_col].unique().tolist()

facet_row_indexes = {group: idx for idx, group in enumerate(facet_row_groups, start=1)}
num_rows = len(facet_row_groups) if facet_row else 1
-facet_col_groups = category_orders[facet_col] if facet_col and facet_col in category_orders else []
facet_col_indexes = {group: idx for idx, group in enumerate(facet_col_groups, start=1)}
num_cols = len(facet_col_groups) if facet_col else 1

# Make faceted plot
fig = make_subplots(rows=num_rows
, cols=num_cols
-, row_titles=facet_row_groups if facet_row else None
-, column_titles=facet_col_groups if facet_col else None
+, row_titles=list(facet_row_groups)
+, column_titles=list(facet_col_groups)
, x_title=x_title if x_title else None
, y_title=y_title if y_title else None
)
@@ -524,7 +531,7 @@ def generate_plot(df, x=None, y=None, z=None, facet_row=None, facet_col=None,
# Each individual trace is a separate scalegroup to ensure plots are scaled correctly for violin plots
new_plotting_args['scalegroup'] = name
if isinstance(name, tuple):
-new_plotting_args['scalegroup'] = "_".join(name)
+new_plotting_args['scalegroup'] = "_".join(map(str, name))

# If color dataseries is present, add some special configurations
if color_name:
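A note on the facet fix above: the rewritten logic falls back to df[facet].unique() when no explicit category order exists, so an active facet always yields at least one group and row_titles/column_titles always receive real lists. A standalone sketch of the same 1-based subplot index mapping (toy data and hypothetical column names; plotly and pandas assumed):

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

df = pd.DataFrame({
    "condition": ["ctrl", "ctrl", "treated", "treated"],
    "sex": ["M", "F", "M", "F"],
    "expr": [1.0, 2.0, 3.0, 4.0],
})
category_orders = {"condition": ["ctrl", "treated"]}  # no order given for "sex"
facet_row, facet_col = "condition", "sex"

# Explicit order wins; otherwise fall back to order of appearance in the data.
facet_row_groups = category_orders.get(facet_row, df[facet_row].unique().tolist())
facet_col_groups = category_orders.get(facet_col, df[facet_col].unique().tolist())

# Plotly subplot positions are 1-based, hence start=1.
facet_row_indexes = {g: i for i, g in enumerate(facet_row_groups, start=1)}
facet_col_indexes = {g: i for i, g in enumerate(facet_col_groups, start=1)}

fig = make_subplots(rows=len(facet_row_groups), cols=len(facet_col_groups),
                    row_titles=list(facet_row_groups), column_titles=list(facet_col_groups))

# Route each (row group, col group) slice of the data to its subplot cell.
for (row_val, col_val), sub in df.groupby([facet_row, facet_col]):
    fig.add_trace(go.Scatter(x=sub.index, y=sub["expr"], name=f"{row_val}/{col_val}"),
                  row=facet_row_indexes[row_val], col=facet_col_indexes[col_val])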
25 changes: 16 additions & 9 deletions lib/geardb.py
@@ -104,7 +104,12 @@ def get_analysis(analysis, dataset_id, session_id):
if 'type' in analysis:
ana.type = analysis['type']
else:
-ana.discover_type(current_user_id=user_id)
+ana.discover_type()
+
+# Check that the h5ad file exists
+if not os.path.exists(ana.dataset_path()):
+    raise FileNotFoundError("No h5 file found for the passed in analysis")

else:
ds = Dataset(id=dataset_id, has_h5ad=1)
h5_path = ds.get_file_path()
@@ -682,7 +687,7 @@ def discover_vetting(self, current_user_id=None):
return 'community'


-def discover_type(self, current_user_id=None):
+def discover_type(self):
"""
Given an analysis ID it's technically possible to scan the directory hierarchies and
find the type.
@@ -909,9 +914,9 @@ def __init__(self, id=None, label=None, genus=None, species=None, strain=None, t
def __repr__(self):
return json.dumps(self.__dict__)

+@dataclass
class OrganismCollection:
-    def __init__(self, organisms=None):
-        self.organisms = [] if organisms is None else organisms
+    organisms: List[Organism] = field(default_factory=list)

def __repr__(self):
return json.dumps(self.__dict__)
@@ -944,6 +949,7 @@ def get_all(self):
self.organisms.append(org)

cursor.close()
+conn.close()

return self.organisms

@@ -1006,6 +1012,7 @@ def add_member(self, member):

cursor.close()
conn.commit()
+conn.close()

def dataset_ids(self):
"""
@@ -1108,7 +1115,7 @@ def load(self):
self.get_members()

cursor.close()
-conn.commit()
+conn.close()

def remove(self):
"""
@@ -1131,6 +1138,7 @@ def remove(self):

cursor.close()
conn.commit()
+conn.close()

def remove_all_members(self):
"""
@@ -1147,6 +1155,7 @@ def remove_all_members(self):

cursor.close()
conn.commit()
+conn.close()

self.members = []

@@ -1172,6 +1181,7 @@ def remove_member_by_display_id(self, display_id):

cursor.close()
conn.commit()
+conn.close()

def remove_members_by_dataset_id(self, dataset_id):
"""Deletes all members where the display ID belongs to a given dataset ID from the database."""
@@ -1217,8 +1227,6 @@ def save(self):
self.id = cursor.lastrowid
else:
# ID already populated
-conn = Connection()
-cursor = conn.get_cursor()

# Update layout properties
sql = """
@@ -1235,12 +1243,11 @@ def save(self):
self.is_domain, self.share_id, self.id
))

-conn.commit()

# TODO: delete existing members, add current ones

cursor.close()
conn.commit()
+conn.close()

def save_change(self, attribute=None, value=None):
"""
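A pattern note: most of the geardb.py hunks above append the same cursor.close() / conn.commit() / conn.close() epilogue. If that epilogue keeps spreading, a context manager could enforce it in one place. A hypothetical refactor sketch, not part of this commit, assuming only the geardb.Connection methods already used above (get_cursor, commit, close):

from contextlib import contextmanager

import geardb

@contextmanager
def gear_cursor(commit=False):
    """Yield a cursor, then guarantee cleanup; commit on success if requested."""
    conn = geardb.Connection()
    cursor = conn.get_cursor()
    try:
        yield cursor
        if commit:
            conn.commit()
    finally:
        cursor.close()
        conn.close()

# Hypothetical usage mirroring the epilogue added throughout geardb.py:
# with gear_cursor(commit=True) as cursor:
#     cursor.execute("DELETE FROM layout_displays WHERE layout_id = %s", (self.id,))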
3 changes: 1 addition & 2 deletions services/projectr/install_bioc.sh
@@ -5,8 +5,7 @@ Rver="${Rmaj}.3.1"

current_dir=$(pwd)

-# Install and build R (Using 'apt-get install' on Ubuntu Trusty installs version 3.0.2 of R)
-curl http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar -C /opt -zx
+curl -s -L http://lib.stat.cmu.edu/R/CRAN/src/base/${Rmaj}/${Rver}.tar.gz | tar xzv -C /opt
cd /opt/${Rver}
/opt/${Rver}/configure --with-readline=no --enable-R-shlib --enable-BLAS-shlib --with-x=no || exit 1
make || exit 1
23 changes: 20 additions & 3 deletions services/projectr/main.py
@@ -1,5 +1,6 @@
import os, sys
import pandas as pd
+from io import StringIO
from flask import Flask, abort, jsonify, request

cloud_logging = False
@@ -36,9 +37,21 @@ def write_entry(logger_name, severity, message):

def do_binary_projection(target_df, loading_df):
"""Perform projection based on the number of genes that were expressed in the cell or observation."""
-# Only applies with unweighted gene carts.
-tp_target_series = target_df.astype(bool).sum(axis=0).transpose()
-return pd.DataFrame(data=tp_target_series, columns=loading_df.columns, index=tp_target_series.index)
+# Only applies with unweighted gene carts, or weighted carts with binary values.
+
+# For each loading pattern, count the number of genes that are expressed in the target
+# and return the count as the pattern weight.
+binary_target_df = pd.DataFrame()
+for pattern in loading_df.columns:
+    # Select the genes whose loading value is 1 in the loading_df
+    good_loading_genes_mask = loading_df[pattern].astype(bool)
+    good_loading_genes = loading_df.index[good_loading_genes_mask]
+
+    # Count how many of those genes are 1 (expressed) in the target_df.
+    good_genes = target_df.loc[good_loading_genes].astype(bool).sum(axis=0).transpose()
+    binary_target_df[pattern] = good_genes
+return binary_target_df


def do_pca_projection(target_df, loading_df):
"""Perform projection of PCA loadings."""
@@ -66,6 +79,10 @@ def index():
write_entry("projectr", "INFO", "Genecart ID: {}".format(genecart_id))


+# pd.read_json gives a FutureWarning and suggests wrapping the JSON in StringIO.
+target = StringIO(target)
+loadings = StringIO(loadings)

target_df = pd.read_json(target, orient="split")
loading_df = pd.read_json(loadings, orient="split")

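To make the rewritten do_binary_projection concrete, here is a toy run (made-up genes, cells, and patterns; it assumes target_df is genes x observations and loading_df is genes x patterns, the orientation the loop above relies on). Each pattern's score for a cell is the count of that pattern's member genes with nonzero expression in that cell:

import pandas as pd

# Genes x cells expression matrix (toy values).
target_df = pd.DataFrame({"cell1": [5, 0, 2], "cell2": [0, 0, 7]},
                         index=["geneA", "geneB", "geneC"])
# Genes x patterns loading matrix with binary membership.
loading_df = pd.DataFrame({"pattern1": [1, 1, 0], "pattern2": [0, 1, 1]},
                          index=["geneA", "geneB", "geneC"])

binary_target_df = pd.DataFrame()
for pattern in loading_df.columns:
    # Genes belonging to this pattern (nonzero loading)...
    member_genes = loading_df.index[loading_df[pattern].astype(bool)]
    # ...counted per cell wherever they are expressed in the target.
    binary_target_df[pattern] = target_df.loc[member_genes].astype(bool).sum(axis=0)

print(binary_target_df)
#        pattern1  pattern2
# cell1         1         1
# cell2         0         1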
2 changes: 1 addition & 1 deletion services/projectr/requirements.txt
@@ -1,5 +1,5 @@
Flask==3.0.0
gunicorn==20.1.0
rpy2==3.5.1 # 3.5.2 and up gives errors with rpy2py and py2rpy
-pandas==1.4.1
+pandas==2.2.1
google-cloud-logging
19 changes: 0 additions & 19 deletions www/api/requirements.txt

This file was deleted.

9 changes: 7 additions & 2 deletions www/api/resources/common.py
@@ -17,7 +17,6 @@

def create_projection_adata(dataset_adata, dataset_id, projection_id):
# Create AnnData object out of readable CSV file
-# ? Does it make sense to put this in the geardb/Analysis class?
projection_id = secure_filename(projection_id)
dataset_id = secure_filename(dataset_id)

@@ -34,7 +33,7 @@ def create_projection_adata(dataset_adata, dataset_id, projection_id):
obs = dataset_adata.obs
# Create the anndata object and write to h5ad
# Associate with a filename to ensure AnnData is read in "backed" mode
-projection_adata = anndata.AnnData(X=X, obs=obs, var=var, obsm=dataset_adata.obsm, filename=projection_adata_path, filemode='r')
+projection_adata = anndata.AnnData(X=X, obs=obs, var=var, obsm=dataset_adata.obsm, filemode='r')
except Exception as e:
print(str(e), file=sys.stderr)
raise PlotError("Could not create projection AnnData object from CSV.")
@@ -45,6 +44,12 @@ def create_projection_adata(dataset_adata, dataset_id, projection_id):
# For some reason the gene_symbol is not taken in by the constructor
projection_adata.var["gene_symbol"] = projection_adata.var_names

+# Associate with a filename to ensure AnnData is read in "backed" mode
+# This creates the h5ad file if it does not exist
+# TODO: If too many processes read from this file, it can throw a BlockingIOError. Eventually we should
+# handle this by creating a copy of the file for each process, like a tempfile.
+projection_adata.filename = projection_adata_path

return projection_adata

def order_by_time_point(obs_df):
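On the common.py change above: in anndata, assigning the .filename property writes the object to that h5ad path and switches it into backed mode, which is why the assignment can simply be deferred until after the var mutations. A minimal sketch of that behavior (toy data and a hypothetical path; anndata, numpy, and pandas assumed):

import anndata
import numpy as np
import pandas as pd

# Toy in-memory AnnData: 2 observations x 3 genes.
adata = anndata.AnnData(
    X=np.array([[1.0, 0.0, 2.0], [0.0, 3.0, 1.0]]),
    obs=pd.DataFrame(index=["cell1", "cell2"]),
    var=pd.DataFrame(index=["geneA", "geneB", "geneC"]),
)
adata.var["gene_symbol"] = adata.var_names  # mutate while still in memory

# Assigning .filename writes the object to the .h5ad file and flips it
# into backed mode, so later reads go through the file rather than RAM.
adata.filename = "/tmp/projection_example.h5ad"  # hypothetical path
print(adata.isbacked)  # True

As the TODO in the diff notes, many processes reading one backed file can raise a BlockingIOError, hence the suggestion of a per-process tempfile copy.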
