Skip to content

Commit

Permalink
Merge branch 'ui-v2' of github.com:IGS/gEAR into ui-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
jorvis committed Jan 20, 2024
2 parents 25baed2 + 068617a commit cb4b39b
Show file tree
Hide file tree
Showing 16 changed files with 537 additions and 246 deletions.
13 changes: 3 additions & 10 deletions lib/gear/mg_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,7 +1074,7 @@ def build_obs_group_indexes(df, filters, clusterbar_fields):
filter_indexes[k][elem] = obs_index.tolist()
return filter_indexes

def create_dataframe_gene_mask(df, gene_symbols, mapped_gene_symbols={}):
def create_dataframe_gene_mask(df, gene_symbols):
"""Create a gene mask to filter a dataframe."""
if not "gene_symbol" in df:
raise PlotError('Missing gene_symbol column in adata.var')
Expand Down Expand Up @@ -1120,15 +1120,8 @@ def create_dataframe_gene_mask(df, gene_symbols, mapped_gene_symbols={}):
# Note to user which genes were not found in the dataset
genes_not_present = [gene for gene in gene_symbols if gene not in found_genes]
if genes_not_present:
# attempt to map the gene to an ensembl id, and add to the gene_filter
for gene in genes_not_present:
if gene in mapped_gene_symbols:
gene_filter = gene_filter | df['gene_symbol'].isin([mapped_gene_symbols[gene]])
# If the gene is still not present, add it to the message
genes_not_present = [gene for gene in genes_not_present if gene not in mapped_gene_symbols]
if genes_not_present:
success = 2,
message_list.append('<li>One or more genes were not found in the dataset nor could be mapped: {}</li>'.format(', '.join(genes_not_present)))
success = 2,
message_list.append('<li>One or more genes were not found in the dataset nor could be mapped: {}</li>'.format(', '.join(genes_not_present)))
message = "\n".join(message_list)
return gene_filter, success, message
except PlotError as pe:
Expand Down
19 changes: 17 additions & 2 deletions lib/gear/orthology.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,16 @@ def map_single_gene(gene_symbol:str, orthomap_file: Path):
"""
# Read HDF5 file using Pandas read_hdf
gene_symbol_dict = create_orthology_gene_symbol_dict(orthomap_file)

# Check if case-insensitive gene symbol is in dictionary
gene_symbol = gene_symbol.lower()
for key in gene_symbol_dict.keys():
if gene_symbol == key.lower():
gene_symbol = key
break

# NOTE: Not all genes can be mapped. Unmappable genes do not change in the original dataframe.
return gene_symbol_dict[gene_symbol]
return gene_symbol_dict.get(gene_symbol, None)

def map_multiple_genes(gene_symbols:list, orthomap_file: Path):
"""
Expand All @@ -172,5 +180,12 @@ def map_multiple_genes(gene_symbols:list, orthomap_file: Path):
# Read HDF5 file using Pandas read_hdf
gene_symbol_dict = create_orthology_gene_symbol_dict(orthomap_file)

# Check if case-insensitive gene symbols are in dictionary
gene_symbols = [gene_symbol.lower() for gene_symbol in gene_symbols]
for key in gene_symbol_dict.keys():
if key.lower() in gene_symbols:
gene_symbols[gene_symbols.index(key.lower())] = key


# NOTE: Not all genes can be mapped. Unmappable genes do not change in the original dataframe.
return { gene_symbol: gene_symbol_dict[gene_symbol] for gene_symbol in gene_symbols}
return { gene_symbol: gene_symbol_dict[gene_symbol] for gene_symbol in gene_symbols if gene_symbol in gene_symbol_dict}
38 changes: 23 additions & 15 deletions www/api/resources/multigene_dash_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ def get_mapped_gene_symbols(gene_symbols, gene_organism_id, dataset_organism_id)
else:
for ortholog_file in get_ortholog_files_from_dataset(dataset_organism_id, "ensembl"):
try:
return map_multiple_genes(gene_symbols, ortholog_file)
mapped_gene_symbols_dict = map_multiple_genes(gene_symbols, ortholog_file)
# ? Should we check all ortholog files and return the dict with the most matches?
if len(mapped_gene_symbols_dict):
return mapped_gene_symbols_dict
except:
continue
return {}
Expand Down Expand Up @@ -296,25 +299,32 @@ def post(self, dataset_id):
dataset_organism_id = dataset.organism_id

mapped_gene_symbols_dict = {}
# create list of mapped_gene_symbols in gene_symbols order
mapped_gene_symbols = []

# If any searched gene is not in the dataset, attempt to map it to the dataset organism
if not check_all_genes_in_dataset(adata, gene_symbols):
try:
mapped_gene_symbols_dict = get_mapped_gene_symbols(gene_symbols, gene_organism_id, dataset_organism_id)
if len(mapped_gene_symbols_dict):
for gene_symbol in gene_symbols:
mapped_gene_symbols.append(mapped_gene_symbols_dict.get(gene_symbol, gene_symbol))

except:
return {"success": -1, "message": "The searched gene symbols could not be mapped to the dataset organism."}

selected_gene_symbols = gene_symbols if not mapped_gene_symbols else mapped_gene_symbols
# TODO: How to deal with a gene mapping to multiple Ensembl IDs
try:
if not gene_symbols and plot_type in ["dotplot", "heatmap", "mg_violin"]:
if not selected_gene_symbols and plot_type in ["dotplot", "heatmap", "mg_violin"]:
raise PlotError('Must pass in some genes before creating a plot of type {}'.format(plot_type))

if len(gene_symbols) == 1 and plot_type == "heatmap":
if len(selected_gene_symbols) == 1 and plot_type == "heatmap":
raise PlotError('Heatmaps require 2 or more genes as input')

# Some datasets have multiple Ensembl IDs mapped to the same gene.
# Drop dups to prevent out-of-bounds index errors downstream
gene_filter, success, message = mg.create_dataframe_gene_mask(adata.var, gene_symbols, mapped_gene_symbols_dict)
gene_filter, success, message = mg.create_dataframe_gene_mask(adata.var, selected_gene_symbols)
except PlotError as pe:
return {
'success': -1,
Expand Down Expand Up @@ -345,7 +355,7 @@ def post(self, dataset_id):
# Collect all genes from the unfiltered dataset
dataset_genes = adata.var['gene_symbol'].unique().tolist()
# Gene symbols list may have genes not in the dataset.
normalized_genes_list, _found_genes = mg.normalize_searched_genes(dataset_genes, gene_symbols)
normalized_genes_list, _found_genes = mg.normalize_searched_genes(dataset_genes, selected_gene_symbols)
# Sort ensembl IDs based on the gene symbol order
sorted_ensm = map(lambda x: gene_to_ensm[x], normalized_genes_list)

Expand Down Expand Up @@ -444,16 +454,16 @@ def post(self, dataset_id):

mg.modify_volcano_plot(fig, query_val, ref_val, ensm2genesymbol, downcolor, upcolor)

if gene_symbols:
if selected_gene_symbols:
dataset_genes = df['gene_symbol'].unique().tolist()
normalized_genes_list, _found_genes = mg.normalize_searched_genes(dataset_genes, gene_symbols)
normalized_genes_list, _found_genes = mg.normalize_searched_genes(dataset_genes, selected_gene_symbols)
mg.add_gene_annotations_to_volcano_plot(fig, normalized_genes_list, annotate_nonsignificant)

elif plot_type == "quadrant":
# Get list of normalized genes before dataframe filtering takes place
if gene_symbols:
if selected_gene_symbols:
dataset_genes = adata.var['gene_symbol'].unique().tolist()
normalized_genes_list, _found_genes = mg.normalize_searched_genes(dataset_genes, gene_symbols)
normalized_genes_list, _found_genes = mg.normalize_searched_genes(dataset_genes, selected_gene_symbols)
try:
key, control_val, compare1_val, compare2_val = mg.validate_quadrant_conditions(ref_condition, compare_group1, compare_group2)
df = mg.prep_quadrant_dataframe(selected
Expand All @@ -477,7 +487,7 @@ def post(self, dataset_id):

fig = mg.create_quadrant_plot(df, control_val, compare1_val, compare2_val, colorscale)
# Annotate selected genes
if gene_symbols:
if selected_gene_symbols:
genes_not_found, genes_none_none = mg.add_gene_annotations_to_quadrant_plot(fig, normalized_genes_list)
if genes_not_found:
success = 2
Expand Down Expand Up @@ -520,6 +530,9 @@ def post(self, dataset_id):
groupby = ["gene_symbol"]
groupby.extend(groupby_filters)

# drop Ensembl ID index since it may not aggregate and throw warnings
df.drop(columns=[var_index], inplace=True)

grouped = df.groupby(groupby)
df = grouped.agg(['mean', 'count', ('percent', percent)]) \
.fillna(0) \
Expand Down Expand Up @@ -753,11 +766,6 @@ def post(self, dataset_id):

plot_json = json.dumps(fig, cls=PlotlyJSONEncoder)

# create list of mapped_gene_symbols in gene_symbols order
mapped_gene_symbols = []
for gene_symbol in gene_symbols:
mapped_gene_symbols.append(mapped_gene_symbols_dict.get(gene_symbol, gene_symbol))

# NOTE: With volcano plots, the Chrome "devtools" cannot load the JSON response occasionally
return {
"success": success
Expand Down
31 changes: 23 additions & 8 deletions www/api/resources/plotly_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
abs_path_www = Path(__file__).resolve().parents[TWO_LEVELS_UP] # web-root dir
PROJECTIONS_BASE_DIR = abs_path_www.joinpath('projections')

def normalize_searched_gene(gene_list, chosen_gene):
    """Return the dataset's own spelling of a gene, matched case-insensitively.

    Args:
        gene_list: Iterable of gene symbols taken from the dataset. Each entry
            is passed through ``str()`` before comparison, so non-string
            values are tolerated.
        chosen_gene: The gene symbol (a ``str``) that was searched for.

    Returns:
        The first element of ``gene_list`` whose lowercased string form equals
        the lowercased ``chosen_gene``, or ``None`` if no element matches.
    """
    # Lowercase the query once instead of on every loop iteration.
    target = chosen_gene.lower()
    for candidate in gene_list:
        if str(candidate).lower() == target:
            return candidate
    return None

def get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id):
"""
Maps a gene symbol to its corresponding orthologous gene symbol in a given dataset.
Expand All @@ -43,7 +50,9 @@ def get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id):
else:
for ortholog_file in get_ortholog_files_from_dataset(dataset_organism_id, "ensembl"):
try:
return map_single_gene(gene_symbol, ortholog_file)
mapped_gene = map_single_gene(gene_symbol, ortholog_file)
if mapped_gene:
return mapped_gene
except:
continue
return None
Expand Down Expand Up @@ -285,16 +294,22 @@ def post(self, dataset_id):
if not check_gene_in_dataset(adata, gene_symbols):
try:
mapped_gene_symbol = get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id)
except:
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be mapped to the dataset organism."}

if mapped_gene_symbol:
gene_symbols = (mapped_gene_symbol,)
if not check_gene_in_dataset(adata, gene_symbols):
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be found in the h5ad file."}
else:
# Last chance - See if a normalized gene symbol is present in the dataset
if not mapped_gene_symbol:
dataset_genes = adata.var['gene_symbol'].unique().tolist()
mapped_gene_symbol = normalize_searched_gene(dataset_genes, gene_symbol)
if not mapped_gene_symbol:
raise Exception("Could not map gene symbol to dataset organism.")

except Exception as e:
print(str(e), file=sys.stderr)
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be mapped to the dataset organism."}

gene_symbols = (mapped_gene_symbol,)
if not check_gene_in_dataset(adata, gene_symbols):
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be found in the h5ad file."}

# Filter genes and slice the adata to get a dataframe
# with expression and its observation metadata
try:
Expand Down
28 changes: 21 additions & 7 deletions www/api/resources/svg_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ def __init__(self, message="") -> None:
self.message = message
super().__init__(self.message)

def normalize_searched_gene(gene_list, chosen_gene):
    """Return the dataset's own spelling of a gene, matched case-insensitively.

    Args:
        gene_list: Iterable of gene symbols taken from the dataset. Each entry
            is passed through ``str()`` before comparison, so non-string
            values are tolerated.
        chosen_gene: The gene symbol (a ``str``) that was searched for.

    Returns:
        The first element of ``gene_list`` whose lowercased string form equals
        the lowercased ``chosen_gene``, or ``None`` if no element matches.
    """
    # Lowercase the query once instead of on every loop iteration.
    target = chosen_gene.lower()
    for candidate in gene_list:
        if str(candidate).lower() == target:
            return candidate
    return None

def get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id):
"""
Maps a gene symbol to its corresponding orthologous gene symbol in a given dataset.
Expand All @@ -40,7 +47,9 @@ def get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id):
else:
for ortholog_file in get_ortholog_files_from_dataset(dataset_organism_id, "ensembl"):
try:
return map_single_gene(gene_symbol, ortholog_file)
mapped_gene = map_single_gene(gene_symbol, ortholog_file)
if mapped_gene:
return mapped_gene
except:
continue
return None
Expand Down Expand Up @@ -138,15 +147,20 @@ def get(self, dataset_id):
if not check_gene_in_dataset(adata, gene_symbols):
try:
mapped_gene_symbol = get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id)

# Last chance - See if a normalized gene symbol is present in the dataset
if not mapped_gene_symbol:
dataset_genes = adata.var['gene_symbol'].unique().tolist()
mapped_gene_symbol = normalize_searched_gene(dataset_genes, gene_symbol)
if not mapped_gene_symbol:
raise Exception("Could not map gene symbol to dataset organism.")

except:
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be mapped to the dataset organism."}

if mapped_gene_symbol:
gene_symbols = (mapped_gene_symbol,)
if not check_gene_in_dataset(adata, gene_symbols):
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be found in the h5ad file."}
else:
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be mapped to the dataset organism."}
gene_symbols = (mapped_gene_symbol,)
if not check_gene_in_dataset(adata, gene_symbols):
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be found in the h5ad file."}

try:
gene_filter = adata.var.gene_symbol.isin(gene_symbols)
Expand Down
30 changes: 21 additions & 9 deletions www/api/resources/tsne_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ def __init__(self, message="") -> None:
self.message = message
super().__init__(self.message)

def normalize_searched_gene(gene_list, chosen_gene):
    """Return the dataset's own spelling of a gene, matched case-insensitively.

    Args:
        gene_list: Iterable of gene symbols taken from the dataset. Each entry
            is passed through ``str()`` before comparison, so non-string
            values are tolerated.
        chosen_gene: The gene symbol (a ``str``) that was searched for.

    Returns:
        The first element of ``gene_list`` whose lowercased string form equals
        the lowercased ``chosen_gene``, or ``None`` if no element matches.
    """
    # Lowercase the query once instead of on every loop iteration.
    target = chosen_gene.lower()
    for candidate in gene_list:
        if str(candidate).lower() == target:
            return candidate
    return None

def get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id):
"""
Maps a gene symbol to its corresponding orthologous gene symbol in a given dataset.
Expand All @@ -64,7 +71,9 @@ def get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id):
else:
for ortholog_file in get_ortholog_files_from_dataset(dataset_organism_id, "ensembl"):
try:
return map_single_gene(gene_symbol, ortholog_file)
mapped_gene = map_single_gene(gene_symbol, ortholog_file)
if mapped_gene:
return mapped_gene
except:
continue
return None
Expand All @@ -81,8 +90,6 @@ def check_gene_in_dataset(adata, gene_symbols):
bool: True if any of the gene symbols are present in the dataset, False otherwise.
"""
gene_filter = adata.var.gene_symbol.isin(gene_symbols)
print(gene_symbols, file=sys.stderr)
print(gene_filter.any(), file=sys.stderr)
return gene_filter.any()

def get_analysis(analysis, dataset_id, session_id):
Expand Down Expand Up @@ -315,15 +322,20 @@ def post(self, dataset_id):
if not check_gene_in_dataset(adata, gene_symbols):
try:
mapped_gene_symbol = get_mapped_gene_symbol(gene_symbol, gene_organism_id, dataset_organism_id)

# Last chance - See if a normalized gene symbol is present in the dataset
if not mapped_gene_symbol:
dataset_genes = adata.var['gene_symbol'].unique().tolist()
mapped_gene_symbol = normalize_searched_gene(dataset_genes, gene_symbol)
if not mapped_gene_symbol:
raise Exception("Could not map gene symbol to dataset organism.")

except:
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be mapped to the dataset organism."}

if mapped_gene_symbol:
gene_symbols = (mapped_gene_symbol,)
if not check_gene_in_dataset(adata, gene_symbols):
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be found in the h5ad file."}
else:
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be mapped to the dataset organism."}
gene_symbols = (mapped_gene_symbol,)
if not check_gene_in_dataset(adata, gene_symbols):
return {"success": -1, "message": f"The searched gene symbol {gene_symbol} could not be found in the h5ad file."}

gene_filter = adata.var.gene_symbol.isin(gene_symbols)

Expand Down
8 changes: 4 additions & 4 deletions www/expression.html
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,10 @@ <h4>miRNA tree</h4>
<div id="result-panel-options">
<span class="option-label pr-1">Scoring method: </span>
<div class="control select is-small">
<select>
<option>Gene scope</option>
<option>Tissue scope</option>
<option>Sample scope</option>
<select id="svg-scoring-method">
<option value="gene" selected>Gene scope</option>
<option value="tissue">Tissue scope</option>
<option value="dataset">Sample scope</option>
</select>
</div>
</div>
Expand Down
4 changes: 2 additions & 2 deletions www/include/primary_nav.html
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<ul class="menu-list">
<li>
<span class="icon-text is-align-items-center is-flex">
<a class="icon-link-part" tool="search_expression">
<a class="icon-link-part" tool="search_expression" href="./expression.html">
<span class="icon icon-image-part is-medium"><i class="mdi mdi-24px mdi-test-tube"></i></span>
<span class="icon-text-part">Gene Expression</span>
</a>
Expand All @@ -38,7 +38,7 @@
<a class="icon-link-part" tool="search_datasets" href="./dataset_explorer.html">
<span class="icon icon-image-part is-medium"><i class="mdi mdi-24px mdi-table-search"></i></span>
<span class="icon-text-part">Datasets</span>
</a>
</a>
</span>
</li>
</ul>
Expand Down
Loading

0 comments on commit cb4b39b

Please sign in to comment.