Skip to content

Commit

Permalink
add more tests, CodeCov shield, some minor fixes
Browse files: browse the repository at this point in the history
- updated resources.open_text to resources.files due to DeprecationWarning
- fixed an edge case in get_unique_topologies caused by indexing into an empty list
- always return dataframe in compositions_to_structures
- make component inference in parse_glycoform more robust
- return early for string-identical repeats in equal_repeats
  • Loading branch information
Bribak committed Nov 15, 2024
1 parent 25d4672 commit 0c94995
Show file tree
Hide file tree
Showing 10 changed files with 913 additions and 290 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,16 @@ jobs:
run: |
python -m pip install --upgrade pip
test -f setup.py && pip install -e ".[all]"
pip install pytest
pip install pytest pytest-cov
- name: Run tests
shell: bash -l {0}
run: |
cd tests
pytest
pytest --cov=../ --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
file: ./coverage.xml
fail_ci_if_error: true
16 changes: 8 additions & 8 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion glycowork/glycan_data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from importlib import resources
from typing import Any, Dict, List, Optional

with resources.open_text("glycowork.glycan_data", "glycan_motifs.csv") as f:
with resources.files("glycowork.glycan_data").joinpath("glycan_motifs.csv").open(encoding = 'utf-8-sig') as f:
motif_list = pd.read_csv(f)
this_dir, this_filename = path.split(__file__) # Get path of data.pkl
data_path = path.join(this_dir, 'lib_v11.pkl')
Expand Down
5 changes: 5 additions & 0 deletions glycowork/motif/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,8 @@ def equal_repeats(r1: str, # First glycan sequence
r2: str # Second glycan sequence
) -> bool: # True if repeats are shifted versions
"Check whether two repeat units could stem from the same repeating structure"
if r1 == r2:
return True
r1_long = r1[:r1.rindex(')')+1] * 2
return any(r1_long[i:i + len(r2)] == r2 for i in range(len(r1)))

Expand Down Expand Up @@ -907,6 +909,9 @@ def parse_glycoform(glycoform: Union[str, Dict[str, int]], # Composition in H5N4
) -> Dict[str, int]: # Dictionary of feature counts
"Convert composition like H5N4F1A2 into monosaccharide counts"
if isinstance(glycoform, dict):
if not any(f in glycoform.keys() for f in glycan_features):
mapping = {'Hex': 'H', 'HexNAc': 'N', 'dHex': 'F', 'Neu5Ac': 'A', 'Neu5Gc': 'G'}
glycoform = {mapping.get(k, k): v for k, v in glycoform.items()}
components = {k: glycoform.get(k, 0) for k in glycan_features}
return components | infer_features_from_composition(components)
components = {c: 0 for c in glycan_features}
Expand Down
2 changes: 1 addition & 1 deletion glycowork/motif/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import copy
import networkx as nx
from itertools import product, combinations, chain
from typing import Dict, List, Union, Optional, Tuple, Callable, Any
from typing import Dict, List, Union, Optional, Tuple, Any
from glycowork.glycan_data.loader import replace_every_second, unwrap
from glycowork.motif.processing import min_process_glycans, bracket_removal, canonicalize_iupac
from glycowork.motif.graph import graph_to_string, subgraph_isomorphism, compare_glycans, glycan_to_nxGraph
Expand Down
14 changes: 6 additions & 8 deletions glycowork/motif/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from importlib import resources
from collections import Counter
from sklearn.cluster import DBSCAN
from typing import Dict, List, Set, Union, Optional, Tuple
from functools import reduce
from typing import Dict, List, Set, Union, Optional

from glycowork.glycan_data.loader import lib, unwrap, df_glycan, Hex, dHex, HexA, HexN, HexNAc, Pen, linkages
from glycowork.motif.processing import min_process_glycans, rescue_glycans, rescue_compositions
Expand All @@ -16,7 +17,7 @@
'L':12, 'M':13, 'N':14, 'P':15, 'Q':16, 'R':17, 'S':18, 'T':19,
'V':20, 'W':21, 'Y':22, 'X':23, 'Z':24, 'z':25}

with resources.open_text("glycowork.motif", "mz_to_composition.csv") as f:
with resources.files("glycowork.motif").joinpath("mz_to_composition.csv").open(encoding = 'utf-8-sig') as f:
mapping_file = pd.read_csv(f)
mass_dict = dict(zip(mapping_file.composition, mapping_file["underivatized_monoisotopic"]))

Expand Down Expand Up @@ -330,7 +331,7 @@ def compositions_to_structures(composition_list: List[Dict[str, int]], # List of
print(f"{not_matched_count} compositions could not be matched. Run with verbose = True to see which compositions.")
if verbose:
print(not_matched_list)
return df_out
return df_out if isinstance(df_out, pd.DataFrame) else pd.DataFrame()


def mz_to_structures(mz_list: List[float], # List of precursor masses
Expand Down Expand Up @@ -365,10 +366,7 @@ def mz_to_structures(mz_list: List[float], # List of precursor masses
for m, comp in enumerate(compositions):
out_structures.append(compositions_to_structures(comp, glycan_class = glycan_class,
abundances = abundances.iloc[[m]], kingdom = kingdom, df_use = df_use, verbose = verbose))
if out_structures:
return pd.concat(out_structures, axis = 0)
else:
return []
return pd.concat(out_structures, axis = 0).reset_index(drop = True) if out_structures else []


def mask_rare_glycoletters(glycans: List[str], # List of IUPAC-condensed glycans
Expand Down Expand Up @@ -553,4 +551,4 @@ def get_unique_topologies(composition: Dict[str, int], # Composition dictionary
df_use = df_use[df_use.glycan_type == glycan_type]
df_use = df_use[df_use[taxonomy_rank].apply(lambda x: taxonomy_value in x)].glycan.values
df_use = list(set([structure_to_basic(k) for k in df_use]))
return [[g.replace(k, v) for k,v in universal_replacers.items()][0] for g in df_use if '{' not in g]
return [reduce(lambda x, kv: x.replace(*kv), universal_replacers.items(), g) for g in df_use if '{' not in g]
2 changes: 1 addition & 1 deletion glycowork/network/biosynthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scipy.stats import ttest_rel, ttest_ind
from statsmodels.formula.api import ols
from statsmodels.stats.multitest import multipletests
from typing import Dict, List, Set, Union, Optional, Tuple, Any, FrozenSet
from typing import Dict, List, Set, Union, Optional, Tuple, FrozenSet
import statsmodels.api as sm
import networkx as nx
import numpy as np
Expand Down
Loading

0 comments on commit 0c94995

Please sign in to comment.