Skip to content

Commit

Permalink
more tests, add .cfg file, minor stability fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
Bribak committed Nov 16, 2024
1 parent 5a99d6b commit f76535e
Show file tree
Hide file tree
Showing 5 changed files with 1,003 additions and 17 deletions.
21 changes: 15 additions & 6 deletions glycowork/motif/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,8 @@ def get_glycanova(
custom_scale: float = 0 # Ratio of total signal in group2/group1 for an informed scale model (or group_idx: mean(group)/min(mean(groups)) signal dict for multivariate)
) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]: # (ANOVA results with F-stats and omega-squared effect sizes, post-hoc results)
"Performs one-way ANOVA with omega-squared effect size calculation and optional Tukey's HSD post-hoc testing on glycomics data across multiple groups"
if len(set(groups)) < 3:
raise ValueError("You have fewer than three groups. We suggest get_differential_expression for those cases. ANOVA is for >= three groups.")
df, _, groups, _ = preprocess_data(df, groups, [], experiment = "anova", motifs = motifs, impute = impute,
min_samples = min_samples, transform = transform, feature_set = feature_set,
gamma = gamma, custom_scale = custom_scale, custom_motifs = custom_motifs)
Expand Down Expand Up @@ -982,10 +984,17 @@ def get_biodiversity(
mean_a, mean_b = [np.mean(row_a) for row_a in df_a.values], [np.mean(row_b) for row_b in df_b.values]
if paired:
assert len(df_a) == len(df_b), "For paired samples, the size of group1 and group2 should be the same"
pvals = [ttest_rel(row_b, row_a)[1] if paired else ttest_ind(row_b, row_a, equal_var = False)[1] for
row_a, row_b in zip(df_a.values, df_b.values)]
pvals = [p if p > 0 and p < 1 else 1.0 for p in pvals]
effect_sizes, _ = zip(*[cohen_d(row_b, row_a, paired = paired) for row_a, row_b in zip(df_a.values, df_b.values)])
pvals = []
effect_sizes = []
for row_a, row_b in zip(df_a.values, df_b.values):
if np.allclose(row_a, row_b, rtol = 1e-5, atol = 1e-8):
pvals.append(1.0)
effect_sizes.append(0.0)
else:
pval = ttest_rel(row_b, row_a)[1] if paired else ttest_ind(row_b, row_a, equal_var = False)[1]
pvals.append(pval if (pval > 0 and pval < 1) else 1.0)
effect, _ = cohen_d(row_b, row_a, paired = paired)
effect_sizes.append(effect)
a_df_stats = pd.DataFrame(list(zip(a_df.index.tolist(), mean_a, mean_b, pvals, effect_sizes)),
columns = ["Metric", "Group1 mean", "Group2 mean", "p-val", "Effect size"])
shopping_cart.append(a_df_stats)
Expand Down Expand Up @@ -1250,9 +1259,9 @@ def get_lectin_array(
lectin_lib = load_lectin_lib()
useable_lectin_mapping, motif_mapping = create_lectin_and_motif_mappings(lectin_list, lectin_lib)
if group2:
mean_scores_per_condition = df[group1 + group2].groupby([0] * len(group1) + [1] * len(group2), axis = 1).mean()
mean_scores_per_condition = df[group1 + group2].T.groupby([0] * len(group1) + [1] * len(group2)).mean().T
else:
mean_scores_per_condition = df.groupby(group1, axis = 1).mean()
mean_scores_per_condition = df.T.groupby(group1).mean().T
lectin_variance = mean_scores_per_condition.var(axis = 1)
idf = np.sqrt(lectin_variance)
if group2:
Expand Down
4 changes: 2 additions & 2 deletions glycowork/motif/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def matches(
) -> Generator[Tuple[int, int, int], None, None]: # Yields (start pos, end pos, nesting depth)
"Finds matching pairs of delimiters in a string, handling nested pairs and returning positions and depth;ref: https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex"""
stack = []
for m in re.finditer(r'[{}{}]'.format(opendelim, closedelim), line):
for m in re.finditer(r'[\[\]]', line) if opendelim == '[' else re.finditer(r'[{}{}]'.format(opendelim, closedelim), line):
pos = m.start()
if line[pos-1] == '\\':
# Skip escape sequence
Expand Down Expand Up @@ -2356,7 +2356,7 @@ def scale_in_range(
"Normalizes list of numbers to specified range"
min_val = min(listy)
max_val = max(listy)
range_val = max_val - min_val
range_val = max(max_val - min_val, 1e-6)
return [(b - a) * ((x - min_val) / range_val) + a for x in listy]


Expand Down
4 changes: 2 additions & 2 deletions glycowork/network/biosynthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,8 +945,8 @@ def get_maximum_flow(network: nx.Graph, # Biosynthetic network
# Dictionary to store flow values and paths for each sink
flow_results = {}
for sink in sinks:
path_length = nx.shortest_path_length(network, source = source, target = sink)
try:
path_length = nx.shortest_path_length(network, source = source, target = sink)
try:
flow_value, flow_dict = nx.maximum_flow(network, source, sink)
except:
Expand All @@ -955,7 +955,7 @@ def get_maximum_flow(network: nx.Graph, # Biosynthetic network
'flow_value': flow_value * path_length,
'flow_dict': flow_dict
}
except nx.NetworkXError:
except (nx.NetworkXError, nx.NetworkXNoPath):
print(f"{sink} cannot be reached.")
return flow_results

Expand Down
6 changes: 6 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Options for the `bdist_wheel` command.
[bdist_wheel]
# universal=0 requests a non-universal wheel (i.e. not tagged py2.py3).
universal=0

# NOTE(review): build requirements and the build backend are normally declared
# in pyproject.toml under a `[build-system]` table (hyphen, TOML syntax, PEP 518).
# The values below are written in TOML style inside an INI file under an
# underscore-named section -- confirm whether any tool actually reads this
# section from setup.cfg; if not, it has no effect on the build.
[build_system]
requires = ["setuptools>=64.0"]
build-backend = "setuptools.build_meta"
Loading

0 comments on commit f76535e

Please sign in to comment.