Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(fix): resolve data ordering to match axis for stacked violin plots #3196

Merged
merged 12 commits into from
Aug 6, 2024
1 change: 1 addition & 0 deletions docs/release-notes/1.10.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@
* Add compatibility with {mod}`numpy` 2.0 {pr}`3065` and {pr}`3115` {smaller}`P Angerer`
* Fix `legend_loc` argument in {func}`scanpy.pl.embedding` not accepting matplotlib parameters {pr}`3163` {smaller}`P Angerer`
* Fix dispersion cutoff in {func}`~scanpy.pp.highly_variable_genes` in presence of `NaN`s {pr}`3176` {smaller}`P Angerer`
* Fix axis labeling for swapped axes in {func}`~scanpy.pl.rank_genes_groups_stacked_violin` {pr}`3196` {smaller}`Ilan Gold`

#### Performance
42 changes: 33 additions & 9 deletions src/scanpy/plotting/_stacked_violin.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,8 +410,30 @@ def _mainplot(self, ax: Axes):
colormap_array = cmap(normalize(_color_df.values))
x_spacer_size = self.plot_x_padding
y_spacer_size = self.plot_y_padding

# Every column needs a unique name, but gene names are frequently
# repeated in self.var_names. Suffix each name with its position;
# otherwise the violin plot will not distinguish those genes.
_matrix.columns = [f"{x}_{idx}" for idx, x in enumerate(_matrix.columns)]

# Ensure the categories axis is always ordered identically.
# If the axes are not swapped, the unique names in _matrix.columns (set above) are used in the actual violin plot.
# If they are swapped, use the same order as the labels used below.
# Without this, `_make_rows_of_violinplots` does not know the order of the categories in labels.
labels = _color_df.columns
if self.are_axes_swapped:
x_axis_order = labels
else:
x_axis_order = _matrix.columns
flying-sheep marked this conversation as resolved.
Show resolved Hide resolved

self._make_rows_of_violinplots(
ax, _matrix, colormap_array, _color_df, x_spacer_size, y_spacer_size
ax,
_matrix,
colormap_array,
_color_df,
x_spacer_size,
y_spacer_size,
x_axis_order,
)

# turn on axis for `ax` as this is turned off
Expand All @@ -434,7 +456,6 @@ def _mainplot(self, ax: Axes):
# 0.5 to position the ticks on the center of the violins
x_ticks = np.arange(_color_df.shape[1]) + 0.5
ax.set_xticks(x_ticks)
labels = _color_df.columns
ax.set_xticklabels(labels, minor=False, ha="center")
# rotate x tick labels if they are longer than 2 characters
if max([len(x) for x in labels]) > 2:
Expand All @@ -445,7 +466,14 @@ def _mainplot(self, ax: Axes):
return normalize

def _make_rows_of_violinplots(
self, ax, _matrix, colormap_array, _color_df, x_spacer_size, y_spacer_size
self,
ax,
_matrix,
colormap_array,
_color_df,
x_spacer_size,
y_spacer_size,
x_axis_order,
):
import seaborn as sns # Slow import, only import if called

Expand All @@ -460,11 +488,6 @@ def _make_rows_of_violinplots(
else:
row_colors = [None] * _color_df.shape[0]

# All columns should have a unique name, yet, frequently
# gene names are repeated in self.var_names, otherwise the
# violin plot will not distinguish those genes
_matrix.columns = [f"{x}_{idx}" for idx, x in enumerate(_matrix.columns)]

# transform the dataframe into a dataframe having three columns:
# the categories name (from groupby),
# the gene name
Expand Down Expand Up @@ -543,9 +566,10 @@ def _make_rows_of_violinplots(
hue=None if palette_colors is None else x,
palette=palette_colors,
color=row_colors[idx],
order=x_axis_order,
hue_order=x_axis_order,
**self.kwds,
)

if self.stripplot:
row_ax = sns.stripplot(
x=x,
Expand Down
Loading