Skip to content

Commit

Permalink
Merge pull request #305 from MontgomeryLab/issue-303
Browse files: browse the repository at this point in the history
Version 1.4 release preparation
  • Loading branch information
taimontgomery authored May 2, 2023
2 parents 5aac71b + e2704c0 commit 6855509
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 41 deletions.
7 changes: 0 additions & 7 deletions START_HERE/run_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,6 @@ shared_memory: False
##-- Suppress all alignments if > <int> exist (default: no limit) (option -m) --##
#suppress_aln: 10

##-- Trim <int> bases from 5' (left) or 3' (right) end of reads (options --trim5 and --trim3) --##
#trim5: 0
#trim3: 0

##-- Input quals are from GA Pipeline ver. < 1.3 (option --solexa-quals) --##
#solexa: false

Expand All @@ -221,9 +217,6 @@ shared_memory: False
######-------------------------------------------------------------------------------######


##-- If True: show all parsed features in the counts csv, regardless of count/identity --##
counter_all_features: False

##-- If True: counts are normalized by genomic hits (number of multi-alignments) --##
counter_normalize_by_genomic_hits: True

Expand Down
Binary file modified images/features_sheet_header.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified images/tiny-count_selection.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 0 additions & 7 deletions tests/testdata/config_files/run_config_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,6 @@ shared_memory: False
##-- Suppress all alignments if > <int> exist (default: no limit) (option -m) --##
#suppress_aln: 10

##-- Trim <int> bases from 5' (left) or 3' (right) end of reads (options --trim5 and --trim3) --##
#trim5: 0
#trim3: 0

##-- Input quals are from GA Pipeline ver. < 1.3 (option --solexa-quals) --##
#solexa: false

Expand All @@ -221,9 +217,6 @@ shared_memory: False
######-------------------------------------------------------------------------------######


##-- If True: show all parsed features in the counts csv, regardless of count/identity --##
counter_all_features: False

##-- If True: counts are normalized by genomic hits (number of multi-alignments) --##
counter_normalize_by_genomic_hits: True

Expand Down
5 changes: 0 additions & 5 deletions tiny/cwl/tools/tiny-count.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,6 @@ inputs:
inputBinding:
prefix: --stepvector

all_features:
type: boolean?
inputBinding:
prefix: --all-features

in_pipeline:
type: boolean?
inputBinding:
Expand Down
2 changes: 0 additions & 2 deletions tiny/cwl/workflows/tinyrna_wf.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ inputs:
counter_diags: boolean?
counter_decollapse: boolean?
counter_stepvector: string?
counter_all_features: boolean?
counter_normalize_by_feature_hits: boolean?
counter_normalize_by_genomic_hits: boolean?

Expand Down Expand Up @@ -214,7 +213,6 @@ steps:
aligned_seqs: bowtie/sam_out
gff_files: gff_files
out_prefix: run_name
all_features: counter_all_features
normalize_by_feature_hits:
source: counter_normalize_by_feature_hits
valueFrom: $(String(self)) # convert boolean -> string
Expand Down
4 changes: 1 addition & 3 deletions tiny/rna/counter/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ def get_args():
optional_args.add_argument('-sv', '--stepvector', choices=['Cython', 'HTSeq'], default='Cython',
help='Select which StepVector implementation is used to find '
'features overlapping an interval.')
optional_args.add_argument('-a', '--all-features', action='store_true', help=argparse.SUPPRESS)
#help='Represent all features in output counts table, '
# 'even if they did not match in Stage 1 selection.')
optional_args.add_argument('-a', '--all-features', action='store_true', help=argparse.SUPPRESS) # deprecated
optional_args.add_argument('-p', '--in-pipeline', action='store_true',
help='Indicates that tiny-count was invoked as part of a pipeline run '
'and that input files should be sourced as such.')
Expand Down
4 changes: 2 additions & 2 deletions tiny/rna/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,9 @@ def load_dge_tables(comparisons: list, class_fillna: str) -> pd.DataFrame:
if not comparison:
raise ValueError("Could not find condition names in DGE filename: " + dgefile)
if len(comparison) > 1:
print("Warning: multiple conditions matched in DGE filename. Using first match.")
print("Warning: multiple conditions matched in DGE filename. Using last match.")

comparison_name = "_vs_".join(comparison[0])
comparison_name = "_vs_".join(comparison[-1])
table = set_counts_table_multiindex(pd.read_csv(dgefile), class_fillna)

de_table[comparison_name] = table['padj']
Expand Down
30 changes: 22 additions & 8 deletions tiny/rna/plotterlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ def scatter_grouped(self, count_x: pd.DataFrame, count_y: pd.DataFrame, *groups,
has_outgroup = all(co.replace(0, pd.NA).dropna().any()
for co in (count_x_out, count_y_out))

# Determine which groups we are able to plot on log scale
plottable_groups = self.get_nonzero_group_indexes(count_x, count_y, groups)
# Make all counts log-compatible, or drop the group if it is zero in both conditions
plottable_groups = self.nonzero_group_indexes(count_x, count_y, groups, view_lims)
plot_labels = [labels[i] for i in plottable_groups]
plot_groups = [groups[i] for i in plottable_groups]
group_it = iter(plot_groups)
Expand Down Expand Up @@ -271,20 +271,34 @@ def scatter_grouped(self, count_x: pd.DataFrame, count_y: pd.DataFrame, *groups,
return gscat

@staticmethod
def get_nonzero_group_indexes(count_x, count_y, groups):
"""When scatter plotting groups for two conditions on a log scale, if one
of the conditions has all zero counts for the group, then none of the group's
points are actually plotted due to the singularity at 0. We want to skip
plotting these groups and omit them from the legend."""
def nonzero_group_indexes(count_x, count_y, groups, view_lims):
"""When scatter plotting features on a log scale, if the feature has a count of
zero in either condition then it is omitted from the plot by default due to the
singularity at 0, but we want to represent it nonetheless. So, we set its count
in the zero condition to the lower plot limit so that it is plotted on the very
edge of the plot space. Otherwise, approximating zero for these features would
shrink the plot.
We still want to omit features that are zero in both conditions, and if an entire
group consists of zero counts, its label should be omitted from the legend. This
is accomplished by omitting its index from the returned list."""

non_zero_groups = []
minpos = min(view_lims)
for i, group in enumerate(groups):
x, y = count_x.loc[group], count_y.loc[group]
x_is_zeros = x.replace(0, pd.NA).dropna().empty
y_is_zeros = y.replace(0, pd.NA).dropna().empty
if not (x_is_zeros or y_is_zeros):

if not (x_is_zeros and y_is_zeros):
non_zero_groups.append(i)

# Replace counts that are zero in only one condition
x.loc[(x == 0) & (y != 0)] = minpos
y.loc[(y == 0) & (x != 0)] = minpos
count_x.loc[group] = x
count_y.loc[group] = y

return non_zero_groups

@staticmethod
Expand Down
4 changes: 4 additions & 0 deletions tiny/templates/compatibility/run_config_compatibility.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@


1.4.0:
remove:
- counter_all_features
- trim5
- trim3
rename:
- counter_normalize_by_hits: counter_normalize_by_feature_hits
add:
Expand Down
7 changes: 0 additions & 7 deletions tiny/templates/run_config_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,6 @@ shared_memory: False
##-- Suppress all alignments if > <int> exist (default: no limit) (option -m) --##
#suppress_aln: 10

##-- Trim <int> bases from 5' (left) or 3' (right) end of reads (options --trim5 and --trim3) --##
#trim5: 0
#trim3: 0

##-- Input quals are from GA Pipeline ver. < 1.3 (option --solexa-quals) --##
#solexa: false

Expand All @@ -221,9 +217,6 @@ shared_memory: False
######-------------------------------------------------------------------------------######


##-- If True: show all parsed features in the counts csv, regardless of count/identity --##
counter_all_features: False

##-- If True: counts are normalized by genomic hits (number of multi-alignments) --##
counter_normalize_by_genomic_hits: True

Expand Down

0 comments on commit 6855509

Please sign in to comment.