From 113ae1c65b31a5bc2c53a17f4aa18f96eadd6303 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 22 Apr 2023 17:46:41 -0700 Subject: [PATCH 01/18] Updating the SamplesSheet class so that it can be used in tiny-count (or any other pipeline step). This means the Samples Sheet will be subject to many of the same validation steps in tiny-count standalone runs as it is at pipeline startup. Additionally: - The normalization column is now validated and stored by the class - The samples sheet can contain .sam and .bam files if it is used in a standalone run context - File paths are properly resolved when used in a pipeline step context - tiny-deseq-related validations (control condition and deseq2 compatibility checks) are skipped when used in a standalone run or pipeline step context. They are only validated at pipeline startup. - The class now reports the filename and row number if any exception is raised during csv parsing --- tiny/rna/configuration.py | 125 +++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 34 deletions(-) diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py index 53b7e0fc..04e56ac3 100644 --- a/tiny/rna/configuration.py +++ b/tiny/rna/configuration.py @@ -10,12 +10,12 @@ from pkg_resources import resource_filename from collections import Counter, OrderedDict, defaultdict -from typing import Union, Any, List, Dict +from typing import Union, Any, List, Dict, Optional from glob import glob from tiny.rna.compatibility import RunConfigCompatibility from tiny.rna.counter.validation import GFFValidator -from tiny.rna.util import get_timestamp, get_r_safename +from tiny.rna.util import get_timestamp, get_r_safename, append_to_exception class ConfigBase: @@ -265,13 +265,13 @@ def absorb_paths_file(self): def process_samples_sheet(self): samples_sheet_path = self.paths['samples_csv'] - samples_sheet = SamplesSheet(samples_sheet_path) + samples_sheet = SamplesSheet(samples_sheet_path, context="Pipeline Start") self['sample_basenames'] = samples_sheet.sample_basenames self['control_condition'] = samples_sheet.control_condition self['run_deseq'] = samples_sheet.is_compatible_df - self['in_fq'] = [self.cwl_file(fq, verify=False) for fq in samples_sheet.fastq_files] + self['in_fq'] = [self.cwl_file(fq, verify=False) for fq in samples_sheet.hts_samples] self['fastp_report_titles'] = [f"{g}_rep_{r}" for g, r in samples_sheet.groups_reps] def process_features_sheet(self) -> List[dict]: @@ -641,47 +641,93 @@ def append_to(self, key: str, val: Any): class SamplesSheet: - def __init__(self, file): + def __init__(self, file, context): self.csv = CSVReader(file, "Samples Sheet") self.basename = os.path.basename(file) self.dir = os.path.dirname(file) self.file = file - self.fastq_files = [] + self.hts_samples = [] self.groups_reps = [] + self.normalizations = [] self.sample_basenames = [] self.control_condition = None self.is_compatible_df = False + map_path, validate_path = self.get_context_methods(context).values() + self.validate_sample_path = validate_path + self.map_path = map_path + self.context = context + self.read_csv() + def get_context_methods(self, context): + return { + "Standalone Run": {'map_path': self.from_here, 'validate_path': self.validate_alignments_filepath}, + "Pipeline Start": {'map_path': self.from_here, 'validate_path': self.validate_fastq_filepath}, + "Pipeline Step": {'map_path': self.from_pipeline, 'validate_path': lambda _: True}, # skip validation + }[context] + def read_csv(self): reps_per_group = Counter() - 
for row in self.csv.rows(): - fastq_file = Configuration.joinpath(self.dir, row['File']) - group_name = row['Group'] - rep_number = row['Replicate'] - is_control = row['Control'].lower() == 'true' - basename = self.get_sample_basename(fastq_file) + try: + for row in self.csv.rows(): + hts_sample = self.map_path(row['File']) + group_name = row['Group'] + rep_number = row['Replicate'] + is_control = row['Control'].lower() == 'true' + norm_prefs = row['Normalization'] + basename = self.get_sample_basename(hts_sample) + + self.validate_sample_path(hts_sample) + self.validate_group_rep(group_name, rep_number) + self.validate_control_group(is_control, group_name) + self.validate_normalization(norm_prefs) + + self.hts_samples.append(hts_sample) + self.sample_basenames.append(basename) + self.groups_reps.append((group_name, rep_number)) + self.normalizations.append(norm_prefs) + reps_per_group[group_name] += 1 + + if is_control: self.control_condition = group_name + except Exception as e: + msg = f"Error occurred on line {self.csv.line_num} of {self.basename}" + append_to_exception(e, msg) + raise - self.validate_fastq_filepath(fastq_file) - self.validate_group_rep(group_name, rep_number) - self.validate_control_group(is_control, group_name) + self.is_compatible_df = self.validate_deseq_compatibility(reps_per_group) - self.fastq_files.append(fastq_file) - self.sample_basenames.append(basename) - self.groups_reps.append((group_name, rep_number)) - reps_per_group[group_name] += 1 + def from_here(self, input_file): + """Resolves .sam/.bam paths in pipeline startup and standalone context""" - if is_control: self.control_condition = group_name + return ConfigBase.joinpath(self.dir, input_file) - self.is_compatible_df = self.validate_deseq_compatibility(reps_per_group) + def from_pipeline(self, input_file): + """Resolves .fastq(.gz) file paths in pipeline step context""" + + basename_root, ext = os.path.splitext(os.path.basename(input_file)) + return f"{basename_root}_aligned_seqs.sam" + + def validate_alignments_filepath(self, file: str): + """Checks file existence, extension, and duplicate entries in standalone context""" + + root, ext = os.path.splitext(file) + + assert os.path.isfile(file), \ + "The file on row {row_num} of {selfname} was not found:\n\t{file}" \ + .format(row_num=self.csv.row_num, selfname=self.basename, file=file) + + assert ext in (".sam", ".bam"), \ + "Files in {selfname} must have a .sam or .bam extension (row {row_num})" \ + .format(selfname=self.basename, row_num=self.csv.row_num) + + assert file not in self.hts_samples, \ + "Alignment files cannot be listed more than once in {selfname} (row {row_num})" \ + .format(selfname=self.basename, row_num=self.csv.row_num) def validate_fastq_filepath(self, file: str): - """Checks file existence, extension, and duplicate entries. - Args: - file: fastq file path. 
For which has already been resolved relative to self.dir - """ + """Checks file existence, extension, and duplicate entries in pipeline startup context""" root, ext = os.path.splitext(file) @@ -690,30 +736,41 @@ def validate_fastq_filepath(self, file: str): .format(row_num=self.csv.row_num, selfname=self.basename, file=file) assert ext in (".fastq", ".gz"), \ - "Files in {selfname} must have a .fastq(.gz) extension (row {row_num})"\ + "Files in {selfname} must have a .fastq(.gz) extension (row {row_num})" \ .format(selfname=self.basename, row_num=self.csv.row_num) - assert file not in self.fastq_files, \ - "Fastq files cannot be listed more than once in {selfname} (row {row_num})"\ + assert file not in self.hts_samples, \ + "Fastq files cannot be listed more than once in {selfname} (row {row_num})" \ .format(selfname=self.basename, row_num=self.csv.row_num) def validate_group_rep(self, group:str, rep:str): assert (group, rep) not in self.groups_reps, \ "The same group and replicate number cannot appear on " \ - "more than one row in {selfname} (row {row_num})"\ + "more than one row in {selfname} (row {row_num})" \ .format(selfname=self.basename, row_num=self.csv.row_num) def validate_control_group(self, is_control: bool, group: str): - if not is_control: return + + if not is_control or self.context != "Pipeline Start": + return + assert self.control_condition in (group, None), \ - "tinyRNA does not support multiple control conditions " \ + "Experiments with multiple control conditions aren't supported " \ "(row {row_num} in {selfname}).\nHowever, if the control condition " \ "is unspecified, all possible comparisons will be made and this " \ - "should accomplish your goal."\ + "should accomplish your goal." \ .format(row_num=self.csv.row_num, selfname=self.basename) - @staticmethod - def validate_deseq_compatibility(sample_groups: Counter) -> bool: + def validate_normalization(self, norm): + assert re.fullmatch(r"(\s*[\d.]+\s*)|(rpm)", norm, re.IGNORECASE) or not norm, \ + "Invalid normalization value in {selfname} (row {row_num})" \ + .format(selfname=self.basename, row_num=self.csv.row_num) + + def validate_deseq_compatibility(self, sample_groups: Counter) -> Optional[bool]: + + if self.context != "Pipeline Start": + return None + SamplesSheet.validate_r_safe_sample_groups(sample_groups) total_samples = sum(sample_groups.values()) From 3c3a458864df2bc02301391a47d2b195c19661a8 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 22 Apr 2023 17:51:36 -0700 Subject: [PATCH 02/18] Updating load_samples() to use the SamplesSheet class. Cleaner, more consistent, and more thorough! 
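For reference, a sketch of the records load_samples() now builds from the
SamplesSheet class (sample names and file paths below are hypothetical):

```python
from tiny.rna.counter.counter import load_samples

# Standalone run: the Samples Sheet lists .sam/.bam alignment files directly
libraries = load_samples("samples.csv", in_pipeline=False)
# -> [{'Name': 'condition1_rep_1', 'File': '/data/cond1_rep1.sam', 'Norm': 'rpm'}, ...]

# Pipeline step: .fastq(.gz) entries resolve to their pipeline-produced SAM files
libraries = load_samples("samples.csv", in_pipeline=True)
# -> [{'Name': 'condition1_rep_1', 'File': 'cond1_rep1_aligned_seqs.sam', 'Norm': ''}, ...]
```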
This means that the from_here() method can be removed from util.py, which is a good thing in the end because ConfigBase.from_here() (which SamplesSheet uses) is more developed --- tiny/rna/counter/counter.py | 57 ++++++++++--------------------------- tiny/rna/util.py | 11 ------- 2 files changed, 15 insertions(+), 53 deletions(-) diff --git a/tiny/rna/counter/counter.py b/tiny/rna/counter/counter.py index 4bb95f7d..c6b8a836 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -12,14 +12,13 @@ from tiny.rna.counter.features import Features, FeatureCounter from tiny.rna.counter.statistics import MergedStatsManager from tiny.rna.counter.hts_parsing import ReferenceFeatures, ReferenceSeqs, ReferenceBase -from tiny.rna.configuration import CSVReader, PathsFile, get_templates +from tiny.rna.configuration import PathsFile, SamplesSheet, CSVReader, get_templates from tiny.rna.util import ( report_execution_time, add_transparent_help, append_to_exception, get_timestamp, - ReadOnlyDict, - from_here + ReadOnlyDict ) # Global variables for multiprocessing @@ -89,15 +88,15 @@ def get_args(): return ReadOnlyDict(args_dict) -def load_samples(samples_csv: str, is_pipeline: bool) -> List[Dict[str, str]]: +def load_samples(samples_csv: str, in_pipeline: bool) -> List[Dict[str, str]]: """Parses the Samples Sheet to determine library names and alignment files for counting Sample files may have a .fastq(.gz) extension (i.e. when tiny-count is called as part of a - pipeline run) or a .sam extension (i.e. when tiny-count is called as a standalone tool). + pipeline run) or a .sam/.bam extension (i.e. when tiny-count is called as a standalone tool). Args: samples_csv: a csv file which defines sample group, replicate, and file location - is_pipeline: helps locate sample SAM files. If true, files are assumed to reside + in_pipeline: helps locate sample SAM files. If true, files are assumed to reside in the working directory. If false, files are assumed to reside in the same directory as their source FASTQ files with '_aligned_seqs.sam' appended (i.e. /dir/sample1.fastq -> /dir/sample1_aligned_seqs.sam). @@ -107,44 +106,18 @@ def load_samples(samples_csv: str, is_pipeline: bool) -> List[Dict[str, str]]: library name and the location of its SAM file for counting. 
""" - def get_library_filename(csv_row_file: str, samples_csv: str) -> str: - """The input samples.csv may contain either fastq or sam files""" - - file_ext = os.path.splitext(csv_row_file)[1].lower() - - # If the sample file has a fastq(.gz) extension, infer the name of its pipeline-produced .sam file - if file_ext in [".fastq", ".gz"]: - # Fix relative paths to be relative to sample_csv's path, rather than relative to cwd - csv_row_file = os.path.basename(csv_row_file) if is_pipeline else from_here(samples_csv, csv_row_file) - csv_row_file = os.path.splitext(csv_row_file)[0] + "_aligned_seqs.sam" - elif file_ext == ".sam": - if not os.path.isabs(csv_row_file): - raise ValueError("The following file must be expressed as an absolute path:\n%s" % (csv_row_file,)) - else: - raise ValueError("The filenames defined in your Samples Sheet must have a .fastq(.gz) or .sam extension.\n" - "The following filename contained neither:\n%s" % (csv_row_file,)) - return csv_row_file - + context = "Pipeline Step" if in_pipeline else "Standalone Run" + samples = SamplesSheet(samples_csv, context=context) inputs = list() - sheet = CSVReader(samples_csv, "Samples Sheet") - try: - for row in sheet.rows(): - library_name = f"{row['Group']}_rep_{row['Replicate']}" - library_file_name = get_library_filename(row['File'], samples_csv) - library_normalization = row['Normalization'] - - record = { - "Name": library_name, - "File": library_file_name, - "Norm": library_normalization - } - - if record not in inputs: inputs.append(record) - except Exception as e: - msg = f"Error occurred on line {sheet.line_num} of {os.path.basename(samples_csv)}" - append_to_exception(e, msg) - raise + for file, group_rep, norm in zip(samples.hts_samples, samples.groups_reps, samples.normalizations): + record = { + "Name": "_rep_".join(group_rep), + "File": file, + "Norm": norm + } + + if record not in inputs: inputs.append(record) return inputs diff --git a/tiny/rna/util.py b/tiny/rna/util.py index 1d8700df..1670ef3b 100644 --- a/tiny/rna/util.py +++ b/tiny/rna/util.py @@ -148,17 +148,6 @@ def _fill_text(self, text, width, indent): return '\n\n'.join(output_lines) -def from_here(config_file, input_file): - """Calculates paths relative to the config file which contains them""" - config_file, input_file = (os.path.expanduser(p) for p in [config_file, input_file]) - - if not os.path.isabs(input_file): - from_here = os.path.dirname(config_file) - input_file = os.path.normpath(os.path.join(from_here, input_file)) - - return input_file - - def make_filename(args, ext='.csv'): return '_'.join([str(chnk) for chnk in args if chnk is not None]) + ext From 6662696f6a112c93cd13ae6f4f61b6ee4c920e53 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 22 Apr 2023 17:55:57 -0700 Subject: [PATCH 03/18] Changing usages of is_pipeline to in_pipeline to be consistent with the PathsFile class (and because it just makes more sense) --- doc/Parameters.md | 4 ++-- tiny/cwl/tools/tiny-count.cwl | 4 ++-- tiny/cwl/workflows/tinyrna_wf.cwl | 4 ++-- tiny/rna/counter/counter.py | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/Parameters.md b/doc/Parameters.md index d085c17a..94ea2bef 100644 --- a/doc/Parameters.md +++ b/doc/Parameters.md @@ -101,7 +101,7 @@ A custom Cython implementation of HTSeq's StepVector is used for finding feature ### Is Pipeline | Run Config Key | Commandline Argument | |----------------|----------------------| -| | `--is-pipeline` | +| | `--in-pipeline` | This commandline 
argument tells tiny-count that it is running as a workflow step rather than a standalone/manual run. Under these conditions tiny-count will look for all input files in the current working directory regardless of the paths defined in the Samples Sheet and Features Sheet. @@ -152,7 +152,7 @@ Optional arguments: -sv {Cython,HTSeq}, --stepvector {Cython,HTSeq} Select which StepVector implementation is used to find features overlapping an interval. (default: Cython) - -p, --is-pipeline Indicates that tiny-count was invoked as part of a + -p, --in-pipeline Indicates that tiny-count was invoked as part of a pipeline run and that input files should be sourced as such. (default: False) -d, --report-diags Produce diagnostic information about diff --git a/tiny/cwl/tools/tiny-count.cwl b/tiny/cwl/tools/tiny-count.cwl index aeb2e694..f2a695a6 100644 --- a/tiny/cwl/tools/tiny-count.cwl +++ b/tiny/cwl/tools/tiny-count.cwl @@ -55,10 +55,10 @@ inputs: inputBinding: prefix: --all-features - is_pipeline: + in_pipeline: type: boolean? inputBinding: - prefix: --is-pipeline + prefix: --in-pipeline diagnostics: type: boolean? diff --git a/tiny/cwl/workflows/tinyrna_wf.cwl b/tiny/cwl/workflows/tinyrna_wf.cwl index 0a877b86..582ded97 100644 --- a/tiny/cwl/workflows/tinyrna_wf.cwl +++ b/tiny/cwl/workflows/tinyrna_wf.cwl @@ -82,7 +82,7 @@ inputs: features_csv: File gff_files: File[]? aligned_seqs: File[]? - is_pipeline: boolean? + in_pipeline: boolean? counter_diags: boolean? counter_decollapse: boolean? counter_stepvector: string? @@ -223,7 +223,7 @@ steps: valueFrom: $(String(self)) # convert boolean -> string decollapse: counter_decollapse stepvector: counter_stepvector - is_pipeline: {default: true} + in_pipeline: {default: true} diagnostics: counter_diags fastp_logs: preprocessing/json_report_file collapsed_fa: preprocessing/uniq_seqs diff --git a/tiny/rna/counter/counter.py b/tiny/rna/counter/counter.py index c6b8a836..6a756830 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -67,7 +67,7 @@ def get_args(): optional_args.add_argument('-a', '--all-features', action='store_true', help=argparse.SUPPRESS) #help='Represent all features in output counts table, ' # 'even if they did not match in Stage 1 selection.') - optional_args.add_argument('-p', '--is-pipeline', action='store_true', + optional_args.add_argument('-p', '--in-pipeline', action='store_true', help='Indicates that tiny-count was invoked as part of a pipeline run ' 'and that input files should be sourced as such.') optional_args.add_argument('-d', '--report-diags', action='store_true', @@ -122,12 +122,12 @@ def load_samples(samples_csv: str, in_pipeline: bool) -> List[Dict[str, str]]: return inputs -def load_config(features_csv: str, is_pipeline: bool) -> List[dict]: +def load_config(features_csv: str, in_pipeline: bool) -> List[dict]: """Parses the Features Sheet to provide inputs to FeatureSelector and reference parsers Args: features_csv: a csv file which defines feature sources and selection rules - is_pipeline: not currently used; helps properly locate input files + in_pipeline: not currently used; helps properly locate input files Returns: rules: a list of dictionaries, each representing a parsed row from input. @@ -212,13 +212,13 @@ def map_and_reduce(libraries, paths, prefs): def main(): # Get command line arguments. 
args = get_args() - is_pipeline = args['is_pipeline'] + in_pipeline = args['in_pipeline'] try: # Load and validate config files and input files - paths = PathsFile(args['paths_file'], is_pipeline) - libraries = load_samples(paths['samples_csv'], is_pipeline) - selection = load_config(paths['features_csv'], is_pipeline) + paths = PathsFile(args['paths_file'], in_pipeline) + libraries = load_samples(paths['samples_csv'], in_pipeline) + selection = load_config(paths['features_csv'], in_pipeline) reference = load_references(paths, libraries, selection, args) # global for multiprocessing @@ -233,7 +233,7 @@ def main(): except Exception as e: if type(e) is SystemExit: return traceback.print_exception(*sys.exc_info()) - if args['is_pipeline']: + if in_pipeline: print("\n\ntiny-count encountered an error. Don't worry! You don't have to start over.\n" "You can resume the pipeline at tiny-count. To do so:\n\t" "1. cd into your Run Directory\n\t" From fdf2c21fee0488571a8f8c1178d910bc58d757e1 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Mon, 24 Apr 2023 13:09:25 -0700 Subject: [PATCH 04/18] Renaming SAM_reader to AlignmentReader and adding better checks for proper ordering of alignments --- tiny/rna/counter/counter.py | 2 +- tiny/rna/counter/features.py | 8 ++-- tiny/rna/counter/hts_parsing.py | 65 +++++++++++++++++++++++++++------ tiny/rna/counter/statistics.py | 2 +- tiny/rna/counter/validation.py | 6 +-- 5 files changed, 62 insertions(+), 21 deletions(-) diff --git a/tiny/rna/counter/counter.py b/tiny/rna/counter/counter.py index 6a756830..4a16c15b 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -59,7 +59,7 @@ def get_args(): optional_args.add_argument('-vs', '--verify-stats', metavar='T/F', default='T', help='Verify that all reported stats are internally consistent.') optional_args.add_argument('-dc', '--decollapse', action='store_true', - help='Create a decollapsed copy of all SAM files listed in your Samples Sheet. ' + help='Create a decollapsed SAM copy of all files listed in your Samples Sheet. 
' 'This option is ignored for non-collapsed inputs.') optional_args.add_argument('-sv', '--stepvector', choices=['Cython', 'HTSeq'], default='Cython', help='Select which StepVector implementation is used to find ' diff --git a/tiny/rna/counter/features.py b/tiny/rna/counter/features.py index 512292bd..1ce7be63 100644 --- a/tiny/rna/counter/features.py +++ b/tiny/rna/counter/features.py @@ -4,7 +4,7 @@ from collections import defaultdict from typing import List, Tuple, Set, Dict, Mapping, Union -from tiny.rna.counter.hts_parsing import ReferenceFeatures, ReferenceSeqs, SAM_reader +from tiny.rna.counter.hts_parsing import ReferenceFeatures, ReferenceSeqs, AlignmentReader from .statistics import LibraryStats from .matching import * from ..util import append_to_exception @@ -32,7 +32,7 @@ class FeatureCounter: def __init__(self, references, selection_rules, **prefs): self.stats = LibraryStats(**prefs) - self.sam_reader = SAM_reader(**prefs) + self.alignment_reader = AlignmentReader(**prefs) self.selector = FeatureSelector(selection_rules, **prefs) if isinstance(references, ReferenceFeatures): @@ -48,7 +48,7 @@ def __init__(self, references, selection_rules, **prefs): def count_reads(self, library: dict): """Collects statistics on features assigned to each alignment associated with each read""" - read_seq = self.sam_reader.bundle_multi_alignments(library["File"]) + read_seq = self.alignment_reader.bundle_multi_alignments(library["File"]) self.stats.assign_library(library) # For each sequence in the sam file... @@ -85,7 +85,7 @@ class FeatureSelector: """Performs hierarchical selection given a set of candidate features for a locus Two sources of data serve as targets for selection: feature attributes (sourced from - input GFF files), and sequence attributes (sourced from input SAM files). + input GFF files), and sequence attributes (sourced from input alignment files). 
All candidate features are assumed to have matched at least one Identity selector, as determined by hts_parsing.ReferenceFeatures.get_matches_and_classes() diff --git a/tiny/rna/counter/hts_parsing.py b/tiny/rna/counter/hts_parsing.py index 469e184f..838c8b26 100644 --- a/tiny/rna/counter/hts_parsing.py +++ b/tiny/rna/counter/hts_parsing.py @@ -19,21 +19,23 @@ _re_attr_main = re.compile(r"\s*([^\s=]+)[\s=]+(.*)") _re_attr_empty = re.compile(r"^\s*$") -# For SAM_reader +# For AlignmentReader AlignmentDict = Dict[str, Union[str, int]] Bundle = Tuple[List[AlignmentDict], int] _re_tiny = r"\d+_count=\d+" _re_fastx = r"seq\d+_x(\d+)" -class SAM_reader: - """A minimal SAM reader that bundles multiple-alignments and only parses data relevant to the workflow.""" +class AlignmentReader: + """A minimal SAM/BAM reader that bundles multiple-alignments and only parses data relevant to the workflow.""" def __init__(self, **prefs): self.decollapse = prefs.get("decollapse", False) self.out_prefix = prefs.get("out_prefix", None) self.collapser_type = None self.references = [] + self.file_type = None + self.basename = None self.has_nm = None self.file = None @@ -50,8 +52,8 @@ def bundle_multi_alignments(self, file: str) -> Iterator[Bundle]: """Bundles multi-alignments (determined by a shared QNAME) and reports the associated read's count""" pysam_reader = pysam.AlignmentFile(file) - self.file = file + self._assign_library(file) self._gather_metadata(pysam_reader) self._iter = AlignmentIter(pysam_reader, self.has_nm, self._decollapsed_callback, self._decollapsed_reads) bundle, read_count = self._new_bundle(next(self._iter)) @@ -78,18 +80,25 @@ def _new_bundle(self, aln: AlignmentDict) -> Bundle: return [aln], count + def _assign_library(self, file): + self.file_type = file.rsplit('.')[-1].lower() + self.basename = os.path.basename(file) + self.file = file + def _gather_metadata(self, reader: pysam.AlignmentFile) -> None: - """Saves header information, examines the first alignment to determine which collapser utility was used (if any), and whether the alignment has a query sequence and NM tag. + """Saves header information, examines the first alignment to determine which collapser utility was used (if any) + and whether the alignment has a query sequence and NM tag. The input file's header is written to the decollapsed output file if necessary.""" header = reader.header first_aln = next(reader.head(1)) if first_aln.query_sequence is None: - raise ValueError("SAM alignments must include the read sequence.") + raise ValueError(f"Alignments must include the read sequence ({self.basename}).") self._header = header - self._header_dict = header.to_dict() # performs validation + self._header_dict = header.to_dict() # performs header validation + self._check_for_incompatible_order() self._determine_collapser_type(first_aln.query_name) self.has_nm = first_aln.has_tag("NM") self.references = header.references @@ -97,6 +106,43 @@ def _gather_metadata(self, reader: pysam.AlignmentFile) -> None: if self.decollapse: self._write_header_for_decollapsed_sam(str(header)) + def _check_for_incompatible_order(self): + """Inspects headers to determine if alignments of multi-mapping reads are in adjacent order + + Headers that report unsorted/unknown sorting order are likely still compatible, but + since the SAM/BAM standard doesn't address alignment adjacency, we have to assume + that it is implementation dependent. 
Therefore, we check the last reported @PG + header against a short (incomplete) list of alignment tools that are known to + follow the adjacency convention.""" + + hd_header = self._header_dict.get('HD', {}) + so_header = hd_header.get('SO', "") + go_header = hd_header.get('GO', "") + + # Check for strictly compatible order + if "queryname" in so_header or "query" in go_header: + return + + error = self.basename + " is incompatible because {reason}.\n" \ + "Please ensure that alignments for multi-mapping reads are ordered so " \ + "that they are adjacent to each other. One way to achieve this is to " \ + 'sort your files by QNAME with the command "samtools sort -n"' + + # Check for strictly incompatible order + if not ({'SO', 'GO'} & hd_header.keys()): + raise ValueError(error.format(reason="its sorting/grouping couldn't be determined from headers")) + if "coordinate" in so_header: + raise ValueError(error.format(reason="it is sorted by coordinate")) + if "reference" in go_header: + raise ValueError(error.format(reason="it is grouped by reference")) + + # If SO is unknown/unsorted or GO is none, check to see if the last program + # to handle the alignments is one that produces adjacent alignments by default + pg_header = self._header_dict.get('PG', [{}]) + last_prog = pg_header[-1].get('ID', "") + if last_prog.lower() not in ("bowtie", "bowtie2", "star"): + raise ValueError(error.format(reason="multi-mapping adjacency couldn't be determined")) + def _determine_collapser_type(self, qname: str) -> None: """Attempts to determine the collapsing utility that was used before producing the input alignment file, then checks basic requirements for the utility's outputs.""" @@ -108,11 +154,6 @@ def _determine_collapser_type(self, qname: str) -> None: elif re.match(_re_fastx, qname) is not None: self.collapser_type = "fastx" self._collapser_token = "_x" - - sort_order = self._header_dict.get('HD', {}).get('SO', None) - if sort_order is None or sort_order != "queryname": - raise ValueError("SAM files from fastx collapsed outputs must be sorted by queryname\n" - "(and the @HD [...] SO header must be set accordingly).") else: self.collapser_type = None diff --git a/tiny/rna/counter/statistics.py b/tiny/rna/counter/statistics.py index 41fbcfa7..a3afd0fd 100644 --- a/tiny/rna/counter/statistics.py +++ b/tiny/rna/counter/statistics.py @@ -634,7 +634,7 @@ def record_alignment_details(self, assignments, aln, bundle, n_candidates): This is called once per locus per read (every alignment) when the user elects to save diagnostics. The recorded information is later written to {library['Name']}_aln_table.txt - after the entire SAM file has been processed.""" + after the entire alignment file has been processed.""" # Map internal strand representation to +/-/. 
strand = self.map_strand[aln['Strand']] diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py index a55b488e..c2d7868c 100644 --- a/tiny/rna/counter/validation.py +++ b/tiny/rna/counter/validation.py @@ -7,7 +7,7 @@ from collections import Counter, defaultdict from typing import List, Dict -from tiny.rna.counter.hts_parsing import parse_gff, ReferenceFeatures, SAM_reader +from tiny.rna.counter.hts_parsing import parse_gff, ReferenceFeatures from tiny.rna.counter.features import FeatureSelector from tiny.rna.util import ReportFormatter, sorted_natural, gzip_open @@ -18,7 +18,7 @@ class GFFValidator: targets = { "ID attribute": "Features missing a valid identifier attribute", - "sam files": "Potentially incompatible SAM alignment files", + "alignment files": "Potentially incompatible alignment files", "seq chromosomes": "Chromosomes present in sequence files", "gff chromosomes": "Chromosomes present in GFF files", "strand": "Features missing strand information", @@ -210,7 +210,7 @@ def generate_chrom_heuristics_report(self, suspect_files): for file, chroms in suspect_files.items()} summary = { - "sam files": chroms, + "alignment files": chroms, "gff chromosomes": sorted(self.chrom_set) } From 7f805be38e064f3177d468c9e8ea8ebbef6a252d Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Mon, 24 Apr 2023 14:35:07 -0700 Subject: [PATCH 05/18] Renaming SamSqValidator to AlignmentSqValidator, and uses of "sam" to "alignment" --- tiny/rna/counter/counter.py | 20 +++++++++----------- tiny/rna/counter/validation.py | 22 +++++++++++----------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/tiny/rna/counter/counter.py b/tiny/rna/counter/counter.py index 4a16c15b..d0bf3ab8 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -8,7 +8,7 @@ from typing import List, Dict -from tiny.rna.counter.validation import GFFValidator, SamSqValidator +from tiny.rna.counter.validation import GFFValidator, AlignmentSqValidator from tiny.rna.counter.features import Features, FeatureCounter from tiny.rna.counter.statistics import MergedStatsManager from tiny.rna.counter.hts_parsing import ReferenceFeatures, ReferenceSeqs, ReferenceBase @@ -91,19 +91,17 @@ def get_args(): def load_samples(samples_csv: str, in_pipeline: bool) -> List[Dict[str, str]]: """Parses the Samples Sheet to determine library names and alignment files for counting - Sample files may have a .fastq(.gz) extension (i.e. when tiny-count is called as part of a + Sample files can have a .fastq(.gz) extension (i.e. when tiny-count is called as part of a pipeline run) or a .sam/.bam extension (i.e. when tiny-count is called as a standalone tool). Args: samples_csv: a csv file which defines sample group, replicate, and file location - in_pipeline: helps locate sample SAM files. If true, files are assumed to reside - in the working directory. If false, files are assumed to reside in the same - directory as their source FASTQ files with '_aligned_seqs.sam' appended - (i.e. /dir/sample1.fastq -> /dir/sample1_aligned_seqs.sam). + in_pipeline: helps locate sample alignment files. If true, files are assumed to + reside in the working directory. Returns: - inputs: a list of dictionaries for each library, where each dictionary defines the - library name and the location of its SAM file for counting. + inputs: a list of dictionaries for each library. Each dictionary holds + the library's name, file location, and normalization preferences. 
""" context = "Pipeline Step" if in_pipeline else "Standalone Run" @@ -170,13 +168,13 @@ def load_references(paths: PathsFile, libraries: List[dict], rules: List[dict], """ gff_files = paths.get_gff_config() - sam_files = [lib['File'] for lib in libraries] + aln_files = [lib['File'] for lib in libraries] if gff_files: - GFFValidator(gff_files, rules, alignments=sam_files).validate() + GFFValidator(gff_files, rules, alignments=aln_files).validate() references = ReferenceFeatures(gff_files, **prefs) else: - sq_validator = SamSqValidator(sam_files) + sq_validator = AlignmentSqValidator(aln_files) # Reuse sq_validator's parsing results to save time sq_validator.validate() diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py index c2d7868c..005cc5e5 100644 --- a/tiny/rna/counter/validation.py +++ b/tiny/rna/counter/validation.py @@ -229,24 +229,24 @@ def build_column_filters(rules): return FeatureSelector.build_selectors([selector_defs])[0] -class SamSqValidator: +class AlignmentSqValidator: """Validates @SQ headers for tiny-count's sequence-based counting mode""" targets = { "inter sq": "Sequence identifiers with inconsistent lengths", - "intra sq": "SAM files with repeated sequence identifiers", - "incomplete sq": "SAM files with incomplete @SQ headers", - "missing sq": "SAM files that lack @SQ headers" + "intra sq": "alignment files with repeated sequence identifiers", + "incomplete sq": "alignment files with incomplete @SQ headers", + "missing sq": "alignment files that lack @SQ headers" } - def __init__(self, sam_files): + def __init__(self, alignment_files): self.report = ReportFormatter(self.targets) - self.sam_files = sam_files + self.alignment_files = alignment_files self.reference_seqs = {} self.sq_headers = {} def validate(self): - print("Validating sequence identifiers in SAM files... ", end='') + print("Validating sequence identifiers in alignment files... 
", end='') self.read_sq_headers() self.validate_sq_headers() print("done.") @@ -279,7 +279,7 @@ def get_ambiguous_lengths(self) -> List[str]: Sequences with consistent length definitions are added to self.reference_seqs""" seq_lengths = defaultdict(set) - for sam in self.sam_files: + for sam in self.alignment_files: for sq in self.sq_headers[sam]: seq_id = sq['SN'] seq_len = int(sq['LN']) @@ -299,7 +299,7 @@ def get_duplicate_identifiers(self) -> Dict[str, List[str]]: """Returns a dictionary of SAM files that contain duplicate sequence identifiers""" bad_files = {} - for file in self.sam_files: + for file in self.alignment_files: sq = self.sq_headers[file] id_count = Counter(seq['SN'] for seq in sq) duplicates = [seq_id for seq_id, count in id_count.items() if count > 1] @@ -326,7 +326,7 @@ def generate_header_syntax_report(self, missing, incomplete): if incomplete: report['incomplete sq'] = sorted_natural(incomplete) - header = "Every SAM file must have complete @SQ headers with SN and LN\n" \ + header = "Every alignment file must have complete @SQ headers with SN and LN\n" \ "fields when performing sequence-based counting.\n" self.report.add_error_section(header, report) @@ -341,6 +341,6 @@ def generate_identifier_report(self, duplicate, ambiguous): self.report.add_error_section(header, report) def read_sq_headers(self): - for file in self.sam_files: + for file in self.alignment_files: pr = pysam.AlignmentFile(file, check_sq=False) self.sq_headers[file] = pr.header.to_dict().get('SQ', []) From 7a7a3742fffb820e0b2a177276ac9db6651c9bd9 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Mon, 24 Apr 2023 14:39:05 -0700 Subject: [PATCH 06/18] Updating infer_strandedness() (even though it isn't currently in use) --- tiny/rna/counter/hts_parsing.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tiny/rna/counter/hts_parsing.py b/tiny/rna/counter/hts_parsing.py index 838c8b26..b1311a0e 100644 --- a/tiny/rna/counter/hts_parsing.py +++ b/tiny/rna/counter/hts_parsing.py @@ -1,4 +1,5 @@ import functools +import itertools import os.path import HTSeq import pysam @@ -197,15 +198,15 @@ def _write_decollapsed_sam(self) -> None: self._decollapsed_reads.clear() -def infer_strandedness(sam_file: str, intervals: dict) -> str: - """Infers strandedness from a sample SAM file and intervals from a parsed GFF file +def infer_strandedness(alignment_file: str, intervals: dict) -> str: + """Infers strandedness from a sample alignment file and intervals from a parsed GFF file Credit: this technique is an adaptation of those in RSeQC's infer_experiment.py. It has been modified to accept a GFF reference file rather than a BED file, - and to use HTSeq rather than bx-python. + and to use our AlignmentReader rather than bx-python. Args: - sam_file: the path of the SAM file to evaluate + alignment_file: the path of the alignment file to evaluate intervals: the intervals instance attribute of ReferenceFeatures, populated by .get() """ @@ -216,18 +217,14 @@ def infer_strandedness(sam_file: str, intervals: dict) -> str: iv_convert.strand = '.' 
unstranded[iv_convert] = orig_iv.strand - # Assumes read_SAM() defaults to non-reverse strandedness - sample_read = read_SAM(sam_file) + # Assumes AlignmentReader defaults to non-reverse strandedness + sample_read = AlignmentReader().bundle_multi_alignments(alignment_file) gff_sam_map = Counter() - for count in range(1, 20000): - try: - rec = next(sample_read) - strandless = HTSeq.GenomicInterval(rec['Chrom'], rec['Start'], rec['End']) - sam_strand = rec['strand'] - gff_strand = ':'.join(unstranded[strandless].get_steps()) - gff_sam_map[sam_strand + gff_strand] += 1 - except StopIteration: - break + for rec, _ in zip(itertools.chain(*sample_read), range(20000)): + strandless = HTSeq.GenomicInterval(rec['Chrom'], rec['Start'], rec['End']) + sam_strand = rec['strand'] + gff_strand = ':'.join(unstranded[strandless].get_steps()) + gff_sam_map[sam_strand + gff_strand] += 1 non_rev = (gff_sam_map['++'] + gff_sam_map['--']) / sum(gff_sam_map.values()) reverse = (gff_sam_map['+-'] + gff_sam_map['-+']) / sum(gff_sam_map.values()) From 59f8bd06b1c1877dc49954f0d722bdfc28ac09b6 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 29 Apr 2023 11:23:31 -0700 Subject: [PATCH 07/18] Restructuring testdata folder for tiny-count --- .../counter/{ => gff}/discontinuous.gff3 | 0 .../{ => gff}/identity_choice_test.gff3 | 0 tests/testdata/counter/{ => gff}/single.gff3 | 0 tests/testdata/counter/{ => gff}/single2.gff3 | 0 .../counter/{ => sam}/Lib304_test.sam | 0 .../{ => sam}/identity_choice_test.sam | 0 .../counter/{ => sam}/non-collapsed.sam | 2 + tests/testdata/counter/{ => sam}/single.sam | 2 + tests/unit_tests_counter.py | 8 ++-- tests/unit_tests_features.py | 8 ++-- tests/unit_tests_hts_parsing.py | 38 +++++++++---------- tests/unit_tests_validation.py | 16 +++++--- 12 files changed, 42 insertions(+), 32 deletions(-) rename tests/testdata/counter/{ => gff}/discontinuous.gff3 (100%) rename tests/testdata/counter/{ => gff}/identity_choice_test.gff3 (100%) rename tests/testdata/counter/{ => gff}/single.gff3 (100%) rename tests/testdata/counter/{ => gff}/single2.gff3 (100%) rename tests/testdata/counter/{ => sam}/Lib304_test.sam (100%) rename tests/testdata/counter/{ => sam}/identity_choice_test.sam (100%) rename tests/testdata/counter/{ => sam}/non-collapsed.sam (81%) rename tests/testdata/counter/{ => sam}/single.sam (80%) diff --git a/tests/testdata/counter/discontinuous.gff3 b/tests/testdata/counter/gff/discontinuous.gff3 similarity index 100% rename from tests/testdata/counter/discontinuous.gff3 rename to tests/testdata/counter/gff/discontinuous.gff3 diff --git a/tests/testdata/counter/identity_choice_test.gff3 b/tests/testdata/counter/gff/identity_choice_test.gff3 similarity index 100% rename from tests/testdata/counter/identity_choice_test.gff3 rename to tests/testdata/counter/gff/identity_choice_test.gff3 diff --git a/tests/testdata/counter/single.gff3 b/tests/testdata/counter/gff/single.gff3 similarity index 100% rename from tests/testdata/counter/single.gff3 rename to tests/testdata/counter/gff/single.gff3 diff --git a/tests/testdata/counter/single2.gff3 b/tests/testdata/counter/gff/single2.gff3 similarity index 100% rename from tests/testdata/counter/single2.gff3 rename to tests/testdata/counter/gff/single2.gff3 diff --git a/tests/testdata/counter/Lib304_test.sam b/tests/testdata/counter/sam/Lib304_test.sam similarity index 100% rename from tests/testdata/counter/Lib304_test.sam rename to tests/testdata/counter/sam/Lib304_test.sam diff --git 
a/tests/testdata/counter/identity_choice_test.sam b/tests/testdata/counter/sam/identity_choice_test.sam similarity index 100% rename from tests/testdata/counter/identity_choice_test.sam rename to tests/testdata/counter/sam/identity_choice_test.sam diff --git a/tests/testdata/counter/non-collapsed.sam b/tests/testdata/counter/sam/non-collapsed.sam similarity index 81% rename from tests/testdata/counter/non-collapsed.sam rename to tests/testdata/counter/sam/non-collapsed.sam index 94e2d107..48368cde 100644 --- a/tests/testdata/counter/non-collapsed.sam +++ b/tests/testdata/counter/sam/non-collapsed.sam @@ -1,2 +1,4 @@ +@HD SO:unsorted @SQ SN:I LN:21 +@PG ID:bowtie NON_COLLAPSED_QNAME 16 I 15064570 255 21M * 0 0 CAAGACAGAGCTTCACCGTTC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0 XM:i:2 diff --git a/tests/testdata/counter/single.sam b/tests/testdata/counter/sam/single.sam similarity index 80% rename from tests/testdata/counter/single.sam rename to tests/testdata/counter/sam/single.sam index 6d8dc69b..ccf1b730 100644 --- a/tests/testdata/counter/single.sam +++ b/tests/testdata/counter/sam/single.sam @@ -1,2 +1,4 @@ +@HD SO:unsorted @SQ SN:I LN:21 +@PG ID:bowtie 0_count=5 16 I 15064570 255 21M * 0 0 CAAGACAGAGCTTCACCGTTC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0 XM:i:2 \ No newline at end of file diff --git a/tests/unit_tests_counter.py b/tests/unit_tests_counter.py index e32a0fa6..ad83ee01 100644 --- a/tests/unit_tests_counter.py +++ b/tests/unit_tests_counter.py @@ -16,12 +16,12 @@ class CounterTests(unittest.TestCase): @classmethod def setUpClass(self): - self.gff_file = f"{resources}/identity_choice_test.gff3" - self.short_gff_file = f"{resources}/single.gff3" + self.gff_file = f"{resources}/gff/identity_choice_test.gff3" + self.short_gff_file = f"{resources}/gff/single.gff3" self.short_gff = helpers.read(self.short_gff_file) - self.sam_file = f"{resources}/identity_choice_test.sam" - self.short_sam_file = f"{resources}/single.sam" + self.sam_file = f"{resources}/sam/identity_choice_test.sam" + self.short_sam_file = f"{resources}/sam/single.sam" self.short_sam = helpers.read(self.short_sam_file) self.strand = {'sense': tuple('+'), 'antisense': tuple('-'), 'both': ('+', '-')} diff --git a/tests/unit_tests_features.py b/tests/unit_tests_features.py index 236be93e..ce752dbd 100644 --- a/tests/unit_tests_features.py +++ b/tests/unit_tests_features.py @@ -11,12 +11,12 @@ class FeaturesTests(unittest.TestCase): @classmethod def setUpClass(self): - self.gff_file = f"{resources}/identity_choice_test.gff3" - self.short_gff_file = f"{resources}/single.gff3" + self.gff_file = f"{resources}/gff/identity_choice_test.gff3" + self.short_gff_file = f"{resources}/gff/single.gff3" self.short_gff = read(self.short_gff_file) - self.sam_file = f"{resources}/identity_choice_test.sam" - self.short_sam_file = f"{resources}/single.sam" + self.sam_file = f"{resources}/sam/identity_choice_test.sam" + self.short_sam_file = f"{resources}/sam/single.sam" self.short_sam = read(self.short_sam_file) """Helper functions""" diff --git a/tests/unit_tests_hts_parsing.py b/tests/unit_tests_hts_parsing.py index 4af12415..b9b7eac3 100644 --- a/tests/unit_tests_hts_parsing.py +++ b/tests/unit_tests_hts_parsing.py @@ -25,8 +25,8 @@ class SamReaderTests(unittest.TestCase): @classmethod def setUpClass(self): - self.sam_file = f"{resources}/identity_choice_test.sam" - self.short_sam_file = f"{resources}/single.sam" + self.sam_file = f"{resources}/sam/identity_choice_test.sam" + self.short_sam_file = f"{resources}/sam/single.sam" 
self.short_sam = helpers.read(self.short_sam_file) @staticmethod @@ -59,8 +59,8 @@ def test_sam_reader(self): """ def test_sam_parser_comparison(self): - file = f"{resources}/Lib304_test.sam" - ours = SAM_reader().bundle_multi_alignments(file) + file = f"{resources}/sam/Lib304_test.sam" + ours = AlignmentReader().bundle_multi_alignments(file) theirs = HTSeq.bundle_multiple_alignments(HTSeq.BAM_Reader(file)) for (our_bundle, _), their_bundle in zip(ours, theirs): @@ -183,8 +183,8 @@ def test_SAM_reader_no_decollapse_non_collapsed_SAM_files(self): with patch.object(SAM_reader, "_write_decollapsed_sam") as write_sam, \ patch.object(SAM_reader, "_write_header_for_decollapsed_sam") as write_header: with contextlib.redirect_stderr(stdout_capture): - reader = SAM_reader(decollapse=True) - records = reader.bundle_multi_alignments(f"{resources}/non-collapsed.sam") + reader = AlignmentReader(decollapse=True) + records = reader.bundle_multi_alignments(f"{resources}/sam/non-collapsed.sam") self.exhaust_iterator(records) write_sam.assert_not_called() @@ -199,8 +199,8 @@ class ReferenceFeaturesTests(unittest.TestCase): @classmethod def setUpClass(self): - self.gff_file = f"{resources}/identity_choice_test.gff3" - self.short_gff_file = f"{resources}/single.gff3" + self.gff_file = f"{resources}/gff/identity_choice_test.gff3" + self.short_gff_file = f"{resources}/gff/single.gff3" self.short_gff = helpers.read(self.short_gff_file) self.maxDiff = None @@ -365,7 +365,7 @@ def test_ref_tables_alias_multisource_concat_all_features_false(self): def test_ref_tables_identity_matches_multisource_concat(self): feature_source = { self.short_gff_file: ["ID"], - f"{resources}/single2.gff3": ["ID"] + f"{resources}/gff/single2.gff3": ["ID"] } kwargs = {'all_features': True} @@ -392,7 +392,7 @@ def test_ref_tables_identity_matches_multisource_concat(self): def test_ref_tables_discontinuous_aliases(self): kwargs = {'all_features': True} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} mock_selector = self.selector_with_template(helpers.rules_template) _, alias, _ = ReferenceFeatures(feature_source, **kwargs).get(mock_selector) @@ -408,7 +408,7 @@ def test_ref_tables_discontinuous_aliases(self): def test_ref_tables_discontinuous_no_match_all_features_false(self): kwargs = {'all_features': False} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} mock_selector = self.selector_with_template([{'Identity': ('No', 'Match')}]) expected_err = "No features were retained while parsing your GFF file.\n" \ @@ -425,7 +425,7 @@ def test_ref_tables_discontinuous_no_match_all_features_false(self): def test_ref_tables_discontinuous_features(self): kwargs = {'all_features': True} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} feature_selector = self.selector_with_template([{'Identity': ('No', 'Match')}]) # Features that fail to match on identity are not added to the StepVector, @@ -440,7 +440,7 @@ def test_ref_tables_discontinuous_features(self): def test_ref_tables_discontinuous_intervals(self): kwargs = {'all_features': True} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} feature_selector = self.selector_with_template(helpers.rules_template) grandparent_iv = HTSeq.GenomicInterval('I', 0, 10, '-') @@ 
-467,7 +467,7 @@ def test_ref_tables_discontinuous_intervals(self): performed for intervals in this test.""" def test_ref_tables_discontinuous_identity_matches(self): - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} feature_selector = self.selector_with_template([ {'Identity': ('Class', 'NA'), 'Hierarchy': 2}, # Rule 1 {'Identity': ('Name', 'Sibling3'), 'Hierarchy': 3}, # Rule 2 @@ -511,7 +511,7 @@ def test_ref_tables_discontinuous_identity_matches(self): def test_ref_tables_source_filter(self): kwargs = {'all_features': False} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} feature_selector = self.selector_with_template([{'Filter_s': "Source2Name"}]) child2_iv = HTSeq.GenomicInterval('I', 39, 50, '-') @@ -536,7 +536,7 @@ def test_ref_tables_source_filter(self): def test_ref_tables_type_filter(self): kwargs = {'all_features': False} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} feature_selector = self.selector_with_template([{'Filter_t': "CDS"}]) child1_iv = HTSeq.GenomicInterval('I', 29, 40, '-') @@ -561,7 +561,7 @@ def test_ref_tables_type_filter(self): def test_ref_tables_both_filter(self): kwargs = {'all_features': False} - feature_source = {f"{resources}/discontinuous.gff3": ["Name"]} + feature_source = {f"{resources}/gff/discontinuous.gff3": ["Name"]} feature_selector = self.selector_with_template([{'Filter_s': "SourceName", 'Filter_t': "gene"}]) rt = ReferenceFeatures(feature_source, **kwargs) @@ -577,7 +577,7 @@ def test_ref_tables_both_filter(self): def test_ref_tables_tagged_match_single(self): kwargs = {'all_features': False} feat_id = "Gene:WBGene00023193" - feature_source = {f"{resources}/single.gff3": ["sequence_name"]} + feature_source = {f"{resources}/gff/single.gff3": ["sequence_name"]} feature_selector = self.selector_with_template([ {'Identity': ("ID", feat_id), 'Class': "tagged_match", 'Hierarchy': 1}, {'Identity': ("ID", feat_id), 'Class': "", 'Hierarchy': 2} @@ -604,7 +604,7 @@ def test_ref_tables_tagged_match_single(self): """Does ReferenceFeatures.get() correctly merge records for discontinuous features matching multiple tagged rules?""" def test_ref_tables_tagged_match_merging(self): - feature_source = {f"{resources}/discontinuous.gff3": ['Name']} + feature_source = {f"{resources}/gff/discontinuous.gff3": ['Name']} # All rules match the same root feature feature_selector = self.selector_with_template([ diff --git a/tests/unit_tests_validation.py b/tests/unit_tests_validation.py index 32a0845f..a17dbeef 100644 --- a/tests/unit_tests_validation.py +++ b/tests/unit_tests_validation.py @@ -6,7 +6,13 @@ from glob import glob from unittest.mock import patch, mock_open -from tiny.rna.counter.validation import GFFValidator, ReportFormatter, SamSqValidator +from tiny.rna.counter.validation import GFFValidator, ReportFormatter, AlignmentSqValidator + +resources = "./testdata/counter" + +# To run all test suites +if __name__ == '__main__': + unittest.main() class GFFValidatorTest(unittest.TestCase): @@ -104,7 +110,7 @@ def test_gff_id_validation(self): def test_ebwt_chroms(self): validator = self.make_gff_validator() - ebwt_prefix = "./testdata/counter/validation/ebwt/ram1" + ebwt_prefix = f"{resources}/validation/ebwt/ram1" # Chroms are shared validator.chrom_set = {'ram1'} @@ -122,7 +128,7 @@ def test_ebwt_chroms(self): 
def test_genome_chroms(self): validator = self.make_gff_validator() - fasta_file = "./testdata/counter/validation/genome/genome.fasta" + fasta_file = f"{resources}/validation/genome/genome.fasta" # Chroms are shared validator.chrom_set = {'chr1', 'chr2', 'chr3'} @@ -140,8 +146,8 @@ def test_genome_chroms(self): def test_alignments_heuristic(self): validator = self.make_gff_validator() - sam_files = ['./testdata/counter/identity_choice_test.sam', - './testdata/counter/single.sam'] + sam_files = [f'{resources}/sam/identity_choice_test.sam', + f'{resources}/sam/single.sam'] sam_chroms = { sam_files[0]: {'I', 'V', 'MtDNA'}, From 78a7f87099d446d9c7f70bbef3b931defbab148d Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 29 Apr 2023 11:28:55 -0700 Subject: [PATCH 08/18] Correcting load_config to use the CSVReader class' row_num attribute instead of the superclass' line_num in error catching. This is more meaningful to the user since CSV records can technically span multiple lines --- tiny/rna/counter/counter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tiny/rna/counter/counter.py b/tiny/rna/counter/counter.py index d0bf3ab8..de4d71bd 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -146,7 +146,7 @@ def load_config(features_csv: str, in_pipeline: bool) -> List[dict]: # Duplicate rule entries are not allowed if rule not in rules: rules.append(rule) except Exception as e: - msg = f"Error occurred on line {sheet.line_num} of {os.path.basename(features_csv)}" + msg = f"Error occurred on line {sheet.row_num} of {os.path.basename(features_csv)}" append_to_exception(e, msg) raise From 51d265750fd12473f9c2f9c0cc3456ca63000a68 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 29 Apr 2023 11:31:40 -0700 Subject: [PATCH 09/18] Corrections in SamplesSheet class. Validation methods already report row_num so it is redundant to add that info to AssertionErrors. 
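For context on the regex change described below: the stricter pattern accepts a
numeric scaling factor or "rpm" (case-insensitive), still treats empty cells as
valid, and no longer matches stray dot runs. A quick sketch, not part of the
patch itself:

```python
import re

# Pattern added to SamplesSheet.validate_normalization()
norm_re = r"\s*((?:\d+(?:\.\d*)?|\.\d+)|(rpm))\s*"

for value in ["10", "1.5", ".5", " RPM ", "", "...", "1.2.3"]:
    ok = bool(re.fullmatch(norm_re, value, re.IGNORECASE)) or not value
    print(f"{value!r}: {'accepted' if ok else 'rejected'}")
```

The previous pattern's character class ([\d.]+) accepted dot-only values like
"...", which is what this change rules out.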
Also adding more robust regex for matching normalization definitions --- tiny/rna/configuration.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py index 04e56ac3..fbd1e3db 100644 --- a/tiny/rna/configuration.py +++ b/tiny/rna/configuration.py @@ -692,8 +692,10 @@ def read_csv(self): if is_control: self.control_condition = group_name except Exception as e: - msg = f"Error occurred on line {self.csv.line_num} of {self.basename}" - append_to_exception(e, msg) + if not isinstance(e, AssertionError): + # Validation steps already indicate row, exception likely from something else + msg = f"Error occurred on line {self.csv.row_num} of {self.basename}" + append_to_exception(e, msg) raise self.is_compatible_df = self.validate_deseq_compatibility(reps_per_group) @@ -762,7 +764,7 @@ def validate_control_group(self, is_control: bool, group: str): .format(row_num=self.csv.row_num, selfname=self.basename) def validate_normalization(self, norm): - assert re.fullmatch(r"(\s*[\d.]+\s*)|(rpm)", norm, re.IGNORECASE) or not norm, \ + assert re.fullmatch(r"\s*((?:\d+(?:\.\d*)?|\.\d+)|(rpm))\s*", norm, re.IGNORECASE) or not norm, \ "Invalid normalization value in {selfname} (row {row_num})" \ .format(selfname=self.basename, row_num=self.csv.row_num) From 9350f131d3d24842c47ea1ee67c6e69785243626 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sat, 29 Apr 2023 11:33:28 -0700 Subject: [PATCH 10/18] Bringing some outdated and inconsistent section comments up to date in Paths File and Run Config --- START_HERE/paths.yml | 9 ++++++--- START_HERE/run_config.yml | 5 ++--- tests/testdata/config_files/paths.yml | 9 ++++++--- tests/testdata/config_files/run_config_template.yml | 5 ++--- tiny/templates/paths.yml | 9 ++++++--- tiny/templates/run_config_template.yml | 5 ++--- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/START_HERE/paths.yml b/START_HERE/paths.yml index 0246ca0e..ca45c9f6 100644 --- a/START_HERE/paths.yml +++ b/START_HERE/paths.yml @@ -1,14 +1,17 @@ ############################## MAIN INPUT FILES FOR ANALYSIS ############################## # -# Edit this section to provide the path to your Samples and Features sheets. Relative and -# absolute paths are both allowed. All relative paths are relative to THIS config file. +# Relative and absolute paths are both allowed. +# All relative paths are evaluated relative to THIS config file. # # Directions: # 1. Fill out the Samples Sheet with files to process + naming scheme. [samples.csv] # 2. Fill out the Features Sheet with selection rules [features.csv] -# 3. Set samples_csv and features_csv (below) to point to these files +# 3. Set samples_csv and features_csv to point to these files # 4. Add annotation files and per-file alias preferences to gff_files (optional) # +# If using the tinyRNA workflow, additionally set ebwt and/or reference_genome_files +# in the BOWTIE-BUILD section. 
+# ######-------------------------------------------------------------------------------###### ##-- Path to Sample & Features Sheets (relative paths are relative to this config file) --## diff --git a/START_HERE/run_config.yml b/START_HERE/run_config.yml index 8d7d5bc5..260b146a 100644 --- a/START_HERE/run_config.yml +++ b/START_HERE/run_config.yml @@ -41,9 +41,8 @@ run_native: false ######------------------------- BOWTIE INDEX BUILD OPTIONS --------------------------###### # -# If you do not already have bowtie indexes, they can be built for you by setting -# run_bowtie_build (above) to true and adding your reference genome file(s) to your -# paths_config file. +# If you do not already have bowtie indexes, they can be built for you +# (see the BOWTIE-BUILD section in the Paths File) # # We have specified default parameters for small RNA data based on our own "best practices". # You can change the parameters here. diff --git a/tests/testdata/config_files/paths.yml b/tests/testdata/config_files/paths.yml index 46c14bf2..2dc39b9a 100644 --- a/tests/testdata/config_files/paths.yml +++ b/tests/testdata/config_files/paths.yml @@ -1,14 +1,17 @@ ############################## MAIN INPUT FILES FOR ANALYSIS ############################## # -# Edit this section to provide the path to your Samples and Features sheets. Relative and -# absolute paths are both allowed. All relative paths are relative to THIS config file. +# Relative and absolute paths are both allowed. +# All relative paths are evaluated relative to THIS config file. # # Directions: # 1. Fill out the Samples Sheet with files to process + naming scheme. [samples.csv] # 2. Fill out the Features Sheet with selection rules [features.csv] -# 3. Set samples_csv and features_csv (below) to point to these files +# 3. Set samples_csv and features_csv to point to these files # 4. Add annotation files and per-file alias preferences to gff_files (optional) # +# If using the tinyRNA workflow, additionally set ebwt and/or reference_genome_files +# in the BOWTIE-BUILD section. +# ######-------------------------------------------------------------------------------###### ##-- Path to Sample & Features Sheets (relative paths are relative to this config file) --## diff --git a/tests/testdata/config_files/run_config_template.yml b/tests/testdata/config_files/run_config_template.yml index e8195a36..04fda2f8 100644 --- a/tests/testdata/config_files/run_config_template.yml +++ b/tests/testdata/config_files/run_config_template.yml @@ -41,9 +41,8 @@ run_native: false ######------------------------- BOWTIE INDEX BUILD OPTIONS --------------------------###### # -# If you do not already have bowtie indexes, they can be built for you by setting -# run_bowtie_build (above) to true and adding your reference genome file(s) to your -# paths_config file. +# If you do not already have bowtie indexes, they can be built for you +# (see the BOWTIE-BUILD section in the Paths File) # # We have specified default parameters for small RNA data based on our own "best practices". # You can change the parameters here. diff --git a/tiny/templates/paths.yml b/tiny/templates/paths.yml index a05f83fc..c72eebc0 100644 --- a/tiny/templates/paths.yml +++ b/tiny/templates/paths.yml @@ -1,14 +1,17 @@ ############################## MAIN INPUT FILES FOR ANALYSIS ############################## # -# Edit this section to provide the path to your Samples and Features sheets. Relative and -# absolute paths are both allowed. All relative paths are relative to THIS config file. 
+# Relative and absolute paths are both allowed. +# All relative paths are evaluated relative to THIS config file. # # Directions: # 1. Fill out the Samples Sheet with files to process + naming scheme. [samples.csv] # 2. Fill out the Features Sheet with selection rules [features.csv] -# 3. Set samples_csv and features_csv (below) to point to these files +# 3. Set samples_csv and features_csv to point to these files # 4. Add annotation files and per-file alias preferences to gff_files (optional) # +# If using the tinyRNA workflow, additionally set ebwt and/or reference_genome_files +# in the BOWTIE-BUILD section. +# ######-------------------------------------------------------------------------------###### ##-- Path to Sample & Features Sheets (relative paths are relative to this config file) --## diff --git a/tiny/templates/run_config_template.yml b/tiny/templates/run_config_template.yml index 5eb18add..b8eb7c22 100644 --- a/tiny/templates/run_config_template.yml +++ b/tiny/templates/run_config_template.yml @@ -41,9 +41,8 @@ run_native: false ######------------------------- BOWTIE INDEX BUILD OPTIONS --------------------------###### # -# If you do not already have bowtie indexes, they can be built for you by setting -# run_bowtie_build (above) to true and adding your reference genome file(s) to your -# paths_config file. +# If you do not already have bowtie indexes, they can be built for you +# (see the BOWTIE-BUILD section in the Paths File) # # We have specified default parameters for small RNA data based on our own "best practices". # You can change the parameters here. From cb9aeb610c6696b9a2cf512e69f33fbbe6e172a8 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sun, 30 Apr 2023 21:15:31 -0700 Subject: [PATCH 11/18] Renaming the FASTQ/SAM Files column in Samples Sheet to Input Files --- START_HERE/samples.csv | 2 +- doc/Configuration.md | 2 +- tests/testdata/config_files/samples.csv | 2 +- tiny/rna/configuration.py | 11 ++++++++++- tiny/templates/samples.csv | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/START_HERE/samples.csv b/START_HERE/samples.csv index 6455c9ae..47caddd8 100755 --- a/START_HERE/samples.csv +++ b/START_HERE/samples.csv @@ -1,4 +1,4 @@ -FASTQ/SAM Files,Sample/Group Name,Replicate number,Control,Normalization +Input Files,Sample/Group Name,Replicate number,Control,Normalization ./fastq_files/cond1_rep1.fastq.gz,condition1,1,TRUE, ./fastq_files/cond1_rep2.fastq.gz,condition1,2,, ./fastq_files/cond1_rep3.fastq.gz,condition1,3,, diff --git a/doc/Configuration.md b/doc/Configuration.md index 26faf28a..7d9c663e 100644 --- a/doc/Configuration.md +++ b/doc/Configuration.md @@ -119,7 +119,7 @@ The final output directory name has three components: The `run_directory` suffix in the Paths File supports subdirectories; if provided, the final output directory will be named as indicated above, but the subdirectory structure specified in `run_directory` will be retained. 
## Samples Sheet Details -| _Column:_ | FASTQ/SAM Files | Sample/Group Name | Replicate Number | Control | Normalization | +| _Column:_ | Input Files | Sample/Group Name | Replicate Number | Control | Normalization | |-----------:|---------------------|-------------------|------------------|---------|---------------| | _Example:_ | cond1_rep1.fastq.gz | condition1 | 1 | True | RPM | diff --git a/tests/testdata/config_files/samples.csv b/tests/testdata/config_files/samples.csv index d96b524e..f4ed8522 100755 --- a/tests/testdata/config_files/samples.csv +++ b/tests/testdata/config_files/samples.csv @@ -1,4 +1,4 @@ -FASTQ/SAM Files,Sample/Group Name,Replicate Number,Control,Normalization +Input Files,Sample/Group Name,Replicate Number,Control,Normalization ../../../START_HERE/fastq_files/cond1_rep1.fastq.gz,condition1,1,TRUE, ../../../START_HERE/fastq_files/cond1_rep2.fastq.gz,condition1,2,, ../../../START_HERE/fastq_files/cond1_rep3.fastq.gz,condition1,3,, diff --git a/tiny/rna/configuration.py b/tiny/rna/configuration.py index fbd1e3db..b317fcb4 100644 --- a/tiny/rna/configuration.py +++ b/tiny/rna/configuration.py @@ -833,7 +833,7 @@ class CSVReader(csv.DictReader): "Mismatches": "Mismatch" }), "Samples Sheet": OrderedDict({ - "FASTQ/SAM Files": "File", + "Input Files": "File", "Sample/Group Name": "Group", "Replicate Number": "Replicate", "Control": "Control", @@ -960,6 +960,15 @@ def check_backward_compatibility(self, header_vals): "the new column to your Features Sheet to avoid this error." ])) + if self.doctype == "Samples Sheet": + if 'fastq/sam files' in header_vals_lc: + compat_errors.append('\n'.join([ + "It looks like you're using a Samples Sheet from an earlier version of", + 'tinyRNA. The "FASTQ/SAM files" column has been renamed to "Input Files"', + 'due to the addition of BAM file support. Please rename the column in', + "your Samples Sheet to avoid this error." + ])) + if compat_errors: raise ValueError('\n\n'.join(compat_errors)) diff --git a/tiny/templates/samples.csv b/tiny/templates/samples.csv index 32305007..9d6a8c00 100755 --- a/tiny/templates/samples.csv +++ b/tiny/templates/samples.csv @@ -1,2 +1,2 @@ -FASTQ/SAM Files,Sample/Group Name,Replicate Number,Control,Normalization +Input Files,Sample/Group Name,Replicate Number,Control,Normalization ,,,, \ No newline at end of file From 4764dd34835cbd6a86c9504b93f334134c2dd041 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sun, 30 Apr 2023 21:27:52 -0700 Subject: [PATCH 12/18] More mass renaming of SAM -> alignment --- START_HERE/tiny-count_TUTORIAL.md | 4 +-- tests/unit_test_helpers.py | 2 +- tests/unit_tests_features.py | 52 +++++++++++++++---------------- tests/unit_tests_hts_parsing.py | 46 +++++++++++++-------------- tests/unit_tests_validation.py | 30 +++++++++--------- tiny/rna/counter/counter.py | 4 +-- tiny/rna/counter/features.py | 2 +- tiny/rna/counter/validation.py | 10 +++--- 8 files changed, 75 insertions(+), 75 deletions(-) diff --git a/START_HERE/tiny-count_TUTORIAL.md b/START_HERE/tiny-count_TUTORIAL.md index 1a8b6072..1fae5f30 100644 --- a/START_HERE/tiny-count_TUTORIAL.md +++ b/START_HERE/tiny-count_TUTORIAL.md @@ -11,7 +11,7 @@ Alternatively, if you have already installed tinyRNA, you can use the `tiny-coun ## Your Data Files Gather the following files for the analysis: -1. **SAM files** containing small RNA reads aligned to a reference genome, one file per sample +1. **SAM or BAM files** containing small RNA reads aligned to a reference genome, one file per sample 2. 
**GFF3 or GFF2/GTF file(s)** containing annotations for features that you want to assign reads to ## Configuration Files @@ -24,7 +24,7 @@ tiny-count --get-templates Next, fill out the configuration files that were copied: ### 1. The Samples Sheet (samples.csv) -Edit this file to add the paths to your SAM files, and to define the group name, replicate number, etc. for each sample. +Edit this file to add the paths to your SAM or BAM files, and to define the group name, replicate number, etc. for each sample. ### 2. The Paths File (paths.yml) Edit this file to add the paths to your GFF annotation(s) under the `gff_files` key. You can leave the `alias` key as-is for now. All other keys in this file are used in the tinyRNA workflow. diff --git a/tests/unit_test_helpers.py b/tests/unit_test_helpers.py index 0b6aaf45..b750d10e 100644 --- a/tests/unit_test_helpers.py +++ b/tests/unit_test_helpers.py @@ -131,7 +131,7 @@ def get_dir_checksum_tree(root_path: str) -> dict: return dir_tree -def make_parsed_sam_record(Name="0_count=1", Seq="CAAGACAGAGCTTCACCGTTC", Chrom='I', Start=15064570, Strand=True, NM=0): +def make_parsed_alignment(Name="0_count=1", Seq="CAAGACAGAGCTTCACCGTTC", Chrom='I', Start=15064570, Strand=True, NM=0): return { "Name": Name, "Length": len(Seq), diff --git a/tests/unit_tests_features.py b/tests/unit_tests_features.py index ce752dbd..ed7ef9e9 100644 --- a/tests/unit_tests_features.py +++ b/tests/unit_tests_features.py @@ -3,7 +3,7 @@ from unittest.mock import patch, call from tiny.rna.counter.features import * -from unit_test_helpers import read, make_parsed_sam_record, rules_template, strand_to_bool +from unit_test_helpers import read, make_parsed_alignment, rules_template, strand_to_bool resources = "./testdata/counter" @@ -115,7 +115,7 @@ def test_assign_features_no_match(self): htsgas[iv_none] += "Should not match" # Create mock SAM alignment with non-overlapping interval - sam_aln = make_parsed_sam_record(**{'Start': 2, 'Chrom': chrom, 'Strand': strand_to_bool(strand)}) + sam_aln = make_parsed_alignment(**{'Start': 2, 'Chrom': chrom, 'Strand': strand_to_bool(strand)}) """ iv_none: 0 |--| 2 @@ -143,7 +143,7 @@ def test_assign_features_single_base_overlap(self): htsgas[iv_none] += "Non-overlapping feature" # Create mock SAM alignment which overlaps iv_olap by one base - sam_aln = make_parsed_sam_record(**{'Chrom': chrom, 'Strand': strand_to_bool(strand), 'Start': 0, 'Seq': 'AT'}) + sam_aln = make_parsed_alignment(**{'Chrom': chrom, 'Strand': strand_to_bool(strand), 'Start': 0, 'Seq': 'AT'}) """ iv_none: 2 |-| 3 @@ -170,7 +170,7 @@ def test_count_reads_generic(self, mock): mock_bundle = ["mock_alignment"] mock_read_count = 1 - instance.sam_reader.bundle_multi_alignments.return_value = iter([(mock_bundle, mock_read_count)]) + instance.alignment_reader.bundle_multi_alignments.return_value = iter([(mock_bundle, mock_read_count)]) instance.assign_features.return_value = ({"mock_feat"}, 1) expected_calls_to_stats = [ @@ -195,11 +195,11 @@ def test_feature_selector_nested_interval(self): feat, fs = self.make_feature_for_interval_test(iv_rule, "Nested Overlap", chrom, strand, start, stop) aln_base = {'Seq': 'ATGC', 'Chrom': chrom, 'Strand': strand_to_bool(strand)} - aln_spill_lo = make_parsed_sam_record(**dict(aln_base, Start=start - 1, Name="spill")) - aln_spill_hi = make_parsed_sam_record(**dict(aln_base, Start=start + 2, Name="spill")) - aln_contained = make_parsed_sam_record(**dict(aln_base, Seq="N", Start=7, Name="contained")) - aln_contained_lo = 
make_parsed_sam_record(**dict(aln_base, Start=start, Name="contained")) - aln_contained_hi = make_parsed_sam_record(**dict(aln_base, Start=start + 1, Name="contained")) + aln_spill_lo = make_parsed_alignment(**dict(aln_base, Start=start - 1, Name="spill")) + aln_spill_hi = make_parsed_alignment(**dict(aln_base, Start=start + 2, Name="spill")) + aln_contained = make_parsed_alignment(**dict(aln_base, Seq="N", Start=7, Name="contained")) + aln_contained_lo = make_parsed_alignment(**dict(aln_base, Start=start, Name="contained")) + aln_contained_hi = make_parsed_alignment(**dict(aln_base, Start=start + 1, Name="contained")) """ aln: |ATGC| @@ -225,10 +225,10 @@ def test_feature_selector_partial_interval(self): feat, fs = self.make_feature_for_interval_test(iv_rule, "Partial Overlap", chrom, strand, start, stop) aln_base = {'Seq': 'ATGC', 'Chrom': chrom, 'Strand': strand_to_bool(strand)} - aln_spill_lo = make_parsed_sam_record(**dict(aln_base, Start=start - 1, Name="spill")) - aln_spill_hi = make_parsed_sam_record(**dict(aln_base, Start=start + 2, Name="spill")) - aln_contained_lo = make_parsed_sam_record(**dict(aln_base, Start=start, Name="contained")) - aln_contained_hi = make_parsed_sam_record(**dict(aln_base, Start=start + 1, Name="contained")) + aln_spill_lo = make_parsed_alignment(**dict(aln_base, Start=start - 1, Name="spill")) + aln_spill_hi = make_parsed_alignment(**dict(aln_base, Start=start + 2, Name="spill")) + aln_contained_lo = make_parsed_alignment(**dict(aln_base, Start=start, Name="contained")) + aln_contained_hi = make_parsed_alignment(**dict(aln_base, Start=start + 1, Name="contained")) """ aln: |ATGC| @@ -253,11 +253,11 @@ def test_feature_selector_exact_interval(self): aln_match = {'Seq': 'ATGC', 'Chrom': chrom, 'Strand': strand_to_bool(strand)} aln_short = {'Seq': 'NNN', 'Chrom': chrom, 'Strand': strand_to_bool(strand)} - aln_exact = make_parsed_sam_record(**dict(aln_match, Start=start, Name="exact")) - aln_short_lo = make_parsed_sam_record(**dict(aln_short, Start=start, Name="match lo")) - aln_short_hi = make_parsed_sam_record(**dict(aln_short, Start=start + 1, Name="match hi")) - aln_spill_lo = make_parsed_sam_record(**dict(aln_match, Start=start - 1, Name="spill lo")) - aln_spill_hi = make_parsed_sam_record(**dict(aln_match, Start=start + 1, Name="spill hi")) + aln_exact = make_parsed_alignment(**dict(aln_match, Start=start, Name="exact")) + aln_short_lo = make_parsed_alignment(**dict(aln_short, Start=start, Name="match lo")) + aln_short_hi = make_parsed_alignment(**dict(aln_short, Start=start + 1, Name="match hi")) + aln_spill_lo = make_parsed_alignment(**dict(aln_match, Start=start - 1, Name="spill lo")) + aln_spill_hi = make_parsed_alignment(**dict(aln_match, Start=start + 1, Name="spill hi")) """ aln_match: |ATGC| @@ -300,10 +300,10 @@ def test_feature_selector_5p_interval(self): feat_A, fs = self.make_feature_for_interval_test(iv_rule, "5' Anchored Overlap (+)", chrom, '+', start, end) aln_base = {'Start': start, 'Chrom': chrom, 'Strand': True} aln = { - 'aln_none': make_parsed_sam_record(**dict(aln_base, Start=start + 1, Seq="ATGC")), - 'aln_long': make_parsed_sam_record(**dict(aln_base, Seq="ATGCNN")), - 'aln_exact': make_parsed_sam_record(**dict(aln_base, Seq="ATGCN")), - 'aln_short': make_parsed_sam_record(**dict(aln_base, Seq="ATGC")), + 'aln_none': make_parsed_alignment(**dict(aln_base, Start=start + 1, Seq="ATGC")), + 'aln_long': make_parsed_alignment(**dict(aln_base, Seq="ATGCNN")), + 'aln_exact': make_parsed_alignment(**dict(aln_base, Seq="ATGCN")), + 
'aln_short': make_parsed_alignment(**dict(aln_base, Seq="ATGC")), } self.assertEqual(fs.choose(feat_A, aln['aln_none']), {}) @@ -347,10 +347,10 @@ def test_feature_selector_3p_interval(self): feat_A, fs = self.make_feature_for_interval_test(iv_rule, "3' Anchored Overlap (+)", chrom, '+', start, end) aln_base = {'Start': start, 'Chrom': chrom, 'Strand': True} aln = { - 'aln_none': make_parsed_sam_record(**dict(aln_base, Seq="ATGC")), - 'aln_long': make_parsed_sam_record(**dict(aln_base, Start=start - 1, Seq="ATGCNN")), - 'aln_exact': make_parsed_sam_record(**dict(aln_base, Seq="ATGCN")), - 'aln_short': make_parsed_sam_record(**dict(aln_base, Start=start + 1, Seq="ATGC")), + 'aln_none': make_parsed_alignment(**dict(aln_base, Seq="ATGC")), + 'aln_long': make_parsed_alignment(**dict(aln_base, Start=start - 1, Seq="ATGCNN")), + 'aln_exact': make_parsed_alignment(**dict(aln_base, Seq="ATGCN")), + 'aln_short': make_parsed_alignment(**dict(aln_base, Start=start + 1, Seq="ATGC")), } self.assertEqual(fs.choose(feat_A, aln['aln_none']), {}) diff --git a/tests/unit_tests_hts_parsing.py b/tests/unit_tests_hts_parsing.py index b9b7eac3..b8898497 100644 --- a/tests/unit_tests_hts_parsing.py +++ b/tests/unit_tests_hts_parsing.py @@ -22,7 +22,7 @@ unittest.main() -class SamReaderTests(unittest.TestCase): +class AlignmentReaderTests(unittest.TestCase): @classmethod def setUpClass(self): self.sam_file = f"{resources}/sam/identity_choice_test.sam" @@ -35,10 +35,10 @@ def exhaust_iterator(it): # === TESTS === - """Did SAM_reader correctly skip header values and parse all pertinent info from a single record SAM file?""" + """Did AlignmentReader correctly skip header values and parse all pertinent info from a single record SAM file?""" - def test_sam_reader(self): - sam_bundle, read_count = next(SAM_reader().bundle_multi_alignments(self.short_sam_file)) + def test_AlignmentReader_single_sam(self): + sam_bundle, read_count = next(AlignmentReader().bundle_multi_alignments(self.short_sam_file)) sam_record = sam_bundle[0] self.assertEqual(sam_record['Chrom'], "I") @@ -79,21 +79,21 @@ def test_sam_parser_comparison(self): """Does SAM_reader._get_decollapsed_filename() create an appropriate filename?""" - def test_SAM_reader_get_decollapsed_filename(self): - reader = SAM_reader() + def test_AlignmentReader_get_decollapsed_filename(self): + reader = AlignmentReader() reader.file = "~/path/to/input/sam_file.sam" sam_out = reader._get_decollapsed_filename() self.assertEqual(sam_out, "sam_file_decollapsed.sam") - """Does SAM_reader._new_bundle report the correct read count for different collapser types?""" + """Does AlignmentReader._new_bundle report the correct read count for different collapser types?""" - def test_SAM_reader_new_bundle(self): + def test_AlignmentReader_new_bundle(self): qnames = ["0_count=3", "seq0_x5", "non-collapsed"] counts = [3, 5, 1] - reader = SAM_reader() + reader = AlignmentReader() reader._header_dict = {'HD': {'SO': 'queryname'}} for qname, expected in zip(qnames, counts): @@ -101,10 +101,10 @@ def test_SAM_reader_new_bundle(self): _, read_count = reader._new_bundle({'Name': qname}) self.assertEqual(read_count, expected) - """Does SAM_reader._gather_metadata() correctly identify metadata and write the decollapsed file header?""" + """Does AlignmentReader._gather_metadata() correctly identify metadata and write the decollapsed file header?""" - def test_SAM_reader_gather_metadata(self): - reader = SAM_reader(decollapse=True) + def test_AlignmentReader_gather_metadata(self): + reader = 
AlignmentReader(decollapse=True) reader._decollapsed_filename = "mock_outfile_name.sam" sam_in = pysam.AlignmentFile(self.short_sam_file) @@ -124,14 +124,14 @@ def test_SAM_reader_gather_metadata(self): self.assertEqual(reader.references, ('I',)) self.assertTrue(reader.has_nm) - """Does SAM_reader._write_decollapsed_sam() write the correct number of duplicates to the decollapsed file?""" + """Does AlignmentReader._write_decollapsed_sam() write the correct number of duplicates to the decollapsed file?""" - def test_SAM_reader_write_decollapsed_sam(self): + def test_AlignmentReader_write_decollapsed_sam(self): header = pysam.AlignmentHeader() alignment = pysam.AlignedSegment(header) alignment.query_name = "0_count=5" - reader = SAM_reader(decollapse=True) + reader = AlignmentReader(decollapse=True) reader.collapser_type = "tiny-collapse" reader._collapser_token = "=" reader._decollapsed_reads = [alignment] @@ -150,12 +150,12 @@ def test_SAM_reader_write_decollapsed_sam(self): outfile.assert_has_calls(expected_writelines) self.assertTrue(len(reader._decollapsed_reads) == 0) - """Does SAM_reader._parse_alignments() save lines and write them to the decollapsed file when appropriate?""" + """Does AlignmentReader._parse_alignments() save lines and write them to the decollapsed file when appropriate?""" - def test_SAM_reader_parse_alignments_decollapse(self): - with patch.object(SAM_reader, "_write_decollapsed_sam") as write_fn: - # Set up SAM_reader class - reader = SAM_reader(decollapse=True) + def test_AlignmentReader_parse_alignments_decollapse(self): + with patch.object(AlignmentReader, "_write_decollapsed_sam") as write_fn: + # Set up AlignmentReader class + reader = AlignmentReader(decollapse=True) reader.collapser_type = "tiny-collapse" reader._decollapsed_reads = buffer = [0] * 99999 # At 100,001, expect buffer to be written reader.file = self.short_sam_file # File with single alignment @@ -178,10 +178,10 @@ def test_SAM_reader_parse_alignments_decollapse(self): """Are decollapsed outputs skipped when non-collapsed SAM files are supplied?""" - def test_SAM_reader_no_decollapse_non_collapsed_SAM_files(self): + def test_AlignmentReader_no_decollapse_non_collapsed_SAM_files(self): stdout_capture = io.StringIO() - with patch.object(SAM_reader, "_write_decollapsed_sam") as write_sam, \ - patch.object(SAM_reader, "_write_header_for_decollapsed_sam") as write_header: + with patch.object(AlignmentReader, "_write_decollapsed_sam") as write_sam, \ + patch.object(AlignmentReader, "_write_header_for_decollapsed_sam") as write_header: with contextlib.redirect_stderr(stdout_capture): reader = AlignmentReader(decollapse=True) records = reader.bundle_multi_alignments(f"{resources}/sam/non-collapsed.sam") diff --git a/tests/unit_tests_validation.py b/tests/unit_tests_validation.py index a17dbeef..807f4c28 100644 --- a/tests/unit_tests_validation.py +++ b/tests/unit_tests_validation.py @@ -237,7 +237,7 @@ def test_chrom_heuristics_report_output(self): exp_warnings = '\n'.join([ "GFF files and sequence files might not contain the same chromosome identifiers.", "This is determined from a subset of each sequence file, so false positives may be reported.", - "\t" + f"{validator.targets['sam files']}: ", + "\t" + f"{validator.targets['alignment files']}: ", "\t\t" + "sam1: ", "\t\t\t" + f"Chromosomes sampled: {', '.join(sorted(suspect_files['sam1']))}", "\t\t" + "sam2: ", @@ -304,7 +304,7 @@ def test_chrom_heuristics_runtime(self): class SamSqValidatorTest(unittest.TestCase): @classmethod def setUpClass(self): 
- self.syntax_header = "Every SAM file must have complete @SQ headers with SN and LN\n" \ + self.syntax_header = "Every alignment file must have complete @SQ headers with SN and LN\n" \ "fields when performing sequence-based counting.\n" self.identifier_header = "Sequence identifiers must be unique and have consistent length definitions.\n" @@ -333,12 +333,12 @@ def test_read_sq_headers(self): ], } - validator = SamSqValidator([sam_file]) + validator = AlignmentSqValidator([sam_file]) validator.read_sq_headers() self.assertDictEqual(validator.sq_headers, expected) - """Does SamSqValidator correctly identify missing @SQ headers?""" + """Does AlignmentSqValidator correctly identify missing @SQ headers?""" def test_missing_sq_headers(self): mock_files = ['sam1', 'sam2'] @@ -349,18 +349,18 @@ def test_missing_sq_headers(self): expected = '\n'.join([ self.syntax_header, - "\t" + f"{SamSqValidator.targets['missing sq']}: ", + "\t" + f"{AlignmentSqValidator.targets['missing sq']}: ", "\t\t" + mock_files[1] ]) - validator = SamSqValidator(mock_files) + validator = AlignmentSqValidator(mock_files) validator.sq_headers = mock_sam_headers validator.validate_sq_headers() self.assertListEqual(validator.report.errors, [expected]) self.assertListEqual(validator.report.warnings, []) - """Does SamSqValidator correctly identify incomplete @SQ headers?""" + """Does AlignmentSqValidator correctly identify incomplete @SQ headers?""" def test_incomplete_sq_headers(self): mock_files = ['sam1', 'sam2', 'sam3'] @@ -372,19 +372,19 @@ def test_incomplete_sq_headers(self): expected = '\n'.join([ self.syntax_header, - "\t" + f"{SamSqValidator.targets['incomplete sq']}: ", + "\t" + f"{AlignmentSqValidator.targets['incomplete sq']}: ", "\t\t" + mock_files[1], "\t\t" + mock_files[2], ]) - validator = SamSqValidator(mock_files) + validator = AlignmentSqValidator(mock_files) validator.sq_headers = mock_sam_headers validator.validate_sq_headers() self.assertListEqual(validator.report.errors, [expected]) self.assertListEqual(validator.report.warnings, []) - """Does SamSqValidator correctly identify duplicate identifiers?""" + """Does AlignmentSqValidator correctly identify duplicate identifiers?""" def test_duplicate_identifiers(self): mock_files = ['sam1', 'sam2', 'sam3'] @@ -396,18 +396,18 @@ def test_duplicate_identifiers(self): expected = '\n'.join([ self.identifier_header, - "\t" + f"{SamSqValidator.targets['intra sq']}: ", + "\t" + f"{AlignmentSqValidator.targets['intra sq']}: ", "\t\t" + mock_files[2], ]) - validator = SamSqValidator(mock_files) + validator = AlignmentSqValidator(mock_files) validator.sq_headers = mock_sam_headers validator.validate_sq_headers() self.assertListEqual(validator.report.errors, [expected]) self.assertListEqual(validator.report.warnings, []) - """Does SamSqValidator correctly identify identifiers with inconsistent length definitions?""" + """Does AlignmentSqValidator correctly identify identifiers with inconsistent length definitions?""" def test_inconsistent_identifier_length(self): mock_files = ['sam1', 'sam2', 'sam3'] @@ -419,12 +419,12 @@ def test_inconsistent_identifier_length(self): expected = '\n'.join([ self.identifier_header, - "\t" + f"{SamSqValidator.targets['inter sq']}: ", + "\t" + f"{AlignmentSqValidator.targets['inter sq']}: ", "\t\t" + 'chr1', "\t\t" + 'chr2', ]) - validator = SamSqValidator(mock_files) + validator = AlignmentSqValidator(mock_files) validator.sq_headers = mock_sam_headers validator.validate_sq_headers() diff --git a/tiny/rna/counter/counter.py 
b/tiny/rna/counter/counter.py index de4d71bd..5e577cdc 100644 --- a/tiny/rna/counter/counter.py +++ b/tiny/rna/counter/counter.py @@ -154,7 +154,7 @@ def load_config(features_csv: str, in_pipeline: bool) -> List[dict]: def load_references(paths: PathsFile, libraries: List[dict], rules: List[dict], prefs) -> ReferenceBase: - """Determines the reference source (GFF or SAM @SQ headers) and constructs the appropriate object + """Determines the reference source (GFF or alignment @SQ headers) and constructs the appropriate object Args: paths: a PathsFile object that optionally contains a `gff_files` configuration @@ -191,7 +191,7 @@ def map_and_reduce(libraries, paths, prefs): # MergedStatsManager handles final output files, regardless of multiprocessing status summary = MergedStatsManager(Features, paths['features_csv'], prefs) - # Use a multiprocessing pool if multiple sam files were provided + # Use a multiprocessing pool if multiple alignment files were provided if len(libraries) > 1: mp.set_start_method("fork") with mp.Pool(len(libraries)) as pool: diff --git a/tiny/rna/counter/features.py b/tiny/rna/counter/features.py index 1ce7be63..8eca57dc 100644 --- a/tiny/rna/counter/features.py +++ b/tiny/rna/counter/features.py @@ -51,7 +51,7 @@ def count_reads(self, library: dict): read_seq = self.alignment_reader.bundle_multi_alignments(library["File"]) self.stats.assign_library(library) - # For each sequence in the sam file... + # For each sequence in the alignment file... for bundle, read_count in read_seq: bstat = self.stats.count_bundle(bundle, read_count) diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py index 005cc5e5..f53a4049 100644 --- a/tiny/rna/counter/validation.py +++ b/tiny/rna/counter/validation.py @@ -171,10 +171,10 @@ def chroms_shared_with_genomes(self, genome_fastas): def alignment_chroms_mismatch_heuristic(self, sam_files: List[str], subset_size=50000) -> Dict[str, set]: """Since alignment files can be very large, we only check that there's at least one shared chromosome identifier and only the first subset_size lines are read from each file. The - returned dictionary contains only the SAM files whose sampled chromosome set failed to + returned dictionary contains only the alignment files whose sampled chromosome set failed to intersect with the chromosomes parsed from GFF files. 
Returns: - a dictionary of {SAM filename: SAM chromosomes sampled}""" + a dictionary of {alignment filename: alignment chromosomes sampled}""" files_wo_overlap = {} @@ -296,7 +296,7 @@ def get_ambiguous_lengths(self) -> List[str]: return bad_seqs def get_duplicate_identifiers(self) -> Dict[str, List[str]]: - """Returns a dictionary of SAM files that contain duplicate sequence identifiers""" + """Returns a dictionary of alignment files that contain duplicate sequence identifiers""" bad_files = {} for file in self.alignment_files: @@ -308,13 +308,13 @@ def get_duplicate_identifiers(self) -> Dict[str, List[str]]: return bad_files def get_incomplete_sq_headers(self) -> List[str]: - """Returns a list of SAM files that have incomplete @SQ headers""" + """Returns a list of alignment files that have incomplete @SQ headers""" return [file for file, sqs in self.sq_headers.items() if not all("SN" in sq and "LN" in sq for sq in sqs)] def get_missing_headers(self) -> List[str]: - """Returns a list of SAM files that lack @SQ headers""" + """Returns a list of alignment files that lack @SQ headers""" return [file for file, sqs in self.sq_headers.items() if len(sqs) == 0] From 1c112f8aba45d383a3bf03a368272cdfe0780943 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sun, 30 Apr 2023 21:34:30 -0700 Subject: [PATCH 13/18] Updating unit tests in accordance with changes to both the SamplesSheet class and the AlignmentReader class --- tests/unit_tests_configuration.py | 170 +++++++++++++++++++++++------- tests/unit_tests_counter.py | 28 ++--- tests/unit_tests_features.py | 1 + tests/unit_tests_hts_parsing.py | 129 ++++++++++++++++++++++- 4 files changed, 263 insertions(+), 65 deletions(-) diff --git a/tests/unit_tests_configuration.py b/tests/unit_tests_configuration.py index b9da1a9a..a10c6f38 100644 --- a/tests/unit_tests_configuration.py +++ b/tests/unit_tests_configuration.py @@ -10,6 +10,14 @@ from tiny.rna.util import r_reserved_keywords +def patch_isfile(): + return patch('tiny.rna.configuration.os.path.isfile', return_value=True) + + +def patch_open(read_data): + return patch('tiny.rna.configuration.open', mock_open(read_data=read_data)) + + class BowtieIndexesTest(unittest.TestCase): @classmethod def setUpClass(self): @@ -150,37 +158,60 @@ def test_verify_bowtie_build_outputs(self): class SamplesSheetTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.contexts = ["Pipeline Start", "Pipeline Step", "Standalone Run"] + """Does SamplesSheet catch multi-assignment of control condition?""" def test_validate_control_group(self): - sheet = csv_factory("samples.csv", [ - {'File': '1.fastq', 'Group': 'G1', 'Replicate': '1', 'Control': True, 'Normalization': ''}, # Good - {'File': '2.fastq', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''}, # Good - {'File': '3.fastq', 'Group': 'G2', 'Replicate': '1', 'Control': True, 'Normalization': ''} # Bad - ]) # ^^^ - - exp_contains = r".*(multiple control conditions).*" - with self.assertRaisesRegex(AssertionError, exp_contains), \ - patch('tiny.rna.configuration.open', mock_open(read_data=sheet)), \ - patch('tiny.rna.configuration.os.path.isfile', return_value=True): - SamplesSheet('mock_filename') + rows = [ + {'File': '1', 'Group': 'G1', 'Replicate': '1', 'Control': True, 'Normalization': ''}, # Good + {'File': '2', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''}, # Good + {'File': '3', 'Group': 'G2', 'Replicate': '1', 'Control': True, 'Normalization': ''} # Bad + ] # ^^^^ ^^^^ + + 
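        # The rows above are materialized with context-appropriate extensions below:
        # .fastq sheets exercise the "Pipeline Start" context, while .sam sheets
        # stand in for "Pipeline Step" and "Standalone Run".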
start_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.fastq") for i, r in enumerate(rows)]) + step_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.sam") for i, r in enumerate(rows)]) + run_sheet = step_sheet + + with patch_isfile(): + # Control condition should not be evaluated in Pipeline Step or Standalone Run context + with patch_open(step_sheet): + SamplesSheet('mock_filename', context="Pipeline Step") + with patch_open(run_sheet): + SamplesSheet('mock_filename', context="Standalone Run") + + # Control condition should be evaluated in Pipeline Start context + exp_contains = r".*(multiple control conditions).*" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(start_sheet): + SamplesSheet('mock_filename', context="Pipeline Start") """Does SamplesSheet catch duplicate entries for the same group and rep?""" + def test_validate_group_rep(self): - sheet = csv_factory("samples.csv", [ - {'File': '1.fastq', 'Group': 'G1', 'Replicate': '1', 'Control': True, 'Normalization': ''}, # Good - {'File': '2.fastq', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''}, # Good - {'File': '3.fastq', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''} # Bad - ]) # ^^^ ^^^ + rows = [ + {'File': '1', 'Group': 'G1', 'Replicate': '1', 'Control': True, 'Normalization': ''}, # Good + {'File': '2', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''}, # Good + {'File': '3', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''} # Bad + ] # ^^^^ ^^^ + + start_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.fastq") for i, r in enumerate(rows)]) + step_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.sam") for i, r in enumerate(rows)]) + run_sheet = step_sheet + contexts = list(zip(self.contexts, [start_sheet, step_sheet, run_sheet])) exp_contains = r".*(same group and replicate).*" - with self.assertRaisesRegex(AssertionError, exp_contains), \ - patch('tiny.rna.configuration.open', mock_open(read_data=sheet)), \ - patch('tiny.rna.configuration.os.path.isfile', return_value=True): - SamplesSheet('mock_filename') - """Does SamplesSheet catch fastq files that don't exist, have a bad file extension, or are listed more than once?""" + # Validation should take place in all contexts + for context, sheet in contexts: + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet), patch_isfile(): + SamplesSheet('mock_filename', context=context) + + """Does SamplesSheet catch bad fastq entries in Pipeline Start context?""" + def test_validate_fastq_filepath(self): + context = "Pipeline Start" csv_rows = [ {'File': '1.fastq', 'Group': 'G1', 'Replicate': '1', 'Control': True, 'Normalization': ''}, # Good {'File': '1.fastq', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''}, # Bad @@ -189,28 +220,50 @@ def test_validate_fastq_filepath(self): sheet = csv_factory("samples.csv", csv_rows) # File doesn't exist - exp_contains = r".*(was not found).*" - with self.assertRaisesRegex(AssertionError, exp_contains), \ - patch('tiny.rna.configuration.open', mock_open(read_data=sheet)): - SamplesSheet('mock_filename') + exp_contains = r".*(fastq file on row 1 of mock_filename was not found).*" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet): + SamplesSheet('mock_filename', context=context) # Duplicate filename - exp_contains = r".*(listed more than once).*" - with self.assertRaisesRegex(AssertionError, exp_contains), \ - 
patch('tiny.rna.configuration.open', mock_open(read_data=sheet)), \ - patch('tiny.rna.configuration.os.path.isfile', return_value=True): - SamplesSheet('mock_filename') + exp_contains = r".*(listed more than once).*\(row 2\)" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet), patch_isfile(): + SamplesSheet('mock_filename', context=context) # Bad file extension - exp_contains = r".*(\.fastq\(\.gz\) extension).*" - csv_rows.pop(0) + sheet = csv_factory("samples.csv", csv_rows[1:]) # remove duplicate entry + exp_contains = r".*(\.fastq\(\.gz\) extension).*\(row 2\)" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet), patch_isfile(): + SamplesSheet('mock_filename', context=context) + + """Does SamplesSheet catch bad alignment file entries in Standalone Run context?""" + + def test_validate_alignments_filepath(self): + context = "Standalone Run" + csv_rows = [ + {'File': '1.sam', 'Group': 'G1', 'Replicate': '1', 'Control': True, 'Normalization': ''}, # Good + {'File': '1.sam', 'Group': 'G1', 'Replicate': '2', 'Control': True, 'Normalization': ''}, # Good + {'File': '1.bam', 'Group': 'G1', 'Replicate': '3', 'Control': True, 'Normalization': ''}, # Bad + {'File': '2.xyz', 'Group': 'G2', 'Replicate': '1', 'Control': True, 'Normalization': ''} # Bad + ] # ^^^^^^^ sheet = csv_factory("samples.csv", csv_rows) - with self.assertRaisesRegex(AssertionError, exp_contains), \ - patch('tiny.rna.configuration.open', mock_open(read_data=sheet)), \ - patch('tiny.rna.configuration.os.path.isfile', return_value=True): - SamplesSheet('mock_filename') - """Does validate_r_safe_sample_groups detect group names that will cause namespace collisions in R?""" + # File doesn't exist + exp_contains = r".*(file on row 1 of mock_filename was not found).*" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet): + SamplesSheet('mock_filename', context=context) + + # Duplicate filename + exp_contains = r".*(listed more than once).*\(row 2\)" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet), patch_isfile(): + SamplesSheet('mock_filename', context=context) + + # Bad file extension + sheet = csv_factory("samples.csv", csv_rows[1:]) # remove duplicate entry + exp_contains = r".*(\.sam or \.bam extension).*\(row 3\)" + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet), patch_isfile(): + SamplesSheet('mock_filename', context=context) + + """Does validate_r_safe_sample_groups() detect group names that will cause namespace collisions in R?""" def test_validate_r_safe_sample_groups(self): non_alphanum_chars = [bad.join(('a', 'b')) for bad in "~!@#$%^&*()+-=`<>?/,:;\"'[]{}\| \t\n\r\f\v"] @@ -227,6 +280,45 @@ def test_validate_r_safe_sample_groups(self): with self.assertRaisesRegex(AssertionError, msg): SamplesSheet.validate_r_safe_sample_groups(dict.fromkeys(bad)) + """Does validate_normalization() do what it should?""" + + def test_validate_normalization(self): + good = [ + {'File': '1.fastq', 'Group': 'G1', 'Replicate': '1', 'Control': False, 'Normalization': 'rpm'}, # Good + {'File': '2.fastq', 'Group': 'G1', 'Replicate': '2', 'Control': False, 'Normalization': 'RPM'}, # Good + {'File': '3.fastq', 'Group': 'G1', 'Replicate': '3', 'Control': False, 'Normalization': ' RPM '}, # Good + {'File': '4.fasta', 'Group': 'G2', 'Replicate': '1', 'Control': False, 'Normalization': ' 1'}, # Good + {'File': '5.fasta', 'Group': 'G2', 'Replicate': '2', 'Control': False, 'Normalization': '1.1'}, # Good + {'File': 
'6.fasta', 'Group': 'G2', 'Replicate': '3', 'Control': False, 'Normalization': ''}, # Good + ] # ^^^^ + + start_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.fastq") for i, r in enumerate(good)]) + step_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.sam") for i, r in enumerate(good)]) + run_sheet = step_sheet + + # These SHOULD NOT throw an error + for context, sheet in zip(self.contexts, [start_sheet, step_sheet, run_sheet]): + with patch_open(sheet), patch_isfile(): + SamplesSheet("mock_filename", context=context) + + bad = [ + {'File': '1.fastq', 'Group': 'G1', 'Replicate': '1', 'Control': False, 'Normalization': 'abc'}, # Bad + {'File': '2.fastq', 'Group': 'G1', 'Replicate': '2', 'Control': False, 'Normalization': '123.rpm'}, # Bad + {'File': '3.fastq', 'Group': 'G1', 'Replicate': '3', 'Control': False, 'Normalization': '.'}, # Bad + {'File': '1.fastq', 'Group': 'G2', 'Replicate': '1', 'Control': False, 'Normalization': '_'}, # Bad + ] + + start_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.fastq") for i, r in enumerate(bad)]) + step_sheet = csv_factory("samples.csv", [dict(r, File=f"{i}.sam") for i, r in enumerate(bad)]) + run_sheet = step_sheet + + exp_contains = r".*(Invalid normalization value).*" + + # These SHOULD throw an error + for context, sheet in zip(self.contexts, [start_sheet, step_sheet, run_sheet]): + with self.assertRaisesRegex(AssertionError, exp_contains), patch_open(sheet), patch_isfile(): + SamplesSheet("mock_filename", context=context) + class PathsFileTest(unittest.TestCase): @@ -344,11 +436,11 @@ def test_backward_incompatibility(self): with self.assertRaisesRegex(AssertionError, r".*(check the release notes).*"): config.check_backward_compatibility() - """Does PathsFile map all paths to the working directory when in_pipeline=True?""" + """Does PathsFile map all paths to the working directory when context=True?""" def test_pipeline_mapping(self): # There are entries in the template Paths File that will cause FileNotFound - # when in_pipeline=True (they're not in the template directory). Since + # when context=True (they're not in the template directory). Since # file existence isn't relevant for this test, we patch that out. 
with patch('tiny.rna.configuration.os.path.isfile', return_value=True): config = make_paths_file(in_pipeline=True) diff --git a/tests/unit_tests_counter.py b/tests/unit_tests_counter.py index ad83ee01..9702e2b0 100644 --- a/tests/unit_tests_counter.py +++ b/tests/unit_tests_counter.py @@ -7,7 +7,7 @@ import unit_test_helpers as helpers import tiny.rna.counter.counter as counter -from tiny.rna.util import from_here +from tiny.rna.configuration import ConfigBase resources = "./testdata/counter" @@ -96,13 +96,13 @@ def get_loaded_samples_row(self, row, exp_file): def test_load_samples_single_cmd(self): mock_samp_sheet_path = '/dev/null' inp_file = "test.fastq" - exp_file = from_here(mock_samp_sheet_path, "test_aligned_seqs.sam") + exp_file = ConfigBase.joinpath(mock_samp_sheet_path, "test_aligned_seqs.sam") row = dict(self.csv_samp_row_dict, **{'File': inp_file}) csv = self.csv("samples.csv", [row]) with patch('tiny.rna.configuration.open', mock_open(read_data=csv)): - inputs_step = counter.load_samples(mock_samp_sheet_path, is_pipeline=False) + inputs_step = counter.load_samples(mock_samp_sheet_path, in_pipeline=False) expected_result = self.get_loaded_samples_row(row, exp_file) self.assertEqual(inputs_step, expected_result) @@ -118,7 +118,7 @@ def test_load_samples_single_pipeline(self): csv = self.csv("samples.csv", [row]) with patch('tiny.rna.configuration.open', mock_open(read_data=csv)): - inputs_pipeline = counter.load_samples(mock_samp_sheet_path, is_pipeline=True) + inputs_pipeline = counter.load_samples(mock_samp_sheet_path, in_pipeline=True) expected_result = self.get_loaded_samples_row(row, exp_file) self.assertEqual(inputs_pipeline, expected_result) @@ -144,25 +144,11 @@ def test_load_samples_sam(self): with patch('tiny.rna.configuration.open', mock_open(read_data=csv)): dummy_file = '/dev/null' - inputs = counter.load_samples(dummy_file, is_pipeline=False) + inputs = counter.load_samples(dummy_file, in_pipeline=False) expected_result = self.get_loaded_samples_row(row, sam_filename) self.assertEqual(inputs, expected_result) - """Does load_samples throw ValueError if a non-absolute path to a SAM file is provided?""" - - def test_load_samples_nonabs_path(self): - bad = "./dne.sam" - row = dict(self.csv_samp_row_dict, **{'File': bad}) - csv = self.csv("samples.csv", [row]) - - expected_error = "The following file must be expressed as an absolute path:\n" + bad - - with patch('tiny.rna.configuration.open', mock_open(read_data=csv)): - with self.assertRaisesRegex(ValueError, expected_error): - dummy_file = '/dev/null' - counter.load_samples(dummy_file, False) - """Does load_samples throw ValueError if sample filename does not have a .fastq or .sam extension?""" def test_load_samples_bad_extension(self): @@ -187,7 +173,7 @@ def test_load_config_single_cmd(self): with patch('tiny.rna.configuration.open', mock_open(read_data=csv)): dummy_file = '/dev/null' - ruleset = counter.load_config(dummy_file, is_pipeline=False) + ruleset = counter.load_config(dummy_file, in_pipeline=False) expected_ruleset = self.parsed_feat_rule self.assertEqual(ruleset, expected_ruleset) @@ -201,7 +187,7 @@ def test_load_config_single_pipeline(self): with patch('tiny.rna.configuration.open', mock_open(read_data=csv)): dummy_file = '/dev/null' - ruleset = counter.load_config(dummy_file, is_pipeline=True) + ruleset = counter.load_config(dummy_file, in_pipeline=True) expected_ruleset = self.parsed_feat_rule self.assertEqual(ruleset, expected_ruleset) diff --git a/tests/unit_tests_features.py 
b/tests/unit_tests_features.py index ed7ef9e9..9f2cd3e0 100644 --- a/tests/unit_tests_features.py +++ b/tests/unit_tests_features.py @@ -7,6 +7,7 @@ resources = "./testdata/counter" + class FeaturesTests(unittest.TestCase): @classmethod diff --git a/tests/unit_tests_hts_parsing.py b/tests/unit_tests_hts_parsing.py index b8898497..6f06c066 100644 --- a/tests/unit_tests_hts_parsing.py +++ b/tests/unit_tests_hts_parsing.py @@ -28,6 +28,7 @@ def setUpClass(self): self.sam_file = f"{resources}/sam/identity_choice_test.sam" self.short_sam_file = f"{resources}/sam/single.sam" self.short_sam = helpers.read(self.short_sam_file) + self.short_bam_file = f"{resources}/bam/single.bam" @staticmethod def exhaust_iterator(it): @@ -50,7 +51,22 @@ def test_AlignmentReader_single_sam(self): self.assertEqual(sam_record['Length'], 21) self.assertEqual(sam_record['nt5end'], 'G') - """Does our custom SAM parser produce the same pertinent info as HTSeq's BAM_reader? + """Did AlignmentReader correctly skip header values and parse all pertinent info from a single record BAM file?""" + + def test_AlignmentReader_single_bam(self): + bam_bundle, read_count = next(AlignmentReader().bundle_multi_alignments(self.short_bam_file)) + bam_record = bam_bundle[0] + + self.assertEqual(bam_record['Chrom'], "I") + self.assertEqual(bam_record['Start'], 15064569) + self.assertEqual(bam_record['End'], 15064590) + self.assertEqual(bam_record['Strand'], False) + self.assertEqual(bam_record['Name'], "0_count=5") + self.assertEqual(bam_record['Seq'], "CAAGACAGAGCTTCACCGTTC") + self.assertEqual(bam_record['Length'], 21) + self.assertEqual(bam_record['nt5end'], 'G') + + """Does our AlignmentReader produce the same pertinent info from a SAM file as HTSeq's BAM_reader? A note on SAM files: reads are always stored 5' to 3', so antisense reads are actually recorded in reverse complement. 
HTSeq automatically performs this conversion, but we @@ -77,7 +93,28 @@ def test_sam_parser_comparison(self): else: self.assertEqual(our['Seq'], their.read.seq.decode()) - """Does SAM_reader._get_decollapsed_filename() create an appropriate filename?""" + """Does our AlignmentReader produce the same pertinent info from a BAM file as HTSeq's BAM_reader?""" + + def test_bam_parser_comparison(self): + file = f"{resources}/bam/Lib304_test.bam" + ours = AlignmentReader().bundle_multi_alignments(file) + theirs = HTSeq.bundle_multiple_alignments(HTSeq.BAM_Reader(file)) + + for (our_bundle, _), their_bundle in zip(ours, theirs): + self.assertEqual(len(our_bundle), len(their_bundle)) + for our, their in zip(our_bundle, their_bundle): + self.assertEqual(our['Chrom'], their.iv.chrom) + self.assertEqual(our['Start'], their.iv.start) + self.assertEqual(our['End'], their.iv.end) + self.assertEqual(our['Name'], their.read.name) + self.assertEqual(our['nt5end'], chr(their.read.seq[0])) # See note above + self.assertEqual(our['Strand'], helpers.strand_to_bool(their.iv.strand)) + if our['Strand'] is False: # See note above + self.assertEqual(our['Seq'][::-1].translate(helpers.complement), their.read.seq.decode()) + else: + self.assertEqual(our['Seq'], their.read.seq.decode()) + + """Does AlignmentReader._get_decollapsed_filename() create an appropriate filename?""" def test_AlignmentReader_get_decollapsed_filename(self): reader = AlignmentReader() @@ -106,21 +143,28 @@ def test_AlignmentReader_new_bundle(self): def test_AlignmentReader_gather_metadata(self): reader = AlignmentReader(decollapse=True) reader._decollapsed_filename = "mock_outfile_name.sam" - + reader._assign_library(self.short_sam_file) sam_in = pysam.AlignmentFile(self.short_sam_file) + with patch('builtins.open', mock_open()) as sam_out: reader._gather_metadata(sam_in) expected_writelines = [ call('mock_outfile_name.sam', 'w'), call().__enter__(), - call().write("@SQ\tSN:I\tLN:21\n"), + call().write("@HD\tSO:unsorted\n@SQ\tSN:I\tLN:21\n@PG\tID:bowtie\n"), call().__exit__(None, None, None) ] + expected_header_dict = { + 'HD': {'SO': 'unsorted'}, + 'SQ': [{'LN': 21, 'SN': 'I'}], + 'PG': [{'ID': 'bowtie'}] + } + sam_out.assert_has_calls(expected_writelines) self.assertEqual(reader.collapser_type, 'tiny-collapse') - self.assertDictEqual(reader._header_dict, {'SQ': [{'SN': "I", 'LN': 21}]}) + self.assertDictEqual(reader._header_dict, expected_header_dict) self.assertEqual(reader.references, ('I',)) self.assertTrue(reader.has_nm) @@ -194,6 +238,81 @@ def test_AlignmentReader_no_decollapse_non_collapsed_SAM_files(self): "Alignments do not appear to be derived from a supported collapser input. 
" "Decollapsed SAM files will therefore not be produced.\n") + """Is incompatible alignment file ordering correctly identified from @HD header values?""" + + def test_AlignmentReader_incompatible_HD_header(self): + # Valid SO values: ["unknown", "unsorted", "queryname", "coordinate"] + # Valid GO values: ["none", "query", "reference"] + + strictly_compatible = [ + {'HD': {'SO': "queryname"}}, + {'HD': {'GO': "query"}}, + ] + + # Should not throw error + for header in strictly_compatible: + reader = AlignmentReader() + reader._header_dict = header + reader._assign_library("mock_infile.sam") + reader._check_for_incompatible_order() + + strictly_incompatible = [ + ({'HD': {'SO': "coordinate"}}, "by coordinate"), + ({'HD': {'GO': "reference"}}, "by reference"), + ({'HD': {}}, "sorting/grouping couldn't be determined"), + ({}, "sorting/grouping couldn't be determined"), + ] + + # Should throw error + for header, message in strictly_incompatible: + reader = AlignmentReader() + reader._header_dict = header + reader._assign_library("mock_infile.sam") + with self.assertRaisesRegex(ValueError, message): + reader._check_for_incompatible_order() + + """Is incompatible alignment file ordering correctly identified from @PG header values?""" + + def test_AlignmentReader_incompatible_PG_header(self): + # SO = ["unknown", "unsorted", "queryname", "coordinate"] + # GO = ["none", "query", "reference"] + + self.assertEqual(AlignmentReader.compatible_unordered, ("bowtie", "bowtie2", "star")) + SO_unordered = [{'SO': "unknown"}, {'SO': "unsorted"}] + GO_unordered = [{'GO': "none"}] + compatible = [ + {'PG': [{'ID': tool}], 'HD': un_so, 'GO': un_go} + for tool in AlignmentReader.compatible_unordered + for un_so in SO_unordered + for un_go in GO_unordered + ] + + # Only the last reported PG ID matters + multiple_tools = [{'ID': "INCOMPATIBLE"}, {'ID': "bowtie"}] + compatible += [{'PG': multiple_tools, 'HD': {'SO': "unsorted"}}] + + # Should not throw error + for header in compatible: + reader = AlignmentReader() + reader._header_dict = header + reader._assign_library("mock_infile.sam") + reader._check_for_incompatible_order() + + incompatible = [ + {'PG': [{'ID': "INCOMPATIBLE"}], 'HD': un_so, 'GO': un_go} + for un_so in SO_unordered + for un_go in GO_unordered + ] + + # Should throw error + expected_error = "adjacency couldn't be determined" + for header in incompatible: + reader = AlignmentReader() + reader._header_dict = header + reader._assign_library("mock_infile.sam") + with self.assertRaisesRegex(ValueError, expected_error): + reader._check_for_incompatible_order() + class ReferenceFeaturesTests(unittest.TestCase): From deb2425ee71f82c05bdd71109b6fd3176b0bf4c0 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sun, 30 Apr 2023 21:40:41 -0700 Subject: [PATCH 14/18] Minor docstring update and lifting state for compatible alignment tools that produce "unordered" outputs --- tiny/rna/counter/hts_parsing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tiny/rna/counter/hts_parsing.py b/tiny/rna/counter/hts_parsing.py index b1311a0e..81b1b789 100644 --- a/tiny/rna/counter/hts_parsing.py +++ b/tiny/rna/counter/hts_parsing.py @@ -28,7 +28,9 @@ class AlignmentReader: - """A minimal SAM/BAM reader that bundles multiple-alignments and only parses data relevant to the workflow.""" + """A SAM/BAM wrapper for Pysam that bundles multiple-alignments and only retrieves data relevant to the workflow.""" + + compatible_unordered = ("bowtie", "bowtie2", "star") # unordered files 
from these tools don't require sorting def __init__(self, **prefs): self.decollapse = prefs.get("decollapse", False) @@ -112,7 +114,8 @@ def _check_for_incompatible_order(self): Headers that report unsorted/unknown sorting order are likely still compatible, but since the SAM/BAM standard doesn't address alignment adjacency, we have to assume - that it is implementation dependent. Therefore, we check the last reported @PG + that it is implementation dependent. Therefore, if all other checks for strict + compatibility/incompatibility pass, we check the last reported @PG header against a short (incomplete) list of alignment tools that are known to follow the adjacency convention.""" @@ -141,7 +144,7 @@ def _check_for_incompatible_order(self): # to handle the alignments is one that produces adjacent alignments by default pg_header = self._header_dict.get('PG', [{}]) last_prog = pg_header[-1].get('ID', "") - if last_prog.lower() not in ("bowtie", "bowtie2", "star"): + if last_prog.lower() not in self.compatible_unordered: raise ValueError(error.format(reason="multi-mapping adjacency couldn't be determined")) def _determine_collapser_type(self, qname: str) -> None: From c309ffa376a64563b471f29382cebe05fa367aa5 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Sun, 30 Apr 2023 21:42:23 -0700 Subject: [PATCH 15/18] Adding relevant updates to tiny-count documentation and adding test files that were missed earlier --- doc/tiny-count.md | 11 ++++++++--- tests/testdata/counter/bam/Lib304_test.bam | Bin 0 -> 22386 bytes tests/testdata/counter/bam/single.bam | Bin 0 -> 217 bytes 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 tests/testdata/counter/bam/Lib304_test.bam create mode 100644 tests/testdata/counter/bam/single.bam diff --git a/doc/tiny-count.md b/doc/tiny-count.md index 79f80f0e..3537c16c 100644 --- a/doc/tiny-count.md +++ b/doc/tiny-count.md @@ -7,7 +7,10 @@ For an explanation of tiny-count's parameters in the Run Config and by commandli tiny-count offers a variety of options for refining your analysis. You might find that repeat analyses are required while tuning these options to your goals. Using the command `tiny recount`, tinyRNA will run the workflow starting at the tiny-count step using inputs from a prior end-to-end run to save time. See the [pipeline resume documentation](Pipeline.md#resuming-a-prior-analysis) for details and prerequisites. ## Running as a Standalone Tool -If you would like to run tiny-count as a standalone tool, not as part of an end-to-end or resumed analysis, you can do so with the command `tiny-count`. The command has [one required argument](Parameters.md#full-tiny-count-help-string): your Paths File. Your Samples Sheet will need to list SAM files rather than FASTQ files in the `FASTQ/SAM Files` column. SAM files from third party sources are also supported, and if they have been produced from reads collapsed by tiny-collapse or fastx, tiny-count will honor the reported read counts. +If you would like to run tiny-count as a standalone tool, not as part of an end-to-end or resumed analysis, you can do so with the command `tiny-count`. The command has [one required argument](Parameters.md#full-tiny-count-help-string): your Paths File. Your Samples Sheet will need to list SAM or BAM alignment files rather than FASTQ files in the `Input Files` column. 
Alignment files from third-party sources are also supported, and if they have been produced from reads collapsed by tiny-collapse or fastx, tiny-count will honor the reported read counts.
+
+#### Input File Requirements
+The SAM/BAM files provided during standalone runs _must_ be ordered so that multi-mapping read alignments are listed adjacent to one another. This adjacency convention is required for proper normalization by genomic hits. For this reason, files with ambiguous sorting or grouping (`SO:unsorted`, `SO:unknown`, or `GO:none`) will be rejected unless they were produced by an alignment tool that we recognize as following the adjacency convention. At this time, this includes Bowtie, Bowtie2, and STAR (an admittedly incomplete list).

#### Using Non-collapsed Sequence Alignments
While third-party SAM files from non-collapsed reads are supported, there are some caveats. These files will result in substantially higher resource usage and runtimes; we strongly recommend collapsing prior to alignment. Additionally, the sequence-related stats produced by tiny-count will no longer represent _unique_ sequences. These stats will instead refer to all sequences with unique QNAMEs (that is, multi-alignment bundles still carry a sequence count of 1).
@@ -139,8 +142,10 @@ Examples:

## Count Normalization
Small RNA reads passing selection will receive a normalized count increment. By default, read counts are normalized twice before being assigned to a feature. Both normalization steps can be disabled in `run_config.yml` if desired. Counts for each small RNA sequence are divided:
-1. By the number of loci it aligns to in the genome.
-2. By the number of _selected_ features for each of its alignments.
+1. By the number of loci it aligns to in the genome (genomic hits).
+2. By the number of _selected_ features for each of its alignments (feature hits).
+
+>**Important**: For proper normalization by genomic hits, input files must be ordered such that multi-mapping read alignments are listed adjacent to one another.
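To make the arithmetic concrete, here is a minimal sketch of how the two steps apply to a single sequence (illustrative numbers only, not tiny-count's implementation):

```python
# A sequence seen 4 times aligns to 2 genomic loci; the first alignment
# overlaps one selected feature, the second overlaps two.
read_count = 4                  # collapsed count for one unique sequence
genomic_hits = 2                # step 1 divisor: loci this sequence aligns to
per_locus = read_count / genomic_hits          # 4 / 2 = 2.0 per alignment

feature_hits = [1, 2]           # step 2 divisors: selected features per alignment
increments = [per_locus / n for n in feature_hits]

print(increments)               # [2.0, 1.0]: the single feature at the first locus
                                # gains 2.0; each of the two features at the second
                                # locus gains 1.0
```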
## The Details You may encounter the following cases when you have more than one unique GFF file listed in your Paths File: diff --git a/tests/testdata/counter/bam/Lib304_test.bam b/tests/testdata/counter/bam/Lib304_test.bam new file mode 100644 index 0000000000000000000000000000000000000000..48e2b97e87e261342653c2b997cad233b3d66674 GIT binary patch literal 22386 zcmV)EK)}BriwFb&00000{{{d;LjnLb0aZ}TP69C$9o!9H(XcNa+7V<{h$Kpas3CDX zxzk=?;?Rz50r@Te$-i*nMK`#(?{iL0?)3JtgPkOqJj~M9WijYy>FTN2+u(de)19nd z($%tW;C-KbGvP^lnk~glnS!3))!&N}xRi5o>$$7DQEB zQT=i_H1)>vz&Jm8Xj}BGf>3LoF}`N}z^D?YVLVb}9p77^feKsHmCEumVulp4Wc1Iz4ZeoT3;U^qKLaz7^mORm(+#;L!~}WBE*ftOpo-< zfJ?0;&!yJ9!pd-M#zPq9<4nqtxOAai(?A>0oDEza{+T0Ry)OMZxuTdSpWk1}51R$M zCwl<^03VA81ONa4009360763o0EsNUoe8wvRJq4HFk_|3&81=NO$avdikx`>1d>J# z0!3viATxr>RG|ncKAI3Qt)LH!Kp91)i~#mjG&CcH6{^sw1F)MzKI%4-R>wJ99&{+I$1^@YY^oRVea^RuIo^Z^p z&s4|kh+o^D87QWyg|5l@nPE*Ev#Yajtuki%uG7ZM*mc^tLq`d--?|%w8N-Ayi6K-Y z!c6?y4Ln;4P1Ur`5tIKzG4%*@@F?bx%YQm%-H-2A2xgeYbRy0d2`BV5IdB6hoIMay zrI7Iya_Oww0kMY*fgCalxni%q0Qp((ldR7`YBA&>{N7Sz-?!yJ^(}KCr53FhrL3Vj zpzH(6#1y4oNl+Bi)@)BxEK3%p&;`3=%2}YCsSpagV2x^G!E8;ERYSI1e`Fw}ntGK_ zExQzuhZNqcG^@!4vt-3HWLNctUuBsi3Ccyi&oYsrwBiLDgWs!?>==O|Wjwsv zioQ90_|{Y7by~^8LqjukH&j>taJoKcu1=1|ecbiwG!yHinz9|ZuBmIOKR!X>Y^a&{ zUIEIRd`dg{6#`S2b<0p~Uy!mUrTnlI%Cu4_jZ!GTFNLyxp%i*d9z1FzP!`F&GuTZY z6WcKiOV*8yhsVbxdia_rZGeYgTg<~pc8PC}RsQh(=8(Vqx?=wFU+=Ub9)5$&6HUy+ z4afBjL$}2rzV-5jc=*KORr+wMdF;7hhcTb{`19P0v|8;ZHl-PwzF|S=1t$L|wJFC_ z%(`Q@1E$NpNm-1dDM|>CA}OL6dcsbha{(}mxmRhmR!QEahDHFEEUJ7&x7;b)ZVO7Y z_cH6Tlx8EMyo>*D%CZ&M{%roktIg=)2aP^_tv|hjhriJK+Dhu-@lDiIHx1mOp=0ue zM7Fvqzds#}fdA23NySDDeLu8h{8wn2gD|xeX355T0&@``(`J;8(6FI*NUAOR(Z_vt z6m$HXYk?WxTd|b#+>Sp7QJ;N9aV6JKLo*atpDF0vwbljb1evR)w_6F25_*!VNTK1# zvxPx);ZCi-1kmHd+=c5f>a8kUz5;#35M8)6DCYdlOToNQF9q|_lu|IaO)UlU*8Hyl z^9EmFc9TamupC8KLeo)2m4!$l-S4hFfVpspCopT(dO~1Ywj#-rp=zP1@j+ka_HR7| z%3SW6)oQiG%h*z2g{~%9iZ1#xRCI2?l}+1lr7(0wSBk#OJI{OqlvB8yt6pg_o6AyFSNBXq_+=<&?w2N# z%aVKa>y_jUqvK2}hAmmPoEP(tnY#mX4)-Ff*OT%RPG>K)RLj&=Q4IYw-?{k=V7|k3 zIoBEQre*rjmnHm5)FL}@l(P79dx5gT9X!NR`UFgcPz*Kj;iolr%tlnZDdpm{^Tq

z_TyG6g)V9lioTpwo+tWD_r?C$s>m~4YVf3yXWFQ{1Z7Q+CHT*@Lu_1Yyl}8 zo5XRYGY`^P_q+&W?|+B1b>{e$Zkc}~X)el9h65*&mV?VV%JBLq($8fm+dv5ubmUkt za@tnX5N6LMC+=&Ggs1l=-j-uTeuQU^B&yZ#GyDic$B+gWLp%5pF8J$GAbieo9oCNW zbtyiC&$r{f(jynq+nX2Rm@kMb_y~u+M75?JfBZ5DaIxcpCC|Lgf2%b?F(Jf-u^=b* zp#48)_wTgY{r~_UiwFb&00000{{{d;LjnMr083_ofHgOG7jW(G0~!UsA86CL|PmD0s;8ZM)W4?Q{BCp1u!@IQX9MeB1o# z$(N6tjf|cYaTGlNQnJzOl)nB+pQlMISqV%@{%nD4Y-|jZ_wm>{1C5kNGdqF7JE5Ur ze))W(g1_|&r%oswP~dvel~z{A&PSQuEX65Ug74Dt#X006e6Ofvug literal 0 HcmV?d00001 From 1c1d06204d34f8cbfacbc009bfbc020c2ad1f934 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Mon, 1 May 2023 10:01:49 -0700 Subject: [PATCH 16/18] Bugfix for decollapsed outputs: proper linebreaks between buffered writes --- tiny/rna/counter/hts_parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tiny/rna/counter/hts_parsing.py b/tiny/rna/counter/hts_parsing.py index 81b1b789..ec27df54 100644 --- a/tiny/rna/counter/hts_parsing.py +++ b/tiny/rna/counter/hts_parsing.py @@ -193,11 +193,11 @@ def _write_decollapsed_sam(self) -> None: if name != prev_name: seq_count = int(name.rsplit(self._collapser_token, 1)[1]) - aln_out.extend([aln.to_string()] * seq_count) + aln_out.extend([aln.to_string() + '\n'] * seq_count) prev_name = name with open(self._get_decollapsed_filename(), 'a') as sam_o: - sam_o.write('\n'.join(aln_out)) + sam_o.writelines(aln_out) self._decollapsed_reads.clear() From 7f226216d9603998de2d40106c38f6642efa40c7 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Mon, 1 May 2023 10:38:52 -0700 Subject: [PATCH 17/18] Upgrading GFFValidator.alignment_chroms_mismatch_heuristic() to use Pysam so that it can parse BAM files --- tiny/rna/counter/validation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tiny/rna/counter/validation.py b/tiny/rna/counter/validation.py index f53a4049..a068be6e 100644 --- a/tiny/rna/counter/validation.py +++ b/tiny/rna/counter/validation.py @@ -180,11 +180,10 @@ def alignment_chroms_mismatch_heuristic(self, sam_files: List[str], subset_size= for file in sam_files: file_chroms = set() - with open(file, 'rb') as f: - for i, line in zip(range(subset_size), f): - if line[0] == ord('@'): continue - file_chroms.add(line.split(b'\t')[2].strip().decode()) - if i % 10000 == 0 and len(file_chroms & self.chrom_set): break + reader = pysam.AlignmentFile(file) + for i, aln in enumerate(reader.head(subset_size)): + file_chroms.add(aln.reference_name) + if i % 10000 == 0 and len(file_chroms & self.chrom_set): break if not len(file_chroms & self.chrom_set): files_wo_overlap[file] = file_chroms From eb0287e40d21a53b14f4f37e13fa961046364e41 Mon Sep 17 00:00:00 2001 From: Alex Tate <0xalextate@gmail.com> Date: Mon, 1 May 2023 11:30:14 -0700 Subject: [PATCH 18/18] Minor streamlining of info about BAM file requirements --- doc/tiny-count.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/tiny-count.md b/doc/tiny-count.md index 3537c16c..8f367aa9 100644 --- a/doc/tiny-count.md +++ b/doc/tiny-count.md @@ -7,10 +7,16 @@ For an explanation of tiny-count's parameters in the Run Config and by commandli tiny-count offers a variety of options for refining your analysis. You might find that repeat analyses are required while tuning these options to your goals. Using the command `tiny recount`, tinyRNA will run the workflow starting at the tiny-count step using inputs from a prior end-to-end run to save time. 
From eb0287e40d21a53b14f4f37e13fa961046364e41 Mon Sep 17 00:00:00 2001
From: Alex Tate <0xalextate@gmail.com>
Date: Mon, 1 May 2023 11:30:14 -0700
Subject: [PATCH 18/18] Minor streamlining of info about BAM file requirements

---
 doc/tiny-count.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/tiny-count.md b/doc/tiny-count.md
index 3537c16c..8f367aa9 100644
--- a/doc/tiny-count.md
+++ b/doc/tiny-count.md
@@ -7,10 +7,16 @@ For an explanation of tiny-count's parameters in the Run Config and by commandli
 tiny-count offers a variety of options for refining your analysis. You might find that repeat analyses are required while tuning these options to your goals. Using the command `tiny recount`, tinyRNA will run the workflow starting at the tiny-count step using inputs from a prior end-to-end run to save time. See the [pipeline resume documentation](Pipeline.md#resuming-a-prior-analysis) for details and prerequisites.
 
 ## Running as a Standalone Tool
+Skip to [Feature Selection](#feature-selection) if you are using the tinyRNA workflow.
+
 If you would like to run tiny-count as a standalone tool, not as part of an end-to-end or resumed analysis, you can do so with the command `tiny-count`. The command has [one required argument](Parameters.md#full-tiny-count-help-string): your Paths File. Your Samples Sheet will need to list SAM or BAM alignment files rather than FASTQ files in the `Input Files` column. Alignment files from third-party sources are also supported, and if they have been produced from reads collapsed by tiny-collapse or fastx, tiny-count will honor the reported read counts.
 
 #### Input File Requirements
-The SAM/BAM files provided during standalone runs _must_ be ordered so that multi-mapping read alignments are listed adjacent to one another. This adjacency convention is required for proper normalization by genomic hits. For this reason, files with ambiguous sorting or grouping (`SO:unsorted`, `SO:unknown`, or `GO:none`) will be rejected unless they were produced by an alignment tool that we recognize as following the adjacency convention. At this time, this includes Bowtie, Bowtie2, and STAR (an admittedly incomplete list).
+The SAM/BAM files provided during standalone runs _must_ be ordered so that multi-mapping read alignments are listed adjacent to one another. This adjacency convention is required for proper normalization by genomic hits. For this reason, files with ambiguous order will be rejected unless they were produced by an alignment tool that we recognize as following the adjacency convention. At this time, this includes Bowtie, Bowtie2, and STAR (an admittedly incomplete list).
+
+#### BAM File Tips
+- Use the `--no-PG` option with `samtools view` when converting alignments
+- Pysam will issue two warnings about missing index files; they can be ignored
 
 #### Using Non-collapsed Sequence Alignments
 While third-party SAM files from non-collapsed reads are supported, there are some caveats. These files will result in substantially higher resource usage and runtimes; we strongly recommend collapsing prior to alignment. Additionally, the sequence-related stats produced by tiny-count will no longer represent _unique_ sequences. These stats will instead refer to all sequences with unique QNAMEs (that is, multi-alignment bundles still carry a sequence count of 1).
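A closing note on the "Pysam will issue two warnings about missing index files" tip above: those warnings come from htslib and, if unwanted, can be silenced by temporarily lowering pysam's verbosity around the open call. This is a general pysam workaround, not something added by this patch series:

```python
# General pysam workaround for "could not retrieve index file"
# warnings when opening unindexed BAMs; not part of this patch series.
import pysam

def open_alignments_quietly(path: str) -> pysam.AlignmentFile:
    previous = pysam.set_verbosity(0)   # silence htslib warnings
    try:
        return pysam.AlignmentFile(path)
    finally:
        pysam.set_verbosity(previous)   # restore global verbosity
```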