Skip to content

Commit

Permalink
Merge branch 'dev' into 'master'
Browse files Browse the repository at this point in the history
Dev into master

See merge request tron/easyquant!18
  • Loading branch information
patricksorn committed Nov 24, 2023
2 parents 3afde1c + c93eacf commit 3bf8062
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ breakpoint (spanning pairs).
- Count reads using `bp_quant count`
- Output:
- Table with read counts per input sequence
- Table with info on each read (`read_info.tsv`)

## Dependencies

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "bp_quant"
version = "0.5.0"
version = "0.5.1"
authors = [
{name = "TRON - Translational Oncology at the University Medical Center of the Johannes Gutenberg University Mainz", email = "[email protected]"},
]
Expand Down
1 change: 1 addition & 0 deletions src/bp_quant/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,5 @@ def bp_quant_cli():
try:
args.func(args)
except AttributeError as e:
print(e)
parser.parse_args(["--help"])
5 changes: 4 additions & 1 deletion src/bp_quant/plot_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,16 @@ def plot(seq_to_pos, read_info, output_path):
read_dict = {}

with open(read_info) as inf:
next(inf)
for line in inf:
elements = line.rstrip().split("\t")
read_name = elements[0]
read_group = elements[1]
seq_name = elements[2]
start = int(elements[3])
stop = int(elements[4])
read_type = elements[5]
mismatches = int(elements[5])
read_type = elements[6]

if seq_name not in read_dict:
read_dict[seq_name] = {}
Expand Down Expand Up @@ -137,6 +139,7 @@ def add_plot_reads_args(parser):
help="Output file",
required=True
)
parser.set_defaults(func=plot_reads_command)


def plot_reads_command(args):
Expand Down
33 changes: 28 additions & 5 deletions src/bp_quant/requantify.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ def classify_read(aln_start, aln_stop, aln_pairs, intervals, allow_mismatches, b
# define match bases for get_aligned_pairs()
MATCH_BASES = ['A', 'C', 'G', 'T']

read_info = {"junc": False, "within": False, "interval": "", "anchor": 0}
read_info = {"junc": False, "within": False, "interval": "", "anchor": 0, "nm_in_bp_area": 0}

for (interval_name, ref_start, ref_stop) in intervals:
# Check if read spans ref start
if aln_start <= ref_stop - bp_dist and aln_stop >= ref_stop + bp_dist:

num_mismatches = 0
# test that read maps exact (or no del/ins) in pos +/- bp_dist

reg_start = ref_stop - bp_dist
Expand All @@ -90,12 +90,16 @@ def classify_read(aln_start, aln_stop, aln_pairs, intervals, allow_mismatches, b
# test if reference sequence are all capital (means match) according
# to https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.get_aligned_pairs
aln_seq = [s for (q, r, s) in aln_pairs if r is not None and reg_start <= r and r < reg_end]
no_snp = all(s in MATCH_BASES for s in aln_seq)
# Get number of mismatches for this read
match_list = [s in MATCH_BASES for s in aln_seq]
no_snp = all(match_list)
num_mismatches = match_list.count(False)
if (no_ins_or_del and no_snp) or allow_mismatches:
anchor = min(aln_stop - ref_stop, ref_stop - aln_start)
read_info["junc"] = True
read_info["anchor"] = anchor
read_info["interval"] = interval_name
read_info["nm_in_bp_area"] = num_mismatches


# read maps completely within the interval
Expand All @@ -117,6 +121,15 @@ def __init__(self, seq_table_file, bam_file, output_path, bp_dist, allow_mismatc
self.quant_file = os.path.join(output_path, "quantification.tsv")
self.reads_file = os.path.join(output_path, "read_info.tsv.gz")
self.reads_out = gzip.open(self.reads_file, "wb")
self.reads_out.write(("\t".join([
"name",
"mate",
"reference",
"start",
"end",
"num_mismatches_in_bp_area",
"classification"
]) + "\n").encode())
self.bp_dist = bp_dist
self.allow_mismatches = allow_mismatches
self.interval_mode = interval_mode
Expand Down Expand Up @@ -382,8 +395,18 @@ def quantify(self, r1, r2):
r1_type = "softjunc"
if not r2_type:
r2_type = "softjunc"
self.reads_out.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(read_name, "R1", seq_name, r1_start, r1_stop, r1_type).encode())
self.reads_out.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(read_name, "R2", seq_name, r2_start, r2_stop, r2_type).encode())
r1_mismatches = r1_info["nm_in_bp_area"]
r2_mismatches = r2_info["nm_in_bp_area"]
self.reads_out.write(
"{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
read_name, "R1", seq_name, r1_start, r1_stop, r1_mismatches, r1_type
).encode()
)
self.reads_out.write(
"{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
read_name, "R2", seq_name, r2_start, r2_stop, r2_mismatches, r2_type
).encode()
)


def write_results(self):
Expand Down
2 changes: 1 addition & 1 deletion src/bp_quant/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
version_info = (0, 5, 0)
version_info = (0, 5, 1)
version = '.'.join(str(c) for c in version_info)

0 comments on commit 3bf8062

Please sign in to comment.