Python3 conversion
Florian Trigodet authored and committed on Jan 14, 2020
1 parent fe45f3f commit ad4dc7a
Showing 46 changed files with 390 additions and 389 deletions.
18 changes: 9 additions & 9 deletions Oligotyping/lib/b6lib.py
@@ -25,7 +25,7 @@

QUERY_ID, SUBJECT_ID, IDENTITY, ALIGNMENT_LENGTH,\
MISMATCHES, GAPS, Q_START, Q_END, S_START, S_END,\
- E_VALUE, BIT_SCORE, Q_LEN, S_LEN = range(0, 14)
+ E_VALUE, BIT_SCORE, Q_LEN, S_LEN = list(range(0, 14))


class B6Entry:
@@ -167,11 +167,11 @@ def print_b6_file_stats(self):
numpy.min(self.matrix[x]),
numpy.max(self.matrix[x]))

- print
+ print()
  TABULAR('Total Hits', pretty_print(len(self.matrix[IDENTITY])))
- print
- print '    mean       std       min       max'
- print
+ print()
+ print('    mean       std       min       max')
+ print()
TABULAR('Identity', INFO(IDENTITY))
TABULAR('Alignment Length', INFO(ALIGNMENT_LENGTH))
TABULAR('Mismatches', INFO(MISMATCHES))
@@ -182,7 +182,7 @@ def print_b6_file_stats(self):
TABULAR('Target End', INFO(S_END))
TABULAR('E-Value', INFO(E_VALUE))
TABULAR('Bit Score', INFO(BIT_SCORE))
- print
+ print()

def visualize_b6_output(self, title_hint, Q_LENGTH = 101):
if self.matrix == []:
@@ -236,12 +236,12 @@ def _setp(b, c = 'red'):
ax1.plot(p1, c = 'green', label = 'Alignment Start Position')
ax1.plot(p2, c = 'black', linewidth = 3)
ax1.plot(p2, c = 'red', label = 'Alignment End Position')
- plt.fill_between(range(0, len(p1)), p1, y2 = 0, color = 'black', alpha = 0.5)
- plt.fill_between(range(0, len(p2)), p2, y2 = 0, color = 'black', alpha = 0.5)
+ plt.fill_between(list(range(0, len(p1))), p1, y2 = 0, color = 'black', alpha = 0.5)
+ plt.fill_between(list(range(0, len(p2))), p2, y2 = 0, color = 'black', alpha = 0.5)

plt.ylabel('Percent of Hits')
plt.xlabel('Position')
- plt.xticks(range(0, Q_LENGTH, Q_LENGTH / 100), range(1, Q_LENGTH + 1, Q_LENGTH / 100), rotation=90, size='xx-small')
+ plt.xticks(list(range(0, Q_LENGTH, Q_LENGTH / 100)), list(range(1, Q_LENGTH + 1, Q_LENGTH / 100)), rotation=90, size='xx-small')
plt.yticks([t for t in range(0, 101, 10)], ['%s%%' % t for t in range(0, 101, 10)], size='xx-small')
plt.ylim(ymin = 0, ymax = 100)
plt.xlim(xmin = 0, xmax = Q_LENGTH - 1)
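The hunks above apply two mechanical Python 3 fixes: print becomes a function, and range() now returns a lazy object that must be wrapped in list() wherever list behavior is relied on. A minimal sketch of both patterns (variable names are illustrative, not from the source):

    # Python 2's bare `print` statement becomes a function call
    print()                          # prints a blank line
    print('    mean       std       min       max')

    # Python 2's range() returned a list; Python 3's is a lazy range object
    columns = list(range(0, 14))     # materialize when an actual list is needed

One caveat the list() wrapping alone does not address: / is true division in Python 3, so a step such as Q_LENGTH / 100 in the xticks call above yields a float, which range() rejects; that call site would additionally need floor division (Q_LENGTH // 100) to run.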
48 changes: 24 additions & 24 deletions Oligotyping/lib/decomposer.py
@@ -16,7 +16,7 @@
import time
import numpy
import shutil
- import cPickle
+ import pickle
import logging

import Oligotyping as o
@@ -140,9 +140,9 @@ def check_apps(self):
try:
blast.LocalBLAST(None, None, None)
except blast.ModuleVersionError:
- raise utils.ConfigError, blast.version_error_text
+ raise utils.ConfigError(blast.version_error_text)
except blast.ModuleBinaryError:
- raise utils.ConfigError, blast.missing_binary_error_text
+ raise utils.ConfigError(blast.missing_binary_error_text)

# FIXME: check R modules here.

@@ -156,10 +156,10 @@ def check_dirs(self):
try:
os.makedirs(self.output_directory)
except:
raise utils.ConfigError, "Output directory does not exist (attempt to create one failed as well): '%s'" % \
(self.output_directory)
raise utils.ConfigError("Output directory does not exist (attempt to create one failed as well): '%s'" % \
(self.output_directory))
if not os.access(self.output_directory, os.W_OK):
raise utils.ConfigError, "You do not have write permission for the output directory: '%s'" % self.output_directory
raise utils.ConfigError("You do not have write permission for the output directory: '%s'" % self.output_directory)

self.tmp_directory = self.generate_output_destination('TMP', directory = True)
self.nodes_directory = self.generate_output_destination('NODES', directory = True)
@@ -169,18 +169,18 @@

def check_input_files(self):
if (not os.path.exists(self.alignment)) or (not os.access(self.alignment, os.R_OK)):
raise utils.ConfigError, "Alignment file is not accessible: '%s'" % self.alignment
raise utils.ConfigError("Alignment file is not accessible: '%s'" % self.alignment)

if self.sample_mapping:
if (not os.path.exists(self.sample_mapping)) or (not os.access(self.sample_mapping, os.R_OK)):
raise utils.ConfigError, "Sample mapping file is not accessible: '%s'" % self.sample_mapping
raise utils.ConfigError("Sample mapping file is not accessible: '%s'" % self.sample_mapping)

samples = None
if not self.skip_check_input_file:
self.progress.new('Checking the input FASTA')
samples = utils.check_input_alignment(self.alignment, self.sample_name_separator, self.progress)
if not samples:
- raise utils.ConfigError, 'Exiting.'
+ raise utils.ConfigError('Exiting.')
self.progress.end()

if self.sample_mapping:
@@ -220,8 +220,8 @@ def _init_topology(self):
self.root = self.topology.add_new_node('root', reads, root = True)

if self.root.size < self.min_actual_abundance:
raise utils.ConfigError, "The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \
(self.root.size, self.min_actual_abundance)
raise utils.ConfigError("The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \
(self.root.size, self.min_actual_abundance))

self.node_ids_to_analyze = ['root']

@@ -420,9 +420,9 @@ def _generate_raw_topology(self):
if node.reads[0].frequency < self.min_substantive_abundance:
if node.node_id == 'root':
self.progress.end()
raise utils.ConfigError, "Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \
raise utils.ConfigError("Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \
% (node.reads[0].frequency,
self.min_substantive_abundance)
self.min_substantive_abundance))

else:
# remove the node and store its content.
@@ -536,7 +536,7 @@ def _generate_raw_topology(self):

oligo = ''.join([read.seq[d] for d in node.discriminants])

- if new_nodes_dict.has_key(oligo):
+ if oligo in new_nodes_dict:
new_nodes_dict[oligo]['reads'].append(read)
else:
new_node_id = self.topology.get_new_node_id()
@@ -546,7 +546,7 @@ def _generate_raw_topology(self):


# all reads in the parent node are analyzed. time to add spawned nodes into the topology.
- oligos = new_nodes_dict.keys()
+ oligos = list(new_nodes_dict.keys())
len_oligos = len(oligos)
for i in range(0, len_oligos):
self.progress.update(p + ' / new nodes %d of %d ' % (i + 1, len_oligos))
@@ -678,7 +678,7 @@ def _refine_topology(self):

abundant_reads_in_outlier_bin = []

- if self.topology.outliers.has_key('maximum_variation_allowed_reason'):
+ if 'maximum_variation_allowed_reason' in self.topology.outliers:
abundant_reads_in_outlier_bin = [read_object for read_object in \
self.topology.outliers['maximum_variation_allowed_reason'] \
if read_object.frequency > self.min_substantive_abundance]
@@ -1075,7 +1075,7 @@ def get_dict_entry_tmpl():

self.progress.update('Processing reads that were represented in results')
for sample in self.samples_dict:
- if not read_distribution_dict.has_key(sample):
+ if sample not in read_distribution_dict:
read_distribution_dict[sample] = get_dict_entry_tmpl()

read_distribution_dict[sample]['represented_reads'] = sum(self.samples_dict[sample].values())
@@ -1086,7 +1086,7 @@ def get_dict_entry_tmpl():
for read_id in read_object.ids:
sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator)

- if not read_distribution_dict.has_key(sample):
+ if sample not in read_distribution_dict:
read_distribution_dict[sample] = get_dict_entry_tmpl()

read_distribution_dict[sample][reason] += 1
@@ -1166,11 +1166,11 @@ def _generate_samples_dict(self):
for read_id in read.ids:
sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator)

- if not self.samples_dict.has_key(sample):
+ if sample not in self.samples_dict:
self.samples_dict[sample] = {}
self.samples.append(sample)

- if self.samples_dict[sample].has_key(node_id):
+ if node_id in self.samples_dict[sample]:
self.samples_dict[sample][node_id] += 1
else:
self.samples_dict[sample][node_id] = 1
@@ -1238,7 +1238,7 @@ def _store_topology_dict(self):
self.progress.end()

topology_dict_file_path = self.generate_output_destination('TOPOLOGY-LIGHT.cPickle')
- cPickle.dump(topology_dict, open(topology_dict_file_path, 'w'))
+ pickle.dump(topology_dict, open(topology_dict_file_path, 'w'))
self.run.info('topology_light_dict', topology_dict_file_path)


@@ -1347,7 +1347,7 @@ def _generate_html_output(self):
from Oligotyping.utils.html.error import HTMLError
try:
from Oligotyping.utils.html.for_decomposition import generate_html_output
- except HTMLError, e:
+ except HTMLError as e:
sys.stdout.write('\n\n\t%s\n\n' % e)
sys.exit()

@@ -1383,7 +1383,7 @@ def _generate_default_figures(self):

figures_dict = generate_default_figures(self)
figures_dict_file_path = self.generate_output_destination("FIGURES.cPickle")
- cPickle.dump(figures_dict, open(figures_dict_file_path, 'w'))
+ pickle.dump(figures_dict, open(figures_dict_file_path, 'w'))

self.progress.end()
self.run.info('figures_dict_file_path', figures_dict_file_path)
@@ -1397,7 +1397,7 @@ def _generate_exclusive_figures(self):

exclusive_figures_dict = generate_exclusive_figures(self)
exclusive_figures_dict_file_path = self.generate_output_destination("EXCLUSIVE-FIGURES.cPickle")
- cPickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w'))
+ pickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w'))

self.progress.end()
self.run.info('exclusive_figures_dict_file_path', exclusive_figures_dict_file_path)
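Four Python 2 idioms recur throughout this file: raise Exc, msg becomes raise Exc(msg), except Exc, e becomes except Exc as e, d.has_key(k) becomes k in d, and the C-accelerated cPickle module is gone (Python 3's pickle uses the C implementation automatically). A minimal sketch of all four (names are illustrative, not from the source):

    import pickle

    samples = {'sample_01': 12}
    if 'sample_01' in samples:            # replaces samples.has_key('sample_01')
        samples['sample_01'] += 1

    try:
        raise ValueError('bad input')     # replaces: raise ValueError, 'bad input'
    except ValueError as e:               # replaces: except ValueError, e
        print(e)

    # Python 3 pickles to bytes, so the file must be opened in binary mode
    with open('example.cPickle', 'wb') as f:
        pickle.dump(samples, f)

Note that the pickle.dump() calls in the diff keep open(..., 'w'); under Python 3 pickle writes bytes, so those calls would need 'wb' to succeed.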
10 changes: 5 additions & 5 deletions Oligotyping/lib/entropy.py
@@ -79,11 +79,11 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued
progress.new('Processing the Alignment')

# processing the alignment file..
- while alignment.next():
+ while next(alignment):
# check the alignment lengths along the way:
if previous_alignment_length:
if previous_alignment_length != len(alignment.seq):
raise EntropyError, "Not all reads have the same length."
raise EntropyError("Not all reads have the same length.")

# print out process info
if alignment.pos % 10000 == 0:
@@ -96,7 +96,7 @@
try:
frequency = freq_from_defline(alignment.id)
except IndexError:
raise EntropyError, "Reads declared as unique, but they do not have proper deflines. See help for --uniqued."
raise EntropyError("Reads declared as unique, but they do not have proper deflines. See help for --uniqued.")

for i in range(0, frequency):
lines.append(alignment.seq)
@@ -124,7 +124,7 @@

if weighted:
if not qual_stats_dict:
raise EntropyError, "Weighted entropy is selected, but no qual stats are provided"
raise EntropyError("Weighted entropy is selected, but no qual stats are provided")
e = entropy(column, l_qual = qual_stats_dict[position], amino_acid_sequences = amino_acid_sequences)
else:
e = entropy(column, amino_acid_sequences = amino_acid_sequences)
@@ -164,7 +164,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued

def quick_entropy(l, amino_acid_sequences = False):
if len(set([len(x) for x in l])) != 1:
raise EntropyError, "Not all vectors have the same length."
raise EntropyError("Not all vectors have the same length.")

entropy_tpls = []
for position in range(0, len(l[0])):
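The alignment.next() calls are rewritten as next(alignment), the built-in that dispatches to __next__ (the Python 3 name for the iterator method, renamed in fastalib.py below). These while loops rely on the source returning a falsy value at end of input rather than raising StopIteration, so here is a sketch of that protocol under the same assumption (ReadSource is an illustrative stand-in, not the library's class):

    class ReadSource:
        def __init__(self, seqs):
            self._seqs = iter(seqs)
            self.seq = None

        def __next__(self):               # was `def next(self)` in Python 2
            self.seq = next(self._seqs, None)
            return self.seq is not None   # falsy at end of input

    reads = ReadSource(['ACGT', 'ACGA'])
    while next(reads):                    # replaces: while reads.next()
        print(reads.seq)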
20 changes: 10 additions & 10 deletions Oligotyping/lib/fastalib.py
@@ -36,7 +36,7 @@ def write_seq(self, seq, split = True):
self.output_file_obj.write('%s\n' % seq)

def split(self, sequence, piece_length = 80):
- ticks = range(0, len(sequence), piece_length) + [len(sequence)]
+ ticks = list(range(0, len(sequence), piece_length)) + [len(sequence)]
return '\n'.join([sequence[ticks[x]:ticks[x + 1]] for x in range(0, len(ticks) - 1)])

def close(self):
@@ -50,7 +50,7 @@

self.fasta = SequenceSource(f_name)

- while self.fasta.next():
+ while next(self.fasta):
if self.fasta.pos % 1000 == 0 or self.fasta.pos == 1:
sys.stderr.write('\r[fastalib] Reading FASTA into memory: %s' % (self.fasta.pos))
sys.stderr.flush()
@@ -94,7 +94,7 @@ def __init__(self, fasta_file_path, lazy_init = True, unique = False, allow_mixe

def init_unique_hash(self):
while self.next_regular():
- hash = hashlib.sha1(self.seq.upper()).hexdigest()
+ hash = hashlib.sha1(self.seq.upper().encode('utf-8')).hexdigest()
if hash in self.unique_hash_dict:
self.unique_hash_dict[hash]['ids'].append(self.id)
self.unique_hash_dict[hash]['count'] += 1
@@ -111,7 +111,7 @@ def init_unique_hash(self):
self.total_unique = len(self.unique_hash_dict)
self.reset()

- def next(self):
+ def __next__(self):
if self.unique:
return self.next_unique()
else:
@@ -159,7 +159,7 @@ def next_regular(self):

def get_seq_by_read_id(self, read_id):
self.reset()
- while self.next():
+ while next(self):
if self.id == read_id:
return self.seq

@@ -184,7 +184,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len

self.reset()

- while self.next():
+ while next(self):
if self.pos % 10000 == 0 or self.pos == 1:
sys.stderr.write('\r[fastalib] Reading: %s' % (self.pos))
sys.stderr.flush()
@@ -213,7 +213,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
plt.subplots_adjust(left=0.05, bottom = 0.03, top = 0.95, right = 0.98)

plt.plot(seq_len_distribution, color = 'black', alpha = 0.3)
- plt.fill_between(range(0, max_seq_len + 1), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15)
+ plt.fill_between(list(range(0, max_seq_len + 1)), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15)
plt.ylabel('number of sequences')
plt.xlabel('sequence length')

@@ -223,8 +223,8 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
if ytickstep == None:
ytickstep = max(seq_len_distribution) / 20 or 1

- plt.xticks(range(xtickstep, max_seq_len + 1, xtickstep), rotation=90, size='xx-small')
- plt.yticks(range(0, max(seq_len_distribution) + 1, ytickstep),
+ plt.xticks(list(range(xtickstep, max_seq_len + 1, xtickstep)), rotation=90, size='xx-small')
+ plt.yticks(list(range(0, max(seq_len_distribution) + 1, ytickstep)),
[y for y in range(0, max(seq_len_distribution) + 1, ytickstep)],
size='xx-small')
plt.xlim(xmin = 0, xmax = max_seq_len)
@@ -281,7 +281,7 @@ def __init__(self, quals_file_path, lazy_init = True):
self.reset()


- def next(self):
+ def __next__(self):
self.id = self.file_pointer.readline()[1:].strip()
self.quals = None
self.quals_int = None
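Besides the next()/__next__ renaming, the substantive change in this file is the .encode('utf-8') call: Python 3's hashlib accepts only bytes, so str sequences must be encoded before hashing. A minimal sketch (the sequence literal is illustrative):

    import hashlib

    seq = 'acgtacgt'
    # hashlib.sha1() rejects str in Python 3; encode to bytes first
    digest = hashlib.sha1(seq.upper().encode('utf-8')).hexdigest()
    print(digest)                         # 40-character hex digest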
(diff truncated: 42 of 46 changed files not shown)
