Python3 conversion
Florian Trigodet authored and committed on Jan 14, 2020
1 parent fe45f3f commit ad4dc7a
Showing 46 changed files with 390 additions and 389 deletions.
18 changes: 9 additions & 9 deletions Oligotyping/lib/b6lib.py
@@ -25,7 +25,7 @@

QUERY_ID, SUBJECT_ID, IDENTITY, ALIGNMENT_LENGTH,\
MISMATCHES, GAPS, Q_START, Q_END, S_START, S_END,\
- E_VALUE, BIT_SCORE, Q_LEN, S_LEN = range(0, 14)
+ E_VALUE, BIT_SCORE, Q_LEN, S_LEN = list(range(0, 14))


class B6Entry:
@@ -167,11 +167,11 @@ def print_b6_file_stats(self):
numpy.min(self.matrix[x]),
numpy.max(self.matrix[x]))

- print
+ print()
  TABULAR('Total Hits', pretty_print(len(self.matrix[IDENTITY])))
- print
- print '    mean       std       min       max'
- print
+ print()
+ print('    mean       std       min       max')
+ print()
TABULAR('Identity', INFO(IDENTITY))
TABULAR('Alignment Length', INFO(ALIGNMENT_LENGTH))
TABULAR('Mismatches', INFO(MISMATCHES))
@@ -182,7 +182,7 @@ def print_b6_file_stats(self):
TABULAR('Target End', INFO(S_END))
TABULAR('E-Value', INFO(E_VALUE))
TABULAR('Bit Score', INFO(BIT_SCORE))
- print
+ print()

def visualize_b6_output(self, title_hint, Q_LENGTH = 101):
if self.matrix == []:
@@ -236,12 +236,12 @@ def _setp(b, c = 'red'):
ax1.plot(p1, c = 'green', label = 'Alignment Start Position')
ax1.plot(p2, c = 'black', linewidth = 3)
ax1.plot(p2, c = 'red', label = 'Alignment End Position')
- plt.fill_between(range(0, len(p1)), p1, y2 = 0, color = 'black', alpha = 0.5)
- plt.fill_between(range(0, len(p2)), p2, y2 = 0, color = 'black', alpha = 0.5)
+ plt.fill_between(list(range(0, len(p1))), p1, y2 = 0, color = 'black', alpha = 0.5)
+ plt.fill_between(list(range(0, len(p2))), p2, y2 = 0, color = 'black', alpha = 0.5)

plt.ylabel('Percent of Hits')
plt.xlabel('Position')
- plt.xticks(range(0, Q_LENGTH, Q_LENGTH / 100), range(1, Q_LENGTH + 1, Q_LENGTH / 100), rotation=90, size='xx-small')
+ plt.xticks(list(range(0, Q_LENGTH, Q_LENGTH / 100)), list(range(1, Q_LENGTH + 1, Q_LENGTH / 100)), rotation=90, size='xx-small')
plt.yticks([t for t in range(0, 101, 10)], ['%s%%' % t for t in range(0, 101, 10)], size='xx-small')
plt.ylim(ymin = 0, ymax = 100)
plt.xlim(xmin = 0, xmax = Q_LENGTH - 1)
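The hunks above apply two mechanical Python 3 fixes: print becomes a function, and range() now returns a lazy object that must be wrapped in list() wherever list behavior is relied on. A minimal sketch of both patterns (variable names are illustrative, not from the source):

    # Python 2's bare `print` statement becomes a function call
    print()                          # prints a blank line
    print('    mean       std       min       max')

    # Python 2's range() returned a list; Python 3's is a lazy range object
    columns = list(range(0, 14))     # materialize when an actual list is needed

One caveat the list() wrapping alone does not address: / is true division in Python 3, so a step such as Q_LENGTH / 100 in the xticks call above yields a float, which range() rejects; that call site would additionally need floor division (Q_LENGTH // 100) to run.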
48 changes: 24 additions & 24 deletions Oligotyping/lib/decomposer.py
@@ -16,7 +16,7 @@
import time
import numpy
import shutil
- import cPickle
+ import pickle
import logging

import Oligotyping as o
@@ -140,9 +140,9 @@ def check_apps(self):
try:
blast.LocalBLAST(None, None, None)
except blast.ModuleVersionError:
- raise utils.ConfigError, blast.version_error_text
+ raise utils.ConfigError(blast.version_error_text)
except blast.ModuleBinaryError:
- raise utils.ConfigError, blast.missing_binary_error_text
+ raise utils.ConfigError(blast.missing_binary_error_text)

# FIXME: check R modules here.

@@ -156,10 +156,10 @@ def check_dirs(self):
try:
os.makedirs(self.output_directory)
except:
raise utils.ConfigError, "Output directory does not exist (attempt to create one failed as well): '%s'" % \
(self.output_directory)
raise utils.ConfigError("Output directory does not exist (attempt to create one failed as well): '%s'" % \
(self.output_directory))
if not os.access(self.output_directory, os.W_OK):
raise utils.ConfigError, "You do not have write permission for the output directory: '%s'" % self.output_directory
raise utils.ConfigError("You do not have write permission for the output directory: '%s'" % self.output_directory)

self.tmp_directory = self.generate_output_destination('TMP', directory = True)
self.nodes_directory = self.generate_output_destination('NODES', directory = True)
@@ -169,18 +169,18 @@

def check_input_files(self):
if (not os.path.exists(self.alignment)) or (not os.access(self.alignment, os.R_OK)):
raise utils.ConfigError, "Alignment file is not accessible: '%s'" % self.alignment
raise utils.ConfigError("Alignment file is not accessible: '%s'" % self.alignment)

if self.sample_mapping:
if (not os.path.exists(self.sample_mapping)) or (not os.access(self.sample_mapping, os.R_OK)):
raise utils.ConfigError, "Sample mapping file is not accessible: '%s'" % self.sample_mapping
raise utils.ConfigError("Sample mapping file is not accessible: '%s'" % self.sample_mapping)

samples = None
if not self.skip_check_input_file:
self.progress.new('Checking the input FASTA')
samples = utils.check_input_alignment(self.alignment, self.sample_name_separator, self.progress)
if not samples:
- raise utils.ConfigError, 'Exiting.'
+ raise utils.ConfigError('Exiting.')
self.progress.end()

if self.sample_mapping:
@@ -220,8 +220,8 @@ def _init_topology(self):
self.root = self.topology.add_new_node('root', reads, root = True)

if self.root.size < self.min_actual_abundance:
raise utils.ConfigError, "The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \
(self.root.size, self.min_actual_abundance)
raise utils.ConfigError("The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \
(self.root.size, self.min_actual_abundance))

self.node_ids_to_analyze = ['root']

@@ -420,9 +420,9 @@ def _generate_raw_topology(self):
if node.reads[0].frequency < self.min_substantive_abundance:
if node.node_id == 'root':
self.progress.end()
raise utils.ConfigError, "Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \
raise utils.ConfigError("Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \
% (node.reads[0].frequency,
self.min_substantive_abundance)
self.min_substantive_abundance))

else:
# remove the node and store its content.
@@ -536,7 +536,7 @@ def _generate_raw_topology(self):

oligo = ''.join([read.seq[d] for d in node.discriminants])

- if new_nodes_dict.has_key(oligo):
+ if oligo in new_nodes_dict:
new_nodes_dict[oligo]['reads'].append(read)
else:
new_node_id = self.topology.get_new_node_id()
@@ -546,7 +546,7 @@ def _generate_raw_topology(self):


# all reads in the parent node are analyzed. time to add spawned nodes into the topology.
- oligos = new_nodes_dict.keys()
+ oligos = list(new_nodes_dict.keys())
len_oligos = len(oligos)
for i in range(0, len_oligos):
self.progress.update(p + ' / new nodes %d of %d ' % (i + 1, len_oligos))
@@ -678,7 +678,7 @@ def _refine_topology(self):

abundant_reads_in_outlier_bin = []

- if self.topology.outliers.has_key('maximum_variation_allowed_reason'):
+ if 'maximum_variation_allowed_reason' in self.topology.outliers:
abundant_reads_in_outlier_bin = [read_object for read_object in \
self.topology.outliers['maximum_variation_allowed_reason'] \
if read_object.frequency > self.min_substantive_abundance]
@@ -1075,7 +1075,7 @@ def get_dict_entry_tmpl():

self.progress.update('Processing reads that were represented in results')
for sample in self.samples_dict:
- if not read_distribution_dict.has_key(sample):
+ if sample not in read_distribution_dict:
read_distribution_dict[sample] = get_dict_entry_tmpl()

read_distribution_dict[sample]['represented_reads'] = sum(self.samples_dict[sample].values())
@@ -1086,7 +1086,7 @@ def get_dict_entry_tmpl():
for read_id in read_object.ids:
sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator)

- if not read_distribution_dict.has_key(sample):
+ if sample not in read_distribution_dict:
read_distribution_dict[sample] = get_dict_entry_tmpl()

read_distribution_dict[sample][reason] += 1
@@ -1166,11 +1166,11 @@ def _generate_samples_dict(self):
for read_id in read.ids:
sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator)

- if not self.samples_dict.has_key(sample):
+ if sample not in self.samples_dict:
self.samples_dict[sample] = {}
self.samples.append(sample)

- if self.samples_dict[sample].has_key(node_id):
+ if node_id in self.samples_dict[sample]:
self.samples_dict[sample][node_id] += 1
else:
self.samples_dict[sample][node_id] = 1
@@ -1238,7 +1238,7 @@ def _store_topology_dict(self):
self.progress.end()

topology_dict_file_path = self.generate_output_destination('TOPOLOGY-LIGHT.cPickle')
- cPickle.dump(topology_dict, open(topology_dict_file_path, 'w'))
+ pickle.dump(topology_dict, open(topology_dict_file_path, 'w'))
self.run.info('topology_light_dict', topology_dict_file_path)


@@ -1347,7 +1347,7 @@ def _generate_html_output(self):
from Oligotyping.utils.html.error import HTMLError
try:
from Oligotyping.utils.html.for_decomposition import generate_html_output
- except HTMLError, e:
+ except HTMLError as e:
sys.stdout.write('\n\n\t%s\n\n' % e)
sys.exit()

@@ -1383,7 +1383,7 @@ def _generate_default_figures(self):

figures_dict = generate_default_figures(self)
figures_dict_file_path = self.generate_output_destination("FIGURES.cPickle")
- cPickle.dump(figures_dict, open(figures_dict_file_path, 'w'))
+ pickle.dump(figures_dict, open(figures_dict_file_path, 'w'))

self.progress.end()
self.run.info('figures_dict_file_path', figures_dict_file_path)
@@ -1397,7 +1397,7 @@ def _generate_exclusive_figures(self):

exclusive_figures_dict = generate_exclusive_figures(self)
exclusive_figures_dict_file_path = self.generate_output_destination("EXCLUSIVE-FIGURES.cPickle")
- cPickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w'))
+ pickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w'))

self.progress.end()
self.run.info('exclusive_figures_dict_file_path', exclusive_figures_dict_file_path)
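Four Python 2 idioms recur throughout this file: raise Exc, msg becomes raise Exc(msg), except Exc, e becomes except Exc as e, d.has_key(k) becomes k in d, and the C-accelerated cPickle module is gone (Python 3's pickle uses the C implementation automatically). A minimal sketch of all four (names are illustrative, not from the source):

    import pickle

    samples = {'sample_01': 12}
    if 'sample_01' in samples:            # replaces samples.has_key('sample_01')
        samples['sample_01'] += 1

    try:
        raise ValueError('bad input')     # replaces: raise ValueError, 'bad input'
    except ValueError as e:               # replaces: except ValueError, e
        print(e)

    # Python 3 pickles to bytes, so the file must be opened in binary mode
    with open('example.cPickle', 'wb') as f:
        pickle.dump(samples, f)

Note that the pickle.dump() calls in the diff keep open(..., 'w'); under Python 3 pickle writes bytes, so those calls would need 'wb' to succeed.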
10 changes: 5 additions & 5 deletions Oligotyping/lib/entropy.py
@@ -79,11 +79,11 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued
progress.new('Processing the Alignment')

# processing the alignment file..
- while alignment.next():
+ while next(alignment):
# check the alignment lengths along the way:
if previous_alignment_length:
if previous_alignment_length != len(alignment.seq):
raise EntropyError, "Not all reads have the same length."
raise EntropyError("Not all reads have the same length.")

# print out process info
if alignment.pos % 10000 == 0:
@@ -96,7 +96,7 @@
try:
frequency = freq_from_defline(alignment.id)
except IndexError:
raise EntropyError, "Reads declared as unique, but they do not have proper deflines. See help for --uniqued."
raise EntropyError("Reads declared as unique, but they do not have proper deflines. See help for --uniqued.")

for i in range(0, frequency):
lines.append(alignment.seq)
@@ -124,7 +124,7 @@

if weighted:
if not qual_stats_dict:
raise EntropyError, "Weighted entropy is selected, but no qual stats are provided"
raise EntropyError("Weighted entropy is selected, but no qual stats are provided")
e = entropy(column, l_qual = qual_stats_dict[position], amino_acid_sequences = amino_acid_sequences)
else:
e = entropy(column, amino_acid_sequences = amino_acid_sequences)
@@ -164,7 +164,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued

def quick_entropy(l, amino_acid_sequences = False):
if len(set([len(x) for x in l])) != 1:
raise EntropyError, "Not all vectors have the same length."
raise EntropyError("Not all vectors have the same length.")

entropy_tpls = []
for position in range(0, len(l[0])):
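The alignment.next() calls are rewritten as next(alignment), the built-in that dispatches to __next__ (the Python 3 name for the iterator method, renamed in fastalib.py below). These while loops rely on the source returning a falsy value at end of input rather than raising StopIteration, so here is a sketch of that protocol under the same assumption (ReadSource is an illustrative stand-in, not the library's class):

    class ReadSource:
        def __init__(self, seqs):
            self._seqs = iter(seqs)
            self.seq = None

        def __next__(self):               # was `def next(self)` in Python 2
            self.seq = next(self._seqs, None)
            return self.seq is not None   # falsy at end of input

    reads = ReadSource(['ACGT', 'ACGA'])
    while next(reads):                    # replaces: while reads.next()
        print(reads.seq)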
20 changes: 10 additions & 10 deletions Oligotyping/lib/fastalib.py
@@ -36,7 +36,7 @@ def write_seq(self, seq, split = True):
self.output_file_obj.write('%s\n' % seq)

def split(self, sequence, piece_length = 80):
- ticks = range(0, len(sequence), piece_length) + [len(sequence)]
+ ticks = list(range(0, len(sequence), piece_length)) + [len(sequence)]
return '\n'.join([sequence[ticks[x]:ticks[x + 1]] for x in range(0, len(ticks) - 1)])

def close(self):
@@ -50,7 +50,7 @@

self.fasta = SequenceSource(f_name)

- while self.fasta.next():
+ while next(self.fasta):
if self.fasta.pos % 1000 == 0 or self.fasta.pos == 1:
sys.stderr.write('\r[fastalib] Reading FASTA into memory: %s' % (self.fasta.pos))
sys.stderr.flush()
@@ -94,7 +94,7 @@ def __init__(self, fasta_file_path, lazy_init = True, unique = False, allow_mixe

def init_unique_hash(self):
while self.next_regular():
- hash = hashlib.sha1(self.seq.upper()).hexdigest()
+ hash = hashlib.sha1(self.seq.upper().encode('utf-8')).hexdigest()
if hash in self.unique_hash_dict:
self.unique_hash_dict[hash]['ids'].append(self.id)
self.unique_hash_dict[hash]['count'] += 1
@@ -111,7 +111,7 @@ def init_unique_hash(self):
self.total_unique = len(self.unique_hash_dict)
self.reset()

- def next(self):
+ def __next__(self):
if self.unique:
return self.next_unique()
else:
@@ -159,7 +159,7 @@ def next_regular(self):

def get_seq_by_read_id(self, read_id):
self.reset()
- while self.next():
+ while next(self):
if self.id == read_id:
return self.seq

@@ -184,7 +184,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len

self.reset()

- while self.next():
+ while next(self):
if self.pos % 10000 == 0 or self.pos == 1:
sys.stderr.write('\r[fastalib] Reading: %s' % (self.pos))
sys.stderr.flush()
@@ -213,7 +213,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
plt.subplots_adjust(left=0.05, bottom = 0.03, top = 0.95, right = 0.98)

plt.plot(seq_len_distribution, color = 'black', alpha = 0.3)
- plt.fill_between(range(0, max_seq_len + 1), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15)
+ plt.fill_between(list(range(0, max_seq_len + 1)), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15)
plt.ylabel('number of sequences')
plt.xlabel('sequence length')

@@ -223,8 +223,8 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
if ytickstep == None:
ytickstep = max(seq_len_distribution) / 20 or 1

- plt.xticks(range(xtickstep, max_seq_len + 1, xtickstep), rotation=90, size='xx-small')
- plt.yticks(range(0, max(seq_len_distribution) + 1, ytickstep),
+ plt.xticks(list(range(xtickstep, max_seq_len + 1, xtickstep)), rotation=90, size='xx-small')
+ plt.yticks(list(range(0, max(seq_len_distribution) + 1, ytickstep)),
[y for y in range(0, max(seq_len_distribution) + 1, ytickstep)],
size='xx-small')
plt.xlim(xmin = 0, xmax = max_seq_len)
@@ -281,7 +281,7 @@ def __init__(self, quals_file_path, lazy_init = True):
self.reset()


- def next(self):
+ def __next__(self):
self.id = self.file_pointer.readline()[1:].strip()
self.quals = None
self.quals_int = None
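Besides the next()/__next__ renaming, the substantive change in this file is the .encode('utf-8') call: Python 3's hashlib accepts only bytes, so str sequences must be encoded before hashing. A minimal sketch (the sequence literal is illustrative):

    import hashlib

    seq = 'acgtacgt'
    # hashlib.sha1() rejects str in Python 3; encode to bytes first
    digest = hashlib.sha1(seq.upper().encode('utf-8')).hexdigest()
    print(digest)                         # 40-character hex digest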
(diff truncated: 42 of 46 changed files not shown)
