diff --git a/Oligotyping/lib/b6lib.py b/Oligotyping/lib/b6lib.py
index ac6c7f8..b5b761d 100644
--- a/Oligotyping/lib/b6lib.py
+++ b/Oligotyping/lib/b6lib.py
@@ -25,7 +25,7 @@
 QUERY_ID, SUBJECT_ID, IDENTITY, ALIGNMENT_LENGTH,\
 MISMATCHES, GAPS, Q_START, Q_END, S_START, S_END,\
-E_VALUE, BIT_SCORE, Q_LEN, S_LEN = range(0, 14)
+E_VALUE, BIT_SCORE, Q_LEN, S_LEN = list(range(0, 14))
 
 
 class B6Entry:
@@ -167,11 +167,11 @@ def print_b6_file_stats(self):
                                              numpy.min(self.matrix[x]),
                                              numpy.max(self.matrix[x]))
 
-        print
+        print()
         TABULAR('Total Hits', pretty_print(len(self.matrix[IDENTITY])))
-        print
-        print ' mean std min max'
-        print
+        print()
+        print(' mean std min max')
+        print()
         TABULAR('Identity', INFO(IDENTITY))
         TABULAR('Alignment Length', INFO(ALIGNMENT_LENGTH))
         TABULAR('Mismatches', INFO(MISMATCHES))
@@ -182,7 +182,7 @@ def print_b6_file_stats(self):
         TABULAR('Target End', INFO(S_END))
         TABULAR('E-Value', INFO(E_VALUE))
         TABULAR('Bit Score', INFO(BIT_SCORE))
-        print
+        print()
 
     def visualize_b6_output(self, title_hint, Q_LENGTH = 101):
         if self.matrix == []:
@@ -236,12 +236,12 @@ def _setp(b, c = 'red'):
         ax1.plot(p1, c = 'green', label = 'Alignment Start Position')
         ax1.plot(p2, c = 'black', linewidth = 3)
         ax1.plot(p2, c = 'red', label = 'Alignment End Position')
-        plt.fill_between(range(0, len(p1)), p1, y2 = 0, color = 'black', alpha = 0.5)
-        plt.fill_between(range(0, len(p2)), p2, y2 = 0, color = 'black', alpha = 0.5)
+        plt.fill_between(list(range(0, len(p1))), p1, y2 = 0, color = 'black', alpha = 0.5)
+        plt.fill_between(list(range(0, len(p2))), p2, y2 = 0, color = 'black', alpha = 0.5)
 
         plt.ylabel('Percent of Hits')
         plt.xlabel('Position')
-        plt.xticks(range(0, Q_LENGTH, Q_LENGTH / 100), range(1, Q_LENGTH + 1, Q_LENGTH / 100), rotation=90, size='xx-small')
+        plt.xticks(list(range(0, Q_LENGTH, Q_LENGTH // 100)), list(range(1, Q_LENGTH + 1, Q_LENGTH // 100)), rotation=90, size='xx-small')
         plt.yticks([t for t in range(0, 101, 10)], ['%s%%' % t for t in range(0, 101, 10)], size='xx-small')
         plt.ylim(ymin = 0, ymax = 100)
         plt.xlim(xmin = 0, xmax = Q_LENGTH - 1)
diff --git a/Oligotyping/lib/decomposer.py b/Oligotyping/lib/decomposer.py
index 5a7f22c..465e90d 100644
--- a/Oligotyping/lib/decomposer.py
+++ b/Oligotyping/lib/decomposer.py
@@ -16,7 +16,7 @@
 import time
 import numpy
 import shutil
-import cPickle
+import pickle
 import logging
 
 import Oligotyping as o
@@ -140,9 +140,9 @@ def check_apps(self):
         try:
             blast.LocalBLAST(None, None, None)
         except blast.ModuleVersionError:
-            raise utils.ConfigError, blast.version_error_text
+            raise utils.ConfigError(blast.version_error_text)
         except blast.ModuleBinaryError:
-            raise utils.ConfigError, blast.missing_binary_error_text
+            raise utils.ConfigError(blast.missing_binary_error_text)
 
         # FIXME: check R modules here.
@@ -156,10 +156,10 @@ def check_dirs(self):
             try:
                 os.makedirs(self.output_directory)
             except:
-                raise utils.ConfigError, "Output directory does not exist (attempt to create one failed as well): '%s'" % \
-                                                (self.output_directory)
+                raise utils.ConfigError("Output directory does not exist (attempt to create one failed as well): '%s'" % \
+                                                (self.output_directory))
 
         if not os.access(self.output_directory, os.W_OK):
-            raise utils.ConfigError, "You do not have write permission for the output directory: '%s'" % self.output_directory
+            raise utils.ConfigError("You do not have write permission for the output directory: '%s'" % self.output_directory)
 
         self.tmp_directory = self.generate_output_destination('TMP', directory = True)
         self.nodes_directory = self.generate_output_destination('NODES', directory = True)
@@ -169,18 +169,18 @@ def check_dirs(self):
 
     def check_input_files(self):
         if (not os.path.exists(self.alignment)) or (not os.access(self.alignment, os.R_OK)):
-            raise utils.ConfigError, "Alignment file is not accessible: '%s'" % self.alignment
+            raise utils.ConfigError("Alignment file is not accessible: '%s'" % self.alignment)
 
         if self.sample_mapping:
             if (not os.path.exists(self.sample_mapping)) or (not os.access(self.sample_mapping, os.R_OK)):
-                raise utils.ConfigError, "Sample mapping file is not accessible: '%s'" % self.sample_mapping
+                raise utils.ConfigError("Sample mapping file is not accessible: '%s'" % self.sample_mapping)
 
         samples = None
         if not self.skip_check_input_file:
             self.progress.new('Checking the input FASTA')
             samples = utils.check_input_alignment(self.alignment, self.sample_name_separator, self.progress)
             if not samples:
-                raise utils.ConfigError, 'Exiting.'
+                raise utils.ConfigError('Exiting.')
             self.progress.end()
 
         if self.sample_mapping:
@@ -220,8 +220,8 @@ def _init_topology(self):
             self.root = self.topology.add_new_node('root', reads, root = True)
 
             if self.root.size < self.min_actual_abundance:
-                raise utils.ConfigError, "The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \
-                                                    (self.root.size, self.min_actual_abundance)
+                raise utils.ConfigError("The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \
+                                                    (self.root.size, self.min_actual_abundance))
 
             self.node_ids_to_analyze = ['root']
@@ -420,9 +420,9 @@ def _generate_raw_topology(self):
             if node.reads[0].frequency < self.min_substantive_abundance:
                 if node.node_id == 'root':
                     self.progress.end()
-                    raise utils.ConfigError, "Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \
+                    raise utils.ConfigError("Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \
                                                         % (node.reads[0].frequency,
-                                                           self.min_substantive_abundance)
+                                                           self.min_substantive_abundance))
 
                 else:
                     # remove the node and store its content.
@@ -536,7 +536,7 @@ def _generate_raw_topology(self):
 
                     oligo = ''.join([read.seq[d] for d in node.discriminants])
 
-                    if new_nodes_dict.has_key(oligo):
+                    if oligo in new_nodes_dict:
                         new_nodes_dict[oligo]['reads'].append(read)
                     else:
                         new_node_id = self.topology.get_new_node_id()
@@ -546,7 +546,7 @@ def _generate_raw_topology(self):
 
                 # all reads in the parent node are analyzed. time to add spawned nodes into the topology.
-                oligos = new_nodes_dict.keys()
+                oligos = list(new_nodes_dict.keys())
                 len_oligos = len(oligos)
                 for i in range(0, len_oligos):
                     self.progress.update(p + ' / new nodes %d of %d ' % (i + 1, len_oligos))
@@ -678,7 +678,7 @@ def _refine_topology(self):
 
         abundant_reads_in_outlier_bin = []
 
-        if self.topology.outliers.has_key('maximum_variation_allowed_reason'):
+        if 'maximum_variation_allowed_reason' in self.topology.outliers:
             abundant_reads_in_outlier_bin = [read_object for read_object in \
                         self.topology.outliers['maximum_variation_allowed_reason'] \
                         if read_object.frequency > self.min_substantive_abundance]
@@ -1075,7 +1075,7 @@ def get_dict_entry_tmpl():
         self.progress.update('Processing reads that were represented in results')
         for sample in self.samples_dict:
-            if not read_distribution_dict.has_key(sample):
+            if sample not in read_distribution_dict:
                 read_distribution_dict[sample] = get_dict_entry_tmpl()
 
             read_distribution_dict[sample]['represented_reads'] = sum(self.samples_dict[sample].values())
@@ -1086,7 +1086,7 @@ def get_dict_entry_tmpl():
                 for read_id in read_object.ids:
                     sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator)
 
-                    if not read_distribution_dict.has_key(sample):
+                    if sample not in read_distribution_dict:
                         read_distribution_dict[sample] = get_dict_entry_tmpl()
 
                     read_distribution_dict[sample][reason] += 1
@@ -1166,11 +1166,11 @@ def _generate_samples_dict(self):
                 for read_id in read.ids:
                     sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator)
 
-                    if not self.samples_dict.has_key(sample):
+                    if sample not in self.samples_dict:
                         self.samples_dict[sample] = {}
                         self.samples.append(sample)
 
-                    if self.samples_dict[sample].has_key(node_id):
+                    if node_id in self.samples_dict[sample]:
                         self.samples_dict[sample][node_id] += 1
                     else:
                         self.samples_dict[sample][node_id] = 1
@@ -1238,7 +1238,7 @@ def _store_topology_dict(self):
         self.progress.end()
 
         topology_dict_file_path = self.generate_output_destination('TOPOLOGY-LIGHT.cPickle')
-        cPickle.dump(topology_dict, open(topology_dict_file_path, 'w'))
+        pickle.dump(topology_dict, open(topology_dict_file_path, 'wb'))
         self.run.info('topology_light_dict', topology_dict_file_path)
@@ -1347,7 +1347,7 @@ def _generate_html_output(self):
         from Oligotyping.utils.html.error import HTMLError
         try:
             from Oligotyping.utils.html.for_decomposition import generate_html_output
-        except HTMLError, e:
+        except HTMLError as e:
             sys.stdout.write('\n\n\t%s\n\n' % e)
             sys.exit()
@@ -1383,7 +1383,7 @@ def _generate_default_figures(self):
 
         figures_dict = generate_default_figures(self)
         figures_dict_file_path = self.generate_output_destination("FIGURES.cPickle")
-        cPickle.dump(figures_dict, open(figures_dict_file_path, 'w'))
+        pickle.dump(figures_dict, open(figures_dict_file_path, 'wb'))
 
         self.progress.end()
         self.run.info('figures_dict_file_path', figures_dict_file_path)
@@ -1397,7 +1397,7 @@ def _generate_exclusive_figures(self):
 
         exclusive_figures_dict = generate_exclusive_figures(self)
         exclusive_figures_dict_file_path = self.generate_output_destination("EXCLUSIVE-FIGURES.cPickle")
-        cPickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w'))
+        pickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'wb'))
 
         self.progress.end()
         self.run.info('exclusive_figures_dict_file_path', exclusive_figures_dict_file_path)
diff --git a/Oligotyping/lib/entropy.py b/Oligotyping/lib/entropy.py
index f264fe7..20dcbaf 100644
--- a/Oligotyping/lib/entropy.py
+++ b/Oligotyping/lib/entropy.py
@@ -79,11 +79,11 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued
     progress.new('Processing the Alignment')
 
     # processing the alignment file..
-    while alignment.next():
+    while next(alignment):
         # check the alignment lengths along the way:
         if previous_alignment_length:
             if previous_alignment_length != len(alignment.seq):
-                raise EntropyError, "Not all reads have the same length."
+                raise EntropyError("Not all reads have the same length.")
 
         # print out process info
         if alignment.pos % 10000 == 0:
@@ -96,7 +96,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued
             try:
                 frequency = freq_from_defline(alignment.id)
             except IndexError:
-                raise EntropyError, "Reads declared as unique, but they do not have proper deflines. See help for --uniqued."
+                raise EntropyError("Reads declared as unique, but they do not have proper deflines. See help for --uniqued.")
 
             for i in range(0, frequency):
                 lines.append(alignment.seq)
@@ -124,7 +124,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued
 
         if weighted:
             if not qual_stats_dict:
-                raise EntropyError, "Weighted entropy is selected, but no qual stats are provided"
+                raise EntropyError("Weighted entropy is selected, but no qual stats are provided")
             e = entropy(column, l_qual = qual_stats_dict[position], amino_acid_sequences = amino_acid_sequences)
         else:
             e = entropy(column, amino_acid_sequences = amino_acid_sequences)
@@ -164,7 +164,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued
 
 def quick_entropy(l, amino_acid_sequences = False):
     if len(set([len(x) for x in l])) != 1:
-        raise EntropyError, "Not all vectors have the same length."
+        raise EntropyError("Not all vectors have the same length.")
 
     entropy_tpls = []
     for position in range(0, len(l[0])):
diff --git a/Oligotyping/lib/fastalib.py b/Oligotyping/lib/fastalib.py
index 8fe8666..228ffaa 100644
--- a/Oligotyping/lib/fastalib.py
+++ b/Oligotyping/lib/fastalib.py
@@ -36,7 +36,7 @@ def write_seq(self, seq, split = True):
         self.output_file_obj.write('%s\n' % seq)
 
     def split(self, sequence, piece_length = 80):
-        ticks = range(0, len(sequence), piece_length) + [len(sequence)]
+        ticks = list(range(0, len(sequence), piece_length)) + [len(sequence)]
         return '\n'.join([sequence[ticks[x]:ticks[x + 1]] for x in range(0, len(ticks) - 1)])
 
     def close(self):
@@ -50,7 +50,7 @@ def __init__(self, f_name):
         self.fasta = SequenceSource(f_name)
 
-        while self.fasta.next():
+        while next(self.fasta):
             if self.fasta.pos % 1000 == 0 or self.fasta.pos == 1:
                 sys.stderr.write('\r[fastalib] Reading FASTA into memory: %s' % (self.fasta.pos))
                 sys.stderr.flush()
@@ -94,7 +94,7 @@ def __init__(self, fasta_file_path, lazy_init = True, unique = False, allow_mixe
 
     def init_unique_hash(self):
         while self.next_regular():
-            hash = hashlib.sha1(self.seq.upper()).hexdigest()
+            hash = hashlib.sha1(self.seq.upper().encode('utf-8')).hexdigest()
             if hash in self.unique_hash_dict:
                 self.unique_hash_dict[hash]['ids'].append(self.id)
                 self.unique_hash_dict[hash]['count'] += 1
@@ -111,7 +111,7 @@ def init_unique_hash(self):
         self.total_unique = len(self.unique_hash_dict)
         self.reset()
 
-    def next(self):
+    def __next__(self):
         if self.unique:
             return self.next_unique()
         else:
@@ -159,7 +159,7 @@ def next_regular(self):
 
     def get_seq_by_read_id(self, read_id):
         self.reset()
-        while self.next():
+        while next(self):
             if self.id == read_id:
                 return self.seq
@@ -184,7 +184,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
 
         self.reset()
 
-        while self.next():
+        while next(self):
             if self.pos % 10000 == 0 or self.pos == 1:
                 sys.stderr.write('\r[fastalib] Reading: %s' % (self.pos))
                 sys.stderr.flush()
@@ -213,7 +213,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
         plt.subplots_adjust(left=0.05, bottom = 0.03, top = 0.95, right = 0.98)
 
         plt.plot(seq_len_distribution, color = 'black', alpha = 0.3)
-        plt.fill_between(range(0, max_seq_len + 1), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15)
+        plt.fill_between(list(range(0, max_seq_len + 1)), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15)
         plt.ylabel('number of sequences')
         plt.xlabel('sequence length')
@@ -223,8 +223,8 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len
         if ytickstep == None:
             ytickstep = max(seq_len_distribution) / 20 or 1
 
-        plt.xticks(range(xtickstep, max_seq_len + 1, xtickstep), rotation=90, size='xx-small')
-        plt.yticks(range(0, max(seq_len_distribution) + 1, ytickstep),
+        plt.xticks(list(range(xtickstep, max_seq_len + 1, xtickstep)), rotation=90, size='xx-small')
+        plt.yticks(list(range(0, max(seq_len_distribution) + 1, ytickstep)),
                    [y for y in range(0, max(seq_len_distribution) + 1, ytickstep)],
                    size='xx-small')
         plt.xlim(xmin = 0, xmax = max_seq_len)
@@ -281,7 +281,7 @@ def __init__(self, quals_file_path, lazy_init = True):
 
         self.reset()
 
-    def next(self):
+    def __next__(self):
         self.id = self.file_pointer.readline()[1:].strip()
         self.quals = None
         self.quals_int = None
diff --git a/Oligotyping/lib/oligotyping.py b/Oligotyping/lib/oligotyping.py
index 9ddfd78..f53907d 100644
--- a/Oligotyping/lib/oligotyping.py
+++ b/Oligotyping/lib/oligotyping.py
@@ -15,7 +15,7 @@
 import sys
 import copy
 import shutil
-import cPickle
+import pickle
 import logging
 import itertools
 import math
@@ -32,6 +32,7 @@
 from Oligotyping.visualization.oligotype_sets_distribution import vis_oligotype_sets_distribution
 from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar
 from Oligotyping.visualization.oligotype_distribution_across_samples import oligotype_distribution_across_samples
+from functools import reduce
 
 
 class Oligotyping:
@@ -52,7 +53,7 @@ def __init__(self, args = None):
         self.project = None
         self.output_directory = None
         self.sample_name_separator = '_'
-        self.limit_representative_sequences = sys.maxint
+        self.limit_representative_sequences = sys.maxsize
         self.quick = False
         self.no_figures = False
         self.no_display = False
@@ -89,7 +90,7 @@ def __init__(self, args = None):
             self.project = args.project or os.path.basename(args.alignment).split('.')[0]
             self.output_directory = args.output_directory
             self.sample_name_separator = args.sample_name_separator
-            self.limit_representative_sequences = args.limit_representative_sequences or sys.maxint
+            self.limit_representative_sequences = args.limit_representative_sequences or sys.maxsize
             self.quick = args.quick
             self.no_figures = args.no_figures
             self.no_display = args.no_display
@@ -140,19 +141,19 @@ def check_apps(self):
         try:
             blast.LocalBLAST(None, None, None)
         except blast.ModuleVersionError:
-            raise utils.ConfigError, blast.version_error_text
+            raise utils.ConfigError(blast.version_error_text)
         except blast.ModuleBinaryError:
-            raise utils.ConfigError, blast.missing_binary_error_text
+            raise utils.ConfigError(blast.missing_binary_error_text)
 
         # FIXME: check R modules here.
 
     def check_dirs(self):
         if self.number_of_auto_components != None and self.selected_components != None:
-            raise utils.ConfigError, "You either have to declare 'auto components' (-c) or 'selected components' (-C)."
+            raise utils.ConfigError("You either have to declare 'auto components' (-c) or 'selected components' (-C).")
 
         if self.number_of_auto_components == None and self.selected_components == None:
-            raise utils.ConfigError, "Both 'auto components' (-c), and 'selected components' (-C) were declared."
+            raise utils.ConfigError("Both 'auto components' (-c), and 'selected components' (-C) were declared.")
 
         # check output associated stuff
         if not self.output_directory:
@@ -162,10 +163,10 @@ def check_dirs(self):
             try:
                 os.makedirs(self.output_directory)
             except:
-                raise utils.ConfigError, "Output directory does not exist (attempt to create one failed as well): '%s'" % \
-                                                (self.output_directory)
+                raise utils.ConfigError("Output directory does not exist (attempt to create one failed as well): '%s'" % \
+                                                (self.output_directory))
 
         if not os.access(self.output_directory, os.W_OK):
-            raise utils.ConfigError, "You do not have write permission for the output directory: '%s'" % self.output_directory
+            raise utils.ConfigError("You do not have write permission for the output directory: '%s'" % self.output_directory)
 
         self.tmp_directory = self.generate_output_destination('TMP', directory = True)
         self.figures_directory = self.generate_output_destination('FIGURES', directory = True)
@@ -173,25 +174,25 @@ def check_dirs(self):
 
     def check_input(self):
         if (not os.path.exists(self.alignment)) or (not os.access(self.alignment, os.R_OK)):
-            raise utils.ConfigError, "Alignment file is not accessible: '%s'" % self.alignment
+            raise utils.ConfigError("Alignment file is not accessible: '%s'" % self.alignment)
 
         if (not os.path.exists(self.entropy)) or (not os.access(self.entropy, os.R_OK)):
-            raise utils.ConfigError, "Entropy file is not accessible: '%s'" % self.entropy
+            raise utils.ConfigError("Entropy file is not accessible: '%s'" % self.entropy)
 
         if self.sample_mapping:
             if (not os.path.exists(self.sample_mapping)) or (not os.access(self.sample_mapping, os.R_OK)):
-                raise utils.ConfigError, "Sample mapping file is not accessible: '%s'" % self.sample_mapping
+                raise utils.ConfigError("Sample mapping file is not accessible: '%s'" % self.sample_mapping)
 
         if self.colors_list_file:
             if not os.path.exists(self.colors_list_file):
-                raise utils.ConfigError, "Colors list file does not exist: '%s'" % self.colors_list_file
+                raise utils.ConfigError("Colors list file does not exist: '%s'" % self.colors_list_file)
             first_characters = list(set([c.strip()[0] for c in open(self.colors_list_file)]))
             if len(first_characters) != 1 or first_characters[0] != '#':
-                raise utils.ConfigError, "Colors list file does not seem to be correctly formatted"
+                raise utils.ConfigError("Colors list file does not seem to be correctly formatted")
 
         # set the alignment lentgh (it will be necessary to check certain params)
         alignment = u.SequenceSource(self.alignment)
-        alignment.next()
+        next(alignment)
         self.alignment_length = len(alignment.seq)
         alignment.close()
@@ -203,7 +204,7 @@ def check_input(self):
             self.progress.new('Checking the input FASTA')
             samples = utils.check_input_alignment(self.alignment, self.sample_name_separator, self.progress)
             if not samples:
-                raise utils.ConfigError, 'Exiting.'
+                raise utils.ConfigError('Exiting.')
             self.progress.end()
 
         if self.sample_mapping:
@@ -218,44 +219,44 @@ def check_params(self):
         try:
             self.selected_components = [int(c) for c in self.selected_components.split(',')]
         except:
-            raise utils.ConfigError, "Selected components should be comma separated integer values (such as '4,8,15,25,47')."
+            raise utils.ConfigError("Selected components should be comma separated integer values (such as '4,8,15,25,47').")
 
         if max(self.selected_components) >= self.alignment_length:
-            raise utils.ConfigError, "There is at least one component ('%d') that is bigger than the alignment length."\
-                                                 % max(self.selected_components)
+            raise utils.ConfigError("There is at least one component ('%d') that is bigger than the alignment length."\
+                                                 % max(self.selected_components))
 
         if min(self.selected_components) < 0:
-            raise utils.ConfigError, "Selected components can't be smaller than 0"
+            raise utils.ConfigError("Selected components can't be smaller than 0")
 
         components_declared_more_than_once = [c[0] for c in itertools.groupby(sorted(self.selected_components))\
                                                      if len(list(c[1])) > 1]
         N = len(components_declared_more_than_once)
         if N:
-            raise utils.ConfigError, "You declared %s component%s (%s) more than once."\
+            raise utils.ConfigError("You declared %s component%s (%s) more than once."\
                         % ('a' if N == 1 else '%s' % str(N),
                            's' if N > 1 else '',
-                           ', '.join([str(c) for c in components_declared_more_than_once]))
+                           ', '.join([str(c) for c in components_declared_more_than_once])))
 
         if self.min_base_quality:
             try:
                 self.min_base_quality = int(self.min_base_quality)
                 assert(self.min_base_quality >= 0 and self.min_base_quality <= 40)
             except:
-                raise utils.ConfigError, "Minimum base quality must be an integer between 0 and 40."
+                raise utils.ConfigError("Minimum base quality must be an integer between 0 and 40.")
 
         if self.limit_oligotypes_to:
             self.limit_oligotypes_to = [o.strip().upper() for o in self.limit_oligotypes_to.split(',')]
             if len(self.limit_oligotypes_to) == 1:
-                raise utils.ConfigError, "There must be more than one oligotype for --limit-oligotypes parameter."
+                raise utils.ConfigError("There must be more than one oligotype for --limit-oligotypes parameter.")
 
             if len([n for n in ''.join(self.limit_oligotypes_to) if n not in ['A', 'T', 'C', 'G', '-']]):
-                raise utils.ConfigError, "Oligotypes defined by --limit-oligotypes parameter seems to have ambiguous characters."
+                raise utils.ConfigError("Oligotypes defined by --limit-oligotypes parameter seems to have ambiguous characters.")
 
         if self.exclude_oligotypes:
             self.exclude_oligotypes = [o.strip().upper() for o in self.exclude_oligotypes.split(',')]
             if len([n for n in ''.join(self.exclude_oligotypes) if n not in ['A', 'T', 'C', 'G', '-']]):
-                raise utils.ConfigError, "Oligotypes defined by --exclude-oligotypes parameter seems to have ambiguous characters."
+                raise utils.ConfigError("Oligotypes defined by --exclude-oligotypes parameter seems to have ambiguous characters.")
 
         return True
@@ -450,14 +451,14 @@ def _construct_samples_dict(self):
         num_reads_eliminated_due_to_min_base_quality = 0
 
         self.fasta.reset()
-        while self.fasta.next():
+        while next(self.fasta):
             if self.fasta.pos % 1000 == 0:
                 self.progress.update('Analyzing: %s' \
                                 % (utils.pretty_print(self.fasta.pos)))
 
             sample = utils.get_sample_name_from_defline(self.fasta.id, self.sample_name_separator)
 
-            if not self.samples_dict.has_key(sample):
+            if sample not in self.samples_dict:
                 self.samples_dict[sample] = {}
                 self.samples.append(sample)
@@ -483,7 +484,7 @@ def _construct_samples_dict(self):
             # checking the base qualities
 
             oligo = ''.join(self.fasta.seq[o] for o in self.bases_of_interest_locs)
 
-            if self.samples_dict[sample].has_key(oligo):
+            if oligo in self.samples_dict[sample]:
                 self.samples_dict[sample][oligo] += 1
             else:
                 self.samples_dict[sample][oligo] = 1
@@ -495,16 +496,16 @@ def _construct_samples_dict(self):
         if self.quals_dict:
             self.run.info('num_reads_eliminated_due_to_min_base_quality', num_reads_eliminated_due_to_min_base_quality)
             if self.fasta.total_seq == num_reads_eliminated_due_to_min_base_quality:
-                raise utils.ConfigError, "All reads were eliminated due to --min-base-quality (%d) rule" % self.min_base_quality
+                raise utils.ConfigError("All reads were eliminated due to --min-base-quality (%d) rule" % self.min_base_quality)
 
 
     def _register_removal(self, oligo, reason = 'unknown'):
-        if not self.excluded_read_ids_tracker.has_key(reason):
+        if reason not in self.excluded_read_ids_tracker:
             self.excluded_read_ids_tracker[reason] = {}
 
         for sample in self.samples:
-            if self.samples_dict[sample].has_key(oligo):
-                if not self.excluded_read_ids_tracker[reason].has_key(sample):
+            if oligo in self.samples_dict[sample]:
+                if sample not in self.excluded_read_ids_tracker[reason]:
                     self.excluded_read_ids_tracker[reason][sample] = self.samples_dict[sample][oligo]
                 else:
                     self.excluded_read_ids_tracker[reason][sample] += self.samples_dict[sample][oligo]
@@ -569,7 +570,7 @@ def _contrive_abundant_oligos(self):
         # for each sample. computing it here once is more optimized.
         sample_sums = {}
         SUM = lambda sample: sum([self.samples_dict[sample][o] for o in non_singleton_oligos \
-                                                if self.samples_dict[sample].has_key(o)])
+                                                if o in self.samples_dict[sample]])
         for sample in self.samples:
             sample_sums[sample] = SUM(sample)
@@ -583,7 +584,7 @@ def _contrive_abundant_oligos(self):
             percent_abundances = []
 
             for sample in self.samples:
-                if self.samples_dict[sample].has_key(oligo):
+                if oligo in self.samples_dict[sample]:
                     percent_abundances.append((self.samples_dict[sample][oligo] * 100.0 / sample_sums[sample],
                                                self.samples_dict[sample][oligo],
                                                sample_sums[sample],
@@ -625,7 +626,7 @@ def _contrive_abundant_oligos(self):
             self.progress.update(utils.P(i, len(non_singleton_oligos)))
 
             oligo_actual_abundance = sum([self.samples_dict[sample][oligo] for sample in self.samples_dict\
-                                                        if self.samples_dict[sample].has_key(oligo)])
+                                                        if oligo in self.samples_dict[sample]])
             if self.min_actual_abundance > oligo_actual_abundance:
                 oligos_for_removal.append(oligo)
@@ -695,8 +696,8 @@ def _contrive_abundant_oligos(self):
             self.run.info('num_oligos_after_l_elim', len(self.abundant_oligos))
 
             if len(self.abundant_oligos) == 0:
-                raise utils.ConfigError, "\n\n\t--limit-oligotypes parameter eliminated all oligotypes.\
-                                          \n\tPlease make sure --limit-oligotypes matches with actual oligotypes.\n\n\tQuiting.\n"
+                raise utils.ConfigError("\n\n\t--limit-oligotypes parameter eliminated all oligotypes.\
+                                          \n\tPlease make sure --limit-oligotypes matches with actual oligotypes.\n\n\tQuiting.\n")
 
         # if 'exclude_oligotypes' is defined, remove them from analysis if they are present
         if self.exclude_oligotypes:
@@ -711,12 +712,12 @@ def _contrive_abundant_oligos(self):
         # storing final counts
         for oligo in self.abundant_oligos:
             self.final_oligo_counts_dict[oligo] = sum([self.samples_dict[sample][oligo] for sample in self.samples_dict\
-                                                            if self.samples_dict[sample].has_key(oligo)])
+                                                            if oligo in self.samples_dict[sample]])
 
         # in case no oligos left
         if not len(self.abundant_oligos):
-            raise utils.ConfigError, "\n\n\tAll oligotypes were discarded during the noise removal step.\
-                                      \n\tPlease check your parameters.\n\n\tQuiting.\n"
+            raise utils.ConfigError("\n\n\tAll oligotypes were discarded during the noise removal step.\
+                                      \n\tPlease check your parameters.\n\n\tQuiting.\n")
 
         # if there is only one oligotype left, skip basic analyses
         if len(self.abundant_oligos) == 1:
@@ -892,7 +893,7 @@ def get_dict_entry_tmpl():
         self.progress.update('Processing reads that were represented in results')
         for sample in self.samples_dict:
-            if not read_distribution_dict.has_key(sample):
+            if sample not in read_distribution_dict:
                 read_distribution_dict[sample] = get_dict_entry_tmpl()
 
             read_distribution_dict[sample]['represented_reads'] = sum(self.samples_dict[sample].values())
@@ -900,7 +901,7 @@ def get_dict_entry_tmpl():
         for reason in self.excluded_read_ids_tracker:
             self.progress.update('Processing excluded oligos (%s)' % (reason))
             for sample in self.excluded_read_ids_tracker[reason]:
-                if not read_distribution_dict.has_key(sample):
+                if sample not in read_distribution_dict:
                     read_distribution_dict[sample] = get_dict_entry_tmpl()
 
                 read_distribution_dict[sample][reason] = self.excluded_read_ids_tracker[reason][sample]
@@ -921,9 +922,9 @@ def _generate_random_colors(self):
             # it means user provided a list of colors to be used for oligotypes
             colors = [c.strip() for c in open(self.colors_list_file).readlines()]
             if len(colors) < len(self.abundant_oligos):
-                raise utils.ConfigError, "Number of colors defined in colors file (%d),\
+                raise utils.ConfigError("Number of colors defined in colors file (%d),\
                                           is smaller than the number of abundant oligotypes (%d)" % \
-                                                        (len(colors), len(self.abundant_oligos))
+                                                        (len(colors), len(self.abundant_oligos)))
             colors_dict = {}
             for i in range(0, len(self.abundant_oligos)):
                 colors_dict[self.abundant_oligos[i]] = colors[i]
@@ -959,7 +960,7 @@ def _agglomerate_oligos_based_on_cosine_similarity(self):
         self.progress.new('Generating data objects for newly generated oligotype sets')
         self.progress.update('New Colors')
-        self.oligotype_set_ids = range(0, len(self.oligotype_sets))
+        self.oligotype_set_ids = list(range(0, len(self.oligotype_sets)))
 
         self.colors_dict_for_oligotype_sets = {}
         for set_id in self.oligotype_set_ids:
@@ -996,7 +997,7 @@ def _generate_MATRIX_files_for_oligotype_sets(self):
             counts = []
             percents = []
             for sample in self.samples:
-                if d[sample].has_key(oligotype_set_id):
+                if oligotype_set_id in d[sample]:
                     counts.append(d[sample][oligotype_set_id])
                     percents.append(d[sample][oligotype_set_id] * 100.0 / sum(d[sample].values()))
                 else:
@@ -1031,10 +1032,10 @@ def _get_unique_sequence_distributions_within_abundant_oligos(self):
        # listed in this dictionary MAY NOT be the final oligos once the noise
        # filtering step has ended.
 
-        temp_unique_distributions = dict(zip(self.abundant_oligos, [{} for x in range(0, len(self.abundant_oligos))]))
+        temp_unique_distributions = dict(list(zip(self.abundant_oligos, [{} for x in range(0, len(self.abundant_oligos))])))
 
         self.fasta.reset()
-        while self.fasta.next():
+        while next(self.fasta):
             if self.progress and self.fasta.pos % 1000 == 0:
                 self.progress.update('Computing sequence distributions: %.2f%%' \
                                         % (self.fasta.pos * 100.0 / self.fasta.total_seq))
@@ -1046,7 +1047,7 @@ def _get_unique_sequence_distributions_within_abundant_oligos(self):
                     temp_unique_distributions[oligo][self.fasta.seq] = 1
 
         for oligo in self.abundant_oligos:
-            temp_unique_distributions[oligo] = sorted(temp_unique_distributions[oligo].values(), reverse = True)
+            temp_unique_distributions[oligo] = sorted(list(temp_unique_distributions[oligo].values()), reverse = True)
 
         return temp_unique_distributions
@@ -1073,7 +1074,7 @@ def _generate_representative_sequences(self):
                 unique_files_dict[oligo] = {'file': open(fasta_file_path + '_unique', 'w'),
                                             'path': fasta_file_path + '_unique'}
             except IOError:
-                print '\n\t'.join(['',
+                print('\n\t'.join(['',
                        'WARNING: Oligotyping process has reached the maximum number of open files',
                        'limit defined by the operating system. There are "%d" oligotypes to be'\
                                                              % len(self.abundant_oligos),
                        '',
                        'Until this issue is solved, representative sequences are not going to be',
                        'computed.',
-                       ''])
+                       '']))
 
                 # clean after yourself. close every file, delete directory, exit.
-                [map(lambda x: x.close(), [g[o]['file'] for o in g]) for g in [fasta_files_dict, unique_files_dict]]
+                [[x.close() for x in [g[o]['file'] for o in g]] for g in [fasta_files_dict, unique_files_dict]]
                 shutil.rmtree(output_directory_for_reps)
                 sys.exit()
 
         self.fasta.reset()
-        while self.fasta.next():
+        while next(self.fasta):
             if self.fasta.pos % 1000 == 0:
                 self.progress.update('Generating Individual FASTA Files: %.2f%%' \
                                 % (self.fasta.pos * 100.0 / self.fasta.total_seq))
@@ -1120,7 +1121,7 @@ def _generate_representative_sequences(self):
             # is distributed among samples:
             distribution_among_samples = {}
 
-            fasta.next()
+            next(fasta)
             # this is the first read in the unique reads list, which is the most abundant unique sequence
             # for the oligotype. so we are going to store it in a dict to generate
             # representative sequences FASTA file:
@@ -1131,7 +1132,7 @@ def _generate_representative_sequences(self):
             # FIXME: I am going to come back to this and fix it at some point. Storing 'distribution_among_samples'
             # information in separate cPickle files per oligo is not the smartest thing to do.
             self.final_oligo_unique_distribution_dict[oligo] = []
-            while fasta.next() and fasta.pos <= self.limit_representative_sequences:
+            while next(fasta) and fasta.pos <= self.limit_representative_sequences:
                 unique_files_dict[oligo]['file'].write('>%s_%d|freq:%d\n'\
                                                 % (oligo,
                                                    fasta.pos,
@@ -1144,10 +1145,10 @@ def _generate_representative_sequences(self):
                 for sample_id in fasta.ids:
                     sample_name = utils.get_sample_name_from_defline(sample_id, self.sample_name_separator)
 
-                    if not distribution_among_samples.has_key(sample_name):
+                    if sample_name not in distribution_among_samples:
                         distribution_among_samples[sample_name] = {}
 
                     d = distribution_among_samples[sample_name]
-                    if not d.has_key(fasta.pos):
+                    if fasta.pos not in d:
                         d[fasta.pos] = 1
                     else:
                         d[fasta.pos] += 1
@@ -1157,7 +1158,7 @@ def _generate_representative_sequences(self):
 
             unique_fasta_path = unique_files_dict[oligo]['path']
             distribution_among_samples_dict_path = unique_fasta_path + '_distribution.cPickle'
-            cPickle.dump(distribution_among_samples, open(distribution_among_samples_dict_path, 'w'))
+            pickle.dump(distribution_among_samples, open(distribution_among_samples_dict_path, 'wb'))
 
         self.progress.end()
@@ -1249,7 +1250,7 @@ def _perform_local_BLAST_search_for_oligo_representative(self, unique_files_dict
             self.progress.update('Storing representative sequences for "%s" ...' % oligo)
             unique_fasta_path = unique_files_dict[oligo]['path']
             unique_fasta = u.SequenceSource(unique_fasta_path)
-            unique_fasta.next()
+            next(unique_fasta)
             representative_fasta_entries.append((oligo, unique_fasta.seq),)
             unique_fasta.close()
 
         utils.append_reads_to_FASTA(representative_fasta_entries, query)
@@ -1284,10 +1285,10 @@ def _perform_local_BLAST_search_for_oligo_representative(self, unique_files_dict
         for oligo in self.abundant_oligos:
             unique_fasta_path = unique_files_dict[oligo]['path']
             fancy_blast_result_output_path = unique_fasta_path + '_BLAST.cPickle'
-            if fancy_results_dict.has_key(oligo):
-                cPickle.dump(fancy_results_dict[oligo], open(fancy_blast_result_output_path, 'w'))
+            if oligo in fancy_results_dict:
+                pickle.dump(fancy_results_dict[oligo], open(fancy_blast_result_output_path, 'wb'))
             else:
-                cPickle.dump([], open(fancy_blast_result_output_path, 'w'))
+                pickle.dump([], open(fancy_blast_result_output_path, 'wb'))
 
 
     def _perform_remote_BLAST_search_for_oligo_representative(self, oligo, unique_files_dict):
@@ -1296,7 +1297,7 @@ def _perform_remote_BLAST_search_for_oligo_representative(self, oligo, unique_fi
 
         unique_fasta_path = unique_files_dict[oligo]['path']
         unique_fasta = u.SequenceSource(unique_fasta_path)
-        unique_fasta.next()
+        next(unique_fasta)
 
         blast_output_xml = unique_fasta_path + '_BLAST.xml'
         blast_output_dict = unique_fasta_path + '_BLAST.cPickle'
@@ -1307,7 +1308,7 @@ def blast_search_wrapper(seq, xml_path, pickle_path):
             try:
                 results = r.search(seq, xml_path)
                 results_list = r.get_fancy_results_list(results)
-                cPickle.dump(results_list, open(pickle_path, 'w'))
+                pickle.dump(results_list, open(pickle_path, 'wb'))
                 return True
             except:
                 return False
@@ -1348,7 +1349,7 @@ def _generate_entropy_figure_for_abundant_oligotype(self, oligo, unique_fasta_pa
         for i in range(0, self.alignment_length):
             color_per_column[i] = color_shade_dict[entropy_values_per_column[i]]
 
-        cPickle.dump(color_per_column, open(color_per_column_path, 'w'))
+        pickle.dump(color_per_column, open(color_per_column_path, 'wb'))
 
 
     def _generate_oligos_across_samples_figure(self):
@@ -1393,7 +1394,7 @@ def _generate_default_figures(self):
 
         figures_dict = generate_default_figures(self)
         figures_dict_file_path = self.generate_output_destination("FIGURES.cPickle")
-        cPickle.dump(figures_dict, open(figures_dict_file_path, 'w'))
+        pickle.dump(figures_dict, open(figures_dict_file_path, 'wb'))
 
         self.progress.end()
         self.run.info('figures_dict_file_path', figures_dict_file_path)
@@ -1407,7 +1408,7 @@ def _generate_exclusive_figures(self):
 
         exclusive_figures_dict = generate_exclusive_figures(self)
         exclusive_figures_dict_file_path = self.generate_output_destination("EXCLUSIVE-FIGURES.cPickle")
-        cPickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w'))
+        pickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'wb'))
 
         self.progress.end()
         self.run.info('exclusive_figures_dict_file_path', exclusive_figures_dict_file_path)
@@ -1440,7 +1441,7 @@ def _generate_html_output(self):
         from Oligotyping.utils.html.error import HTMLError
         try:
             from Oligotyping.utils.html.for_oligotyping import generate_html_output
-        except HTMLError, e:
+        except HTMLError as e:
             sys.stdout.write('\n\n\t%s\n\n' % e)
             sys.exit()
diff --git a/Oligotyping/lib/shared.py b/Oligotyping/lib/shared.py
index 7dbb7be..9f521a9 100644
--- a/Oligotyping/lib/shared.py
+++ b/Oligotyping/lib/shared.py
@@ -116,11 +116,11 @@ def generate_exclusive_figures(_object):
     for category in sample_mapping_dict:
         exclusive_figures_dict[category] = {}
 
-        samples = sample_mapping_dict[category].keys()
+        samples = list(sample_mapping_dict[category].keys())
 
         # double filter: first makes sure sample was not removed from the analysis due to losing all its reads during the
         # refinement, second makes sure that sample was actually mapped to something in the sample mapping file.
-        samples = filter(lambda s: sample_mapping_dict[category][s], filter(lambda s: s in _object.samples, samples))
+        samples = [s for s in samples if s in _object.samples and sample_mapping_dict[category][s]]
         samples.sort()
 
         mapping_file_path = get_temporary_file_name('%s-' % category, '-mapping.txt', _object.tmp_directory)
diff --git a/Oligotyping/lib/topology.py b/Oligotyping/lib/topology.py
index 3766ad2..d4ef932 100644
--- a/Oligotyping/lib/topology.py
+++ b/Oligotyping/lib/topology.py
@@ -54,7 +54,7 @@ def get_new_node_id(self):
 
     def add_new_node(self, node_id, unique_read_objects_list, root = False, parent_id = None):
         if not self.nodes_output_directory:
-            raise ConfigError, "Nodes output directory has to be declared before adding new nodes"
+            raise ConfigError("Nodes output directory has to be declared before adding new nodes")
 
         node = Node(node_id, self.nodes_output_directory)
@@ -103,15 +103,15 @@ def get_node(self, node_id):
 
     def print_node(self, node_id):
         node = self.nodes[node_id]
-        print
-        print 'Node "%s"' % node
-        print '---------------------------------'
-        print 'Alive : %s' % (not node.killed)
-        print 'Dirty : %s' % node.dirty
-        print 'Size : %d' % node.size
-        print 'Parent : %s' % node.parent
-        print 'Children :', node.children
-        print
+        print()
+        print('Node "%s"' % node)
+        print('---------------------------------')
+        print('Alive : %s' % (not node.killed))
+        print('Dirty : %s' % node.dirty)
+        print('Size : %d' % node.size)
+        print('Parent : %s' % node.parent)
+        print('Children :', node.children)
+        print()
 
 
     def get_final_count(self):
diff --git a/Oligotyping/utils/blast.py b/Oligotyping/utils/blast.py
index 20dcd34..3cc47cb 100644
--- a/Oligotyping/utils/blast.py
+++ b/Oligotyping/utils/blast.py
@@ -12,7 +12,7 @@
 import os
 import time
 import copy
-import cStringIO
+import io
 
 import Oligotyping.lib.fastalib as u
 import Oligotyping.lib.b6lib as b6lib
@@ -97,7 +97,7 @@ def __str__(self):
     from Bio.Blast import NCBIWWW
     from Bio.Blast import NCBIXML
 except:
-    raise MissingModuleError, biopython_error_text
+    raise MissingModuleError(biopython_error_text)
 
 
 class LocalBLAST:
@@ -140,11 +140,11 @@ def get_cmd_line_params_dict(self):
 
     def binary_check(self):
         if (not is_program_exist(self.binary)) or (not is_program_exist(self.makeblastdb)):
-            raise ModuleBinaryError, missing_binary_error_text
+            raise ModuleBinaryError(missing_binary_error_text)
 
 
     def version_check(self):
-        version_text = check_command_output('%(binary)s -version' % self.get_cmd_line_params_dict())
+        version_text = check_command_output('%(binary)s -version' % self.get_cmd_line_params_dict()).decode("utf-8")
 
         # we expect to see an output like this:
         #
         #     blastn: 2.2.26+
@@ -153,7 +153,7 @@ def version_check(self):
         major_blastn_version = version_text.strip().split()[1].split('.')[0]
 
         if major_blastn_version != '2':
-            raise ModuleVersionError, version_error_text
+            raise ModuleVersionError(version_error_text)
 
 
     def search_parallel(self, num_processes, num_reads_per_process = 2000, keep_parts = False):
@@ -222,7 +222,7 @@ def get_results_dict(self, mismatches = None, gaps = None, min_identity = None,
         b6 = b6lib.B6Source(self.output)
 
         ids_with_hits = set()
-        while b6.next():
+        while next(b6):
             if b6.entry.query_id == b6.entry.subject_id:
                 continue
@@ -292,8 +292,8 @@ def get_fancy_results_dict(self, max_per_query = 10, defline_white_space_mask =
         query_counts = {}
         fancy_results_dict = {}
 
-        while b6.next():
-            if not query_counts.has_key(b6.entry.query_id):
+        while next(b6):
+            if b6.entry.query_id not in query_counts:
                 query_counts[b6.entry.query_id] = 1
 
             if query_counts[b6.entry.query_id] - 1 == max_per_query:
@@ -301,7 +301,7 @@ def get_fancy_results_dict(self, max_per_query = 10, defline_white_space_mask =
             else:
                 query_counts[b6.entry.query_id] += 1
 
-            if not fancy_results_dict.has_key(b6.entry.query_id):
+            if b6.entry.query_id not in fancy_results_dict:
                 fancy_results_dict[b6.entry.query_id] = []
 
             query_seq = input_fasta.get_seq_by_read_id(b6.entry.query_id).replace('-', '')
@@ -344,7 +344,7 @@ def search(self, sequence, output_file = None):
         if output_file:
             open(output_file, "w").write(result)
 
-        return cStringIO.StringIO(result)
+        return io.StringIO(result)
 
 
     def get_fancy_results_list(self, blast_results, num_results = 20):
@@ -386,5 +386,5 @@ def get_fancy_results_list(self, blast_results, num_results = 20):
     try:
         u = LocalBLAST(None, None)
     except ModuleVersionError:
-        raise ModuleVersionError, version_error_text
+        raise ModuleVersionError(version_error_text)
diff --git a/Oligotyping/utils/cosine_similarity.py b/Oligotyping/utils/cosine_similarity.py
index e84c0cd..0965cef 100755
--- a/Oligotyping/utils/cosine_similarity.py
+++ b/Oligotyping/utils/cosine_similarity.py
@@ -56,7 +56,7 @@ def get_oligotype_sets_greedy(oligos, vectors, cosine_similarity_threshold, outp
         vector = vectors[oligo]
 
         shortest_distance_set_ID = None
-        shortest_distance = sys.maxint
+        shortest_distance = sys.maxsize
 
         for set_representative in set_representatives:
             distance = cosine_distance(set_representatives[set_representative], vector)
@@ -93,16 +93,16 @@ def get_oligotype_sets(oligos, vectors, cosine_similarity_threshold, output_file
     distances = {}
     for i in range(0, len(oligos)):
-        if not distances.has_key(oligos[i]):
+        if oligos[i] not in distances:
             distances[oligos[i]] = {}
 
         for j in range(i, len(oligos)):
-            if not distances.has_key(oligos[j]):
+            if oligos[j] not in distances:
                 distances[oligos[j]] = {}
 
             distances[oligos[i]][oligos[j]] = cosine_distance(vectors[oligos[i]], vectors[oligos[j]])
             distances[oligos[j]][oligos[i]] = cosine_distance(vectors[oligos[i]], vectors[oligos[j]])
 
-    ids = range(0, len(oligos))
+    ids = list(range(0, len(oligos)))
 
     while 1:
         if not len(ids):
             break
@@ -161,11 +161,11 @@ def get_samples():
 
     samples = get_samples()
 
-    print '\n\t%d oligotypes split into %d partitions based on cosine similarity of %f. Here how they were distributed:\n'\
-                        % (len(oligos), len(partitions), args.cosine_similarity_threshold)
+    print('\n\t%d oligotypes split into %d partitions based on cosine similarity of %f. Here how they were distributed:\n'\
+                        % (len(oligos), len(partitions), args.cosine_similarity_threshold))
 
     for partition in partitions:
-        print ' - %s\n' % (', '.join(partition))
+        print(' - %s\n' % (', '.join(partition)))
 
     vis_oligotype_sets_distribution(partitions, vectors, samples, legend = True,\
                  project_title = 'Cosine Similarity Threshold %.4f' % args.cosine_similarity_threshold)
diff --git a/Oligotyping/utils/html/for_decomposition.py b/Oligotyping/utils/html/for_decomposition.py
index 37e4c18..4e3b4b3 100644
--- a/Oligotyping/utils/html/for_decomposition.py
+++ b/Oligotyping/utils/html/for_decomposition.py
@@ -13,13 +13,13 @@
 import os
 import copy
 import shutil
-import cPickle
+import pickle
 
 from Oligotyping.utils.constants import pretty_names
 from Oligotyping.utils.utils import pretty_print
 from Oligotyping.utils.utils import get_samples_dict_from_environment_file
 from Oligotyping.utils.random_colors import get_list_of_colors
-from error import HTMLError
+from .error import HTMLError
 
 
 try:
@@ -59,7 +59,7 @@
     from django.template.loader import render_to_string
     from django.template.defaultfilters import register
 except ImportError:
-    raise HTMLError, 'You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.'
+    raise HTMLError('You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.')
 
 
 @register.filter(name='diffs')
 def diffs(l, index):
@@ -99,7 +99,7 @@ def get_blast_hits(d, max_num = 8):
     ret_line = 'BLAST search results at a glance (%d of %d total hits are shown):' %\
                                     (num_show, len(d))
 
-    for i in d.keys()[0:num_show]:
+    for i in list(d.keys())[0:num_show]:
         if d[i]['identity'] == 100.0:
             ret_line += '* %s (identity: %.2f%%, query coverage: %.2f%%)' \
% (d[i]['hit_def'].replace("'", '"'),
@@ -138,7 +138,7 @@ def get_colors(number_of_colors):
@register.filter(name='values')
def values(d):
- return d.values()
+ return list(d.values())
@register.filter(name='mod')
def mod(value, arg):
@@ -164,7 +164,7 @@ def sumvals(arg, clean = None):
@register.filter(name='mklist')
def mklist(arg):
- return range(0, int(arg))
+ return list(range(0, int(arg)))
t = get_template('index_for_decomposition.tmpl')
@@ -199,8 +199,8 @@ def copy_as(source, dest_name):
def get_figures_dict(html_dict_prefix):
html_dict_key = '%s_file_path' % html_dict_prefix
- if html_dict.has_key(html_dict_key):
- figures_dict = cPickle.load(open(html_dict[html_dict_key]))
+ if html_dict_key in html_dict:
+ figures_dict = pickle.load(open(html_dict[html_dict_key], 'rb'))
for _map in figures_dict:
for _func in figures_dict[_map]:
for _op in figures_dict[_map][_func]:
@@ -219,12 +219,12 @@ def get_figures_dict(html_dict_prefix):
html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict')
- if html_dict.has_key('node_representatives_file_path'):
+ if 'node_representatives_file_path' in html_dict:
html_dict['node_representatives_file_path'] = copy_as(run_info_dict['node_representatives_file_path'], 'node-representatives.fa.txt')
else:
html_dict['node_representatives_file_path'] = None
- if run_info_dict.has_key('blast_ref_db') and os.path.exists(run_info_dict['blast_ref_db']):
+ if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')
if run_info_dict['sample_mapping']:
@@ -267,8 +267,8 @@ def get_figures_dict(html_dict_prefix):
args = parser.parse_args()
- run_info_dict = cPickle.load(open(args.run_info_dict_path))
+ run_info_dict = pickle.load(open(args.run_info_dict_path, 'rb'))
index_page = generate_html_output(run_info_dict, args.output_directory)
- print '\n\tHTML output is ready: "%s"\n' % index_page
+ print('\n\tHTML output is ready: "%s"\n' % index_page)
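
A note on the pickle changes in this file and in decomposer.py above: Python 3's pickle reads and writes bytes, so every file handle passed to pickle.dump()/pickle.load() must be opened in binary mode ('wb'/'rb'); the text modes that worked under Python 2 raise TypeError (or UnicodeDecodeError) at runtime. A minimal standalone sketch of the pattern -- the names and the /tmp path are illustrative only, not from this codebase:

    import pickle

    def save_obj(obj, path):
        # pickle streams are bytes: text mode ('w') raises TypeError in Python 3
        with open(path, 'wb') as f:
            pickle.dump(obj, f)

    def load_obj(path):
        # the matching read side must use 'rb' for the same reason
        with open(path, 'rb') as f:
            return pickle.load(f)

    save_obj({'oligo': 42}, '/tmp/example.cPickle')
    assert load_obj('/tmp/example.cPickle') == {'oligo': 42}
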
diff --git a/Oligotyping/utils/html/for_oligotyping.py b/Oligotyping/utils/html/for_oligotyping.py
index b6a72e6..c422720 100644
--- a/Oligotyping/utils/html/for_oligotyping.py
+++ b/Oligotyping/utils/html/for_oligotyping.py
@@ -14,14 +14,14 @@
import sys
import copy
import shutil
-import cPickle
+import pickle
from Oligotyping.lib import fastalib as u
from Oligotyping.utils.constants import pretty_names
from Oligotyping.utils.utils import pretty_print
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
from Oligotyping.utils.random_colors import get_list_of_colors
-from error import HTMLError
+from .error import HTMLError
try:
@@ -61,7 +61,7 @@
from django.template.loader import render_to_string
from django.template.defaultfilters import register
except ImportError:
- raise HTMLError, 'You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.'
+ raise HTMLError('You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.')
@register.filter(name='diffs')
def diffs(l, index):
@@ -150,7 +150,7 @@ def get_colors(number_of_colors):
@register.filter(name='values')
def values(d):
- return d.values()
+ return list(d.values())
@register.filter(name='mod')
def mod(value, arg):
@@ -180,7 +180,7 @@ def sumvals(arg, clean = None):
@register.filter(name='mklist')
def mklist(arg):
- return range(0, int(arg))
+ return list(range(0, int(arg)))
t = get_template('index_for_oligo.tmpl')
@@ -254,8 +254,8 @@ def copy_as(source, dest_name, essential = True):
def get_figures_dict(html_dict_prefix):
html_dict_key = '%s_file_path' % html_dict_prefix
- if html_dict.has_key(html_dict_key):
- figures_dict = cPickle.load(open(html_dict[html_dict_key]))
+ if html_dict_key in html_dict:
+ figures_dict = pickle.load(open(html_dict[html_dict_key], 'rb'))
for _map in figures_dict:
for _func in figures_dict[_map]:
for _op in figures_dict[_map][_func]:
@@ -285,11 +285,11 @@ def get_figures_dict(html_dict_prefix):
html_dict['oligotype_sets_file'] = copy_as(run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt')
html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in open(run_info_dict['oligotype_sets_file_path'])]
- if html_dict.has_key('representative_seqs_fasta_file_path'):
+ if 'representative_seqs_fasta_file_path' in html_dict:
html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt')
else:
html_dict['representative_seqs_fasta_file_path'] = None
- if run_info_dict.has_key('blast_ref_db') and os.path.exists(run_info_dict['blast_ref_db']):
+ if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']):
html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa')
html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')]
html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path'])
@@ -314,13 +314,13 @@ def get_figures_dict(html_dict_prefix):
# get oligo frequencies
html_dict['frequency'] = {}
for oligo in html_dict['oligos']:
- html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in html_dict['samples_dict'].values() if d.has_key(oligo)]))
+ html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in list(html_dict['samples_dict'].values()) if oligo in d]))
# get purity score
html_dict['purity_score'] = run_info_dict['final_purity_score_dict']
# get total purity score
html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict']
# get unique sequence dict (which will contain the most frequent unique sequence for given oligotype)
- if html_dict.has_key('output_directory_for_reps'):
+ if 'output_directory_for_reps' in html_dict:
html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict)
html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory)
html_dict['component_reference'] = ''.join(['|' % i for i in range(0, html_dict['alignment_length'])])
@@ -331,7 +331,7 @@ def get_figures_dict(html_dict_prefix):
# FIXME: code below is very inefficient and causes a huge
# memory issue. fix it by not using deepcopy.
# generate individual oligotype pages
- if html_dict.has_key('output_directory_for_reps'):
+ if 'output_directory_for_reps' in html_dict:
for i in range(0, len(html_dict['oligos'])):
oligo = html_dict['oligos'][i]
tmp_dict = copy.deepcopy(html_dict)
@@ -350,14 +350,14 @@ def get_figures_dict(html_dict_prefix):
rendered = render_to_string('single_oligo.tmpl', tmp_dict)
- open(oligo_page, 'w').write(rendered.encode("utf-8"))
+ open(oligo_page, 'wb').write(rendered.encode("utf-8"))
# generate index
index_page = os.path.join(html_output_directory, 'index.html')
rendered = render_to_string('index_for_oligo.tmpl', html_dict)
- open(index_page, 'w').write(rendered.encode("utf-8"))
+ open(index_page, 'wb').write(rendered.encode("utf-8"))
return index_page
@@ -370,21 +370,21 @@ def get_colors_dict(colors_file_path):
def get_oligos_list(oligos_file_path):
oligos_list = []
fasta = u.SequenceSource(oligos_file_path)
- while fasta.next():
+ while next(fasta):
oligos_list.append(fasta.seq)
return oligos_list
def get_oligo_distribution_dict(oligo, html_dict):
rep_dir = html_dict['output_directory_for_reps']
- oligo_distribution_dict = cPickle.load(open(os.path.join(rep_dir, '%.5d_'\
- % html_dict['oligos'].index(oligo) + oligo + '_unique_distribution.cPickle')))
+ oligo_distribution_dict = pickle.load(open(os.path.join(rep_dir, '%.5d_'\
+ % html_dict['oligos'].index(oligo) + oligo + '_unique_distribution.cPickle'), 'rb'))
ret_dict = {}
for sample in oligo_distribution_dict:
ret_dict[sample] = [0] * 20
for i in range(0, 20):
- if oligo_distribution_dict[sample].has_key(i + 1):
+ if i + 1 in oligo_distribution_dict[sample]:
ret_dict[sample][i] = oligo_distribution_dict[sample][i + 1]
return ret_dict
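
The dictionary changes above follow the standard 2-to-3 recipe: d.has_key(k) becomes k in d, and keys()/values() now return lazy views that only need a list() wrapper when the code slices, sorts in place, or mutates the dict while iterating. A short illustrative sketch with made-up data, not from this module:

    counts = {'ACGT': 3, 'ACGA': 1}

    # Python 3 spelling of counts.has_key('ACGT')
    assert 'ACGT' in counts

    # keys() is a view; materialize it before slicing or in-place sorting
    oligos = list(counts.keys())
    oligos.sort()

    # plain iteration needs no list() wrapper at all
    assert sum(counts.values()) == 4
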
@@ -416,7 +416,7 @@ def get_oligo_reps_dict(html_dict, html_output_directory):
oligo_reps_dict['fancy_seqs'][oligo] = []
oligo_reps_dict['clear_seqs'][oligo] = []
oligo_reps_dict['frequency'][oligo] = []
- while uniques.next() and uniques.pos <= 20:
+ while next(uniques) and uniques.pos <= 20:
oligo_reps_dict['clear_seqs'][oligo].append(uniques.seq)
oligo_reps_dict['fancy_seqs'][oligo].append(get_decorated_sequence(uniques.seq, html_dict['entropy_components']))
oligo_reps_dict['frequency'][oligo].append(pretty_print(uniques.id.split('|')[1].split(':')[1]))
@@ -426,13 +426,13 @@ def get_oligo_reps_dict(html_dict, html_output_directory):
for column, entropy in [x.strip().split('\t') for x in open(entropy_file_path)]:
entropy_values_per_column[int(column)] = float(entropy)
- color_per_column = cPickle.load(open(alignment_base_path + '_unique_color_per_column.cPickle'))
+ color_per_column = pickle.load(open(alignment_base_path + '_unique_color_per_column.cPickle', 'rb'))
oligo_reps_dict['component_references'][oligo] = ''.join(['|' % (color_per_column[i], i, entropy_values_per_column[i]) for i in range(0, html_dict['alignment_length'])])
blast_results_dict = alignment_base_path + '_unique_BLAST.cPickle'
if os.path.exists(blast_results_dict):
html_dict['blast_results_found'] = True
- oligo_reps_dict['blast_results'][oligo] = cPickle.load(open(blast_results_dict))
+ oligo_reps_dict['blast_results'][oligo] = pickle.load(open(blast_results_dict, 'rb'))
else:
oligo_reps_dict['blast_results'][oligo] = None
@@ -440,7 +440,7 @@ def get_oligo_reps_dict(html_dict, html_output_directory):
def get_alignment_length(alignment_path):
alignment = u.SequenceSource(alignment_path)
- alignment.next()
+ next(alignment)
return len(alignment.seq)
def get_unique_sequences_dict(html_dict):
@@ -452,7 +452,7 @@ def get_unique_sequences_dict(html_dict):
for i in range(0, len(oligos)):
unique_file_path = os.path.join(rep_dir, '%.5d_' % i + oligos[i] + '_unique')
f = u.SequenceSource(unique_file_path)
- f.next()
+ next(f)
rep_oligo_seqs_clean_dict[oligos[i]] = f.seq
rep_oligo_seqs_fancy_dict[oligos[i]] = get_decorated_sequence(f.seq, html_dict['entropy_components'])
f.close()
@@ -460,7 +460,7 @@ def get_unique_sequences_dict(html_dict):
def get_decorated_sequence(seq, components):
"""returns sequence with html decorations"""
- return ''.join(map(lambda j: '%s' % seq[j] if j in components else seq[j], [j for j in range(len(seq))]))
+ return ''.join(['%s' % seq[j] if j in components else seq[j] for j in range(len(seq))])
if __name__ == '__main__':
import argparse
@@ -475,8 +475,8 @@ def get_decorated_sequence(seq, components):
args = parser.parse_args()
- run_info_dict = cPickle.load(open(args.run_info_dict_path))
+ run_info_dict = pickle.load(open(args.run_info_dict_path, 'rb'))
index_page = generate_html_output(run_info_dict, args.output_directory, args.entropy_figure)
- print '\n\tHTML output is ready: "%s"\n' % index_page
+ print('\n\tHTML output is ready: "%s"\n' % index_page)
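
The while next(fasta): loops in this file work because the fastalib readers keep their Python 2 sentinel-style contract: the old next() method returned True/False instead of raising StopIteration, and the port simply renames it to __next__ so the builtin next() can drive it. A hedged, simplified sketch of that pattern -- LineSource is a stand-in, not the real class:

    class LineSource:
        """Simplified stand-in for the fastalib readers."""

        def __init__(self, lines):
            self._it = iter(lines)
            self.line = None

        def __next__(self):
            # returns False at the end instead of raising StopIteration,
            # which keeps the 'while next(src):' loops in this patch working
            self.line = next(self._it, None)
            return self.line is not None

    src = LineSource(['>read1', 'ACGT'])
    while next(src):
        print(src.line)
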
diff --git a/Oligotyping/utils/random_colors.py b/Oligotyping/utils/random_colors.py
index d7ee65f..287f8c4 100755
--- a/Oligotyping/utils/random_colors.py
+++ b/Oligotyping/utils/random_colors.py
@@ -82,4 +82,4 @@ def get_color_shade_dict_for_list_of_values(values, colormap = 'OrRd'):
if not args.output_file:
for oligo in colors_dict:
- print '%s: %s' % (oligo, colors_dict[oligo])
+ print('%s: %s' % (oligo, colors_dict[oligo]))
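
On the many list(range(...)) wrappers earlier in this patch (b6lib.py, fastalib.py, cosine_similarity.py): range() in Python 3 returns a lazy sequence object, so a wrapper is needed only where the result is concatenated, mutated, or handed to callers expecting a real list; loop headers work unchanged. range() also rejects float arguments, which is why a Python 2 expression like Q_LENGTH / 100 must become floor division. A small sketch under those assumptions:

    Q_LENGTH = 101

    # iteration works directly on the lazy range object
    total = sum(x for x in range(0, Q_LENGTH))

    # '+' concatenation is a list operation, so range() must be materialized
    ticks = list(range(0, Q_LENGTH, 10)) + [Q_LENGTH]

    # range() accepts only integers in Python 3, hence floor division
    step = max(Q_LENGTH // 100, 1)
    positions = list(range(0, Q_LENGTH, step))
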
diff --git a/Oligotyping/utils/utils.py b/Oligotyping/utils/utils.py
index b62cefe..f2cadfe 100644
--- a/Oligotyping/utils/utils.py
+++ b/Oligotyping/utils/utils.py
@@ -20,7 +20,7 @@
import random
import string
import termios
-import cPickle
+import pickle
import textwrap
import tempfile
import subprocess
@@ -91,7 +91,7 @@ def get_unit_counts_and_percents(units, samples_dict):
counts = []
percents = []
for unit in units:
- if samples_dict[sample].has_key(unit):
+ if unit in samples_dict[sample]:
counts.append(samples_dict[sample][unit])
percents.append(samples_dict[sample][unit] * 100.0 / sample_totals[sample])
else:
@@ -105,7 +105,7 @@ def get_unit_counts_and_percents(units, samples_dict):
def import_error(e):
- print '''
+ print('''
Sorry. It seems you are missing a module that is required by
the oligotyping pipeline. Here is the original import error:
@@ -117,7 +117,7 @@ def import_error(e):
https://meren.github.io/2012/05/11/oligotyping-pipeline-explained/
- \n''' % e
+ \n''' % e)
sys.exit()
@@ -174,7 +174,7 @@ def generate_MATRIX_files_for_units_across_samples(units, samples, MN_fp, SN_fp,
def get_num_nt_diff_between_two_aligned_sequences(seq1, seq2):
if len(seq1) != len(seq2):
- raise LibError, "Two sequences are not equal in length:\n\t%s\n\t%s" % (seq1, seq2)
+ raise LibError("Two sequences are not equal in length:\n\t%s\n\t%s" % (seq1, seq2))
return len(["diff" for i in range(0, len(seq1)) if seq1[i] != seq2[i]])
@@ -209,7 +209,7 @@ def homopolymer_indel_exists(seq1, seq2):
return False
isHP = lambda x: len(set(x)) == 1
- isHPindel = lambda (s, e): seq1[s:e] == seq2[s:e] and isHP(seq1[s:e]) == 1 and seq2[gap_index] == seq2[s]
+ isHPindel = lambda s_e: seq1[s_e[0]:s_e[1]] == seq2[s_e[0]:s_e[1]] and isHP(seq1[s_e[0]:s_e[1]]) == 1 and seq2[gap_index] == seq2[s_e[0]]
def DownStream(sequence):
i = 3
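
The isHPindel rewrite above is forced by PEP 3113: Python 3 dropped tuple parameters in function and lambda signatures, so lambda (s, e): ... must take a single argument that is either indexed (as this patch does) or unpacked inside a def. A minimal sketch of both options, with illustrative names:

    # Python 2 only:  span_len = lambda (s, e): e - s
    # Python 3, option 1: take one argument and index it (the patch's choice)
    span_len = lambda s_e: s_e[1] - s_e[0]

    # Python 3, option 2: unpack inside a def, usually more readable
    def span_len_def(span):
        s, e = span
        return e - s

    assert span_len((3, 7)) == span_len_def((3, 7)) == 4
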
@@ -279,7 +279,7 @@ def mask_defline_whitespaces_in_FASTA(fasta_file_path, defline_white_space_mask
fasta = u.SequenceSource(fasta_file_path)
output = u.FastaOutput(fasta_file_path + '.tmp')
- while fasta.next():
+ while next(fasta):
output.write_id(fasta.id.replace(' ', defline_white_space_mask))
output.write_seq(fasta.seq, split = False)
@@ -289,13 +289,13 @@ def unique_and_store_alignment(alignment_path, output_path):
output = u.FastaOutput(output_path)
alignment = u.SequenceSource(alignment_path, unique = True)
- alignment.next()
+ next(alignment)
most_abundant_unique_read = alignment.seq
alignment.reset()
read_ids = []
unique_read_counts = []
- while alignment.next():
+ while next(alignment):
read_ids += alignment.ids
unique_read_counts.append(len(alignment.ids))
output.store(alignment, split = False)
@@ -313,7 +313,7 @@ def generate_TAB_delim_file_from_dict(data_dict, output_file_path, order, first_
for item in data_dict:
line = [item]
for column in order:
- if not data_dict[item].has_key(column):
+ if column not in data_dict[item]:
line.append('')
else:
line.append(str(data_dict[item][column]))
@@ -336,19 +336,19 @@ def get_unique_sequences_from_FASTA(alignment, limit = 10):
fasta = u.SequenceSource(alignment, unique = True, lazy_init = False)
- while fasta.next() and fasta.pos < limit:
+ while next(fasta) and fasta.pos < limit:
unique_sequences.append((fasta.seq, len(fasta.ids), len(fasta.ids) / float(fasta.total_seq)))
return unique_sequences
def get_oligos_sorted_by_abundance(samples_dict, oligos = None, min_abundance = 0):
- samples = samples_dict.keys()
+ samples = list(samples_dict.keys())
samples.sort()
if oligos == None:
oligos = []
- map(lambda o: oligos.extend(o), [v.keys() for v in samples_dict.values()])
+ for v in samples_dict.values(): oligos.extend(v.keys())
oligos = list(set(oligos))
abundant_oligos = []
@@ -358,7 +358,7 @@ def get_oligos_sorted_by_abundance(samples_dict, oligos = None, min_abundance =
for sample in samples:
sum_sample = sum(samples_dict[sample].values())
- if samples_dict[sample].has_key(oligo):
+ if oligo in samples_dict[sample]:
percent_abundances.append((samples_dict[sample][oligo] * 100.0 / sum_sample,\
samples_dict[sample][oligo], sum_sample, sample))
@@ -394,8 +394,8 @@ def generate_gexf_network_file(units, samples_dict, unit_percents, output_file,
output = open(output_file, 'w')
samples = sorted(samples_dict.keys())
- sample_mapping_categories = sorted([k for k in sample_mapping_dict.keys() if k != 'colors']) if sample_mapping_dict else None
- unit_mapping_categories = sorted([k for k in unit_mapping_dict.keys() if k not in ['colors', 'labels']]) if unit_mapping_dict else None
+ sample_mapping_categories = sorted([k for k in list(sample_mapping_dict.keys()) if k != 'colors']) if sample_mapping_dict else None
+ unit_mapping_categories = sorted([k for k in list(unit_mapping_dict.keys()) if k not in ['colors', 'labels']]) if unit_mapping_dict else None
output.write('''\n''')
output.write('''