diff --git a/Oligotyping/lib/b6lib.py b/Oligotyping/lib/b6lib.py index ac6c7f8..b5b761d 100644 --- a/Oligotyping/lib/b6lib.py +++ b/Oligotyping/lib/b6lib.py @@ -25,7 +25,7 @@ QUERY_ID, SUBJECT_ID, IDENTITY, ALIGNMENT_LENGTH,\ MISMATCHES, GAPS, Q_START, Q_END, S_START, S_END,\ -E_VALUE, BIT_SCORE, Q_LEN, S_LEN = range(0, 14) +E_VALUE, BIT_SCORE, Q_LEN, S_LEN = list(range(0, 14)) class B6Entry: @@ -167,11 +167,11 @@ def print_b6_file_stats(self): numpy.min(self.matrix[x]), numpy.max(self.matrix[x])) - print + print() TABULAR('Total Hits', pretty_print(len(self.matrix[IDENTITY]))) - print - print ' mean std min max' - print + print() + print(' mean std min max') + print() TABULAR('Identity', INFO(IDENTITY)) TABULAR('Alignment Length', INFO(ALIGNMENT_LENGTH)) TABULAR('Mismatches', INFO(MISMATCHES)) @@ -182,7 +182,7 @@ def print_b6_file_stats(self): TABULAR('Target End', INFO(S_END)) TABULAR('E-Value', INFO(E_VALUE)) TABULAR('Bit Score', INFO(BIT_SCORE)) - print + print() def visualize_b6_output(self, title_hint, Q_LENGTH = 101): if self.matrix == []: @@ -236,12 +236,12 @@ def _setp(b, c = 'red'): ax1.plot(p1, c = 'green', label = 'Alignment Start Position') ax1.plot(p2, c = 'black', linewidth = 3) ax1.plot(p2, c = 'red', label = 'Alignment End Position') - plt.fill_between(range(0, len(p1)), p1, y2 = 0, color = 'black', alpha = 0.5) - plt.fill_between(range(0, len(p2)), p2, y2 = 0, color = 'black', alpha = 0.5) + plt.fill_between(list(range(0, len(p1))), p1, y2 = 0, color = 'black', alpha = 0.5) + plt.fill_between(list(range(0, len(p2))), p2, y2 = 0, color = 'black', alpha = 0.5) plt.ylabel('Percent of Hits') plt.xlabel('Position') - plt.xticks(range(0, Q_LENGTH, Q_LENGTH / 100), range(1, Q_LENGTH + 1, Q_LENGTH / 100), rotation=90, size='xx-small') + plt.xticks(list(range(0, Q_LENGTH, Q_LENGTH // 100)), list(range(1, Q_LENGTH + 1, Q_LENGTH // 100)), rotation=90, size='xx-small') plt.yticks([t for t in range(0, 101, 10)], ['%s%%' % t for t in range(0, 101, 10)], size='xx-small') plt.ylim(ymin = 0, ymax = 100) plt.xlim(xmin = 0, xmax = Q_LENGTH - 1) diff --git a/Oligotyping/lib/decomposer.py b/Oligotyping/lib/decomposer.py index 5a7f22c..465e90d 100644 --- a/Oligotyping/lib/decomposer.py +++ b/Oligotyping/lib/decomposer.py @@ -16,7 +16,7 @@ import time import numpy import shutil -import cPickle +import pickle import logging import Oligotyping as o @@ -140,9 +140,9 @@ def check_apps(self): try: blast.LocalBLAST(None, None, None) except blast.ModuleVersionError: - raise utils.ConfigError, blast.version_error_text + raise utils.ConfigError(blast.version_error_text) except blast.ModuleBinaryError: - raise utils.ConfigError, blast.missing_binary_error_text + raise utils.ConfigError(blast.missing_binary_error_text) # FIXME: check R modules here.
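Porting note: the xticks hunk above needs floor division (`Q_LENGTH // 100`) because Python 3's `/` always yields a float and `range()` rejects float arguments, whereas Python 2's `/` on two ints truncated. A minimal sketch of the pattern (the length value is illustrative only):

    q_length = 101                           # illustrative value
    step = q_length // 100 or 1              # floor division keeps the step an int
    ticks = list(range(0, q_length, step))   # valid on Python 3; '/' would raise TypeError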
@@ -156,10 +156,10 @@ def check_dirs(self): try: os.makedirs(self.output_directory) except: - raise utils.ConfigError, "Output directory does not exist (attempt to create one failed as well): '%s'" % \ - (self.output_directory) + raise utils.ConfigError("Output directory does not exist (attempt to create one failed as well): '%s'" % \ + (self.output_directory)) if not os.access(self.output_directory, os.W_OK): - raise utils.ConfigError, "You do not have write permission for the output directory: '%s'" % self.output_directory + raise utils.ConfigError("You do not have write permission for the output directory: '%s'" % self.output_directory) self.tmp_directory = self.generate_output_destination('TMP', directory = True) self.nodes_directory = self.generate_output_destination('NODES', directory = True) @@ -169,18 +169,18 @@ def check_dirs(self): def check_input_files(self): if (not os.path.exists(self.alignment)) or (not os.access(self.alignment, os.R_OK)): - raise utils.ConfigError, "Alignment file is not accessible: '%s'" % self.alignment + raise utils.ConfigError("Alignment file is not accessible: '%s'" % self.alignment) if self.sample_mapping: if (not os.path.exists(self.sample_mapping)) or (not os.access(self.sample_mapping, os.R_OK)): - raise utils.ConfigError, "Sample mapping file is not accessible: '%s'" % self.sample_mapping + raise utils.ConfigError("Sample mapping file is not accessible: '%s'" % self.sample_mapping) samples = None if not self.skip_check_input_file: self.progress.new('Checking the input FASTA') samples = utils.check_input_alignment(self.alignment, self.sample_name_separator, self.progress) if not samples: - raise utils.ConfigError, 'Exiting.' + raise utils.ConfigError('Exiting.') self.progress.end() if self.sample_mapping: @@ -220,8 +220,8 @@ def _init_topology(self): self.root = self.topology.add_new_node('root', reads, root = True) if self.root.size < self.min_actual_abundance: - raise utils.ConfigError, "The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \ - (self.root.size, self.min_actual_abundance) + raise utils.ConfigError("The number of reads in alignment file (%d) is smaller than --min-actual-abundance (%d)" % \ + (self.root.size, self.min_actual_abundance)) self.node_ids_to_analyze = ['root'] @@ -420,9 +420,9 @@ def _generate_raw_topology(self): if node.reads[0].frequency < self.min_substantive_abundance: if node.node_id == 'root': self.progress.end() - raise utils.ConfigError, "Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \ + raise utils.ConfigError("Number of unique reads in the root node (%d) is less than the declared minimum (%d)." \ % (node.reads[0].frequency, - self.min_substantive_abundance) + self.min_substantive_abundance)) else: # remove the node and store its content. @@ -536,7 +536,7 @@ def _generate_raw_topology(self): oligo = ''.join([read.seq[d] for d in node.discriminants]) - if new_nodes_dict.has_key(oligo): + if oligo in new_nodes_dict: new_nodes_dict[oligo]['reads'].append(read) else: new_node_id = self.topology.get_new_node_id() @@ -546,7 +546,7 @@ def _generate_raw_topology(self): # all reads in the parent node are analyzed. time to add spawned nodes into the topology. 
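Porting note: `dict.has_key()` is gone in Python 3; the `in` operator used throughout these hunks is the portable replacement, including for the negated checks. A minimal sketch (dictionary contents are illustrative):

    new_nodes_dict = {'ACG': {'reads': []}}
    # Python 2 only: new_nodes_dict.has_key('ACG')
    if 'ACG' in new_nodes_dict:              # portable membership test
        new_nodes_dict['ACG']['reads'].append('read_001')
    if 'TTG' not in new_nodes_dict:          # replaces `if not d.has_key(k)`
        new_nodes_dict['TTG'] = {'reads': []}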
- oligos = new_nodes_dict.keys() + oligos = list(new_nodes_dict.keys()) len_oligos = len(oligos) for i in range(0, len_oligos): self.progress.update(p + ' / new nodes %d of %d ' % (i + 1, len_oligos)) @@ -678,7 +678,7 @@ def _refine_topology(self): abundant_reads_in_outlier_bin = [] - if self.topology.outliers.has_key('maximum_variation_allowed_reason'): + if 'maximum_variation_allowed_reason' in self.topology.outliers: abundant_reads_in_outlier_bin = [read_object for read_object in \ self.topology.outliers['maximum_variation_allowed_reason'] \ if read_object.frequency > self.min_substantive_abundance] @@ -1075,7 +1075,7 @@ def get_dict_entry_tmpl(): self.progress.update('Processing reads that were represented in results') for sample in self.samples_dict: - if not read_distribution_dict.has_key(sample): + if sample not in read_distribution_dict: read_distribution_dict[sample] = get_dict_entry_tmpl() read_distribution_dict[sample]['represented_reads'] = sum(self.samples_dict[sample].values()) @@ -1086,7 +1086,7 @@ def get_dict_entry_tmpl(): for read_id in read_object.ids: sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator) - if not read_distribution_dict.has_key(sample): + if sample not in read_distribution_dict: read_distribution_dict[sample] = get_dict_entry_tmpl() read_distribution_dict[sample][reason] += 1 @@ -1166,11 +1166,11 @@ def _generate_samples_dict(self): for read_id in read.ids: sample = utils.get_sample_name_from_defline(read_id, self.sample_name_separator) - if not self.samples_dict.has_key(sample): + if sample not in self.samples_dict: self.samples_dict[sample] = {} self.samples.append(sample) - if self.samples_dict[sample].has_key(node_id): + if node_id in self.samples_dict[sample]: self.samples_dict[sample][node_id] += 1 else: self.samples_dict[sample][node_id] = 1 @@ -1238,7 +1238,7 @@ def _store_topology_dict(self): self.progress.end() topology_dict_file_path = self.generate_output_destination('TOPOLOGY-LIGHT.cPickle') - cPickle.dump(topology_dict, open(topology_dict_file_path, 'w')) + pickle.dump(topology_dict, open(topology_dict_file_path, 'wb')) self.run.info('topology_light_dict', topology_dict_file_path) @@ -1347,7 +1347,7 @@ def _generate_html_output(self): from Oligotyping.utils.html.error import HTMLError try: from Oligotyping.utils.html.for_decomposition import generate_html_output - except HTMLError, e: + except HTMLError as e: sys.stdout.write('\n\n\t%s\n\n' % e) sys.exit() @@ -1383,7 +1383,7 @@ def _generate_default_figures(self): figures_dict = generate_default_figures(self) figures_dict_file_path = self.generate_output_destination("FIGURES.cPickle") - cPickle.dump(figures_dict, open(figures_dict_file_path, 'w')) + pickle.dump(figures_dict, open(figures_dict_file_path, 'wb')) self.progress.end() self.run.info('figures_dict_file_path', figures_dict_file_path) @@ -1397,7 +1397,7 @@ def _generate_exclusive_figures(self): exclusive_figures_dict = generate_exclusive_figures(self) exclusive_figures_dict_file_path = self.generate_output_destination("EXCLUSIVE-FIGURES.cPickle") - cPickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w')) + pickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'wb')) self.progress.end() self.run.info('exclusive_figures_dict_file_path', exclusive_figures_dict_file_path) diff --git a/Oligotyping/lib/entropy.py b/Oligotyping/lib/entropy.py index f264fe7..20dcbaf 100644 --- a/Oligotyping/lib/entropy.py +++ b/Oligotyping/lib/entropy.py @@ -79,11 +79,11 @@ def
entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued progress.new('Processing the Alignment') # processing the alignment file.. - while alignment.next(): + while next(alignment): # check the alignment lengths along the way: if previous_alignment_length: if previous_alignment_length != len(alignment.seq): - raise EntropyError, "Not all reads have the same length." + raise EntropyError("Not all reads have the same length.") # print out process info if alignment.pos % 10000 == 0: @@ -96,7 +96,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued try: frequency = freq_from_defline(alignment.id) except IndexError: - raise EntropyError, "Reads declared as unique, but they do not have proper deflines. See help for --uniqued." + raise EntropyError("Reads declared as unique, but they do not have proper deflines. See help for --uniqued.") for i in range(0, frequency): lines.append(alignment.seq) @@ -124,7 +124,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued if weighted: if not qual_stats_dict: - raise EntropyError, "Weighted entropy is selected, but no qual stats are provided" + raise EntropyError("Weighted entropy is selected, but no qual stats are provided") e = entropy(column, l_qual = qual_stats_dict[position], amino_acid_sequences = amino_acid_sequences) else: e = entropy(column, amino_acid_sequences = amino_acid_sequences) @@ -164,7 +164,7 @@ def entropy_analysis(alignment_path, output_file = None, verbose = True, uniqued def quick_entropy(l, amino_acid_sequences = False): if len(set([len(x) for x in l])) != 1: - raise EntropyError, "Not all vectors have the same length." + raise EntropyError("Not all vectors have the same length.") entropy_tpls = [] for position in range(0, len(l[0])): diff --git a/Oligotyping/lib/fastalib.py b/Oligotyping/lib/fastalib.py index 8fe8666..228ffaa 100644 --- a/Oligotyping/lib/fastalib.py +++ b/Oligotyping/lib/fastalib.py @@ -36,7 +36,7 @@ def write_seq(self, seq, split = True): self.output_file_obj.write('%s\n' % seq) def split(self, sequence, piece_length = 80): - ticks = range(0, len(sequence), piece_length) + [len(sequence)] + ticks = list(range(0, len(sequence), piece_length)) + [len(sequence)] return '\n'.join([sequence[ticks[x]:ticks[x + 1]] for x in range(0, len(ticks) - 1)]) def close(self): @@ -50,7 +50,7 @@ def __init__(self, f_name): self.fasta = SequenceSource(f_name) - while self.fasta.next(): + while next(self.fasta): if self.fasta.pos % 1000 == 0 or self.fasta.pos == 1: sys.stderr.write('\r[fastalib] Reading FASTA into memory: %s' % (self.fasta.pos)) sys.stderr.flush() @@ -94,7 +94,7 @@ def __init__(self, fasta_file_path, lazy_init = True, unique = False, allow_mixe def init_unique_hash(self): while self.next_regular(): - hash = hashlib.sha1(self.seq.upper()).hexdigest() + hash = hashlib.sha1(self.seq.upper().encode('utf-8')).hexdigest() if hash in self.unique_hash_dict: self.unique_hash_dict[hash]['ids'].append(self.id) self.unique_hash_dict[hash]['count'] += 1 @@ -111,7 +111,7 @@ def init_unique_hash(self): self.total_unique = len(self.unique_hash_dict) self.reset() - def next(self): + def __next__(self): if self.unique: return self.next_unique() else: @@ -159,7 +159,7 @@ def next_regular(self): def get_seq_by_read_id(self, read_id): self.reset() - while self.next(): + while next(self): if self.id == read_id: return self.seq @@ -184,7 +184,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len self.reset() - while 
self.next(): + while next(self): if self.pos % 10000 == 0 or self.pos == 1: sys.stderr.write('\r[fastalib] Reading: %s' % (self.pos)) sys.stderr.flush() @@ -213,7 +213,7 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len plt.subplots_adjust(left=0.05, bottom = 0.03, top = 0.95, right = 0.98) plt.plot(seq_len_distribution, color = 'black', alpha = 0.3) - plt.fill_between(range(0, max_seq_len + 1), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15) + plt.fill_between(list(range(0, max_seq_len + 1)), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.15) plt.ylabel('number of sequences') plt.xlabel('sequence length') @@ -223,8 +223,8 @@ def visualize_sequence_length_distribution(self, title, dest = None, max_seq_len if ytickstep == None: - ytickstep = max(seq_len_distribution) / 20 or 1 + ytickstep = max(seq_len_distribution) // 20 or 1 - plt.xticks(range(xtickstep, max_seq_len + 1, xtickstep), rotation=90, size='xx-small') - plt.yticks(range(0, max(seq_len_distribution) + 1, ytickstep), + plt.xticks(list(range(xtickstep, max_seq_len + 1, xtickstep)), rotation=90, size='xx-small') + plt.yticks(list(range(0, max(seq_len_distribution) + 1, ytickstep)), [y for y in range(0, max(seq_len_distribution) + 1, ytickstep)], size='xx-small') plt.xlim(xmin = 0, xmax = max_seq_len) @@ -281,7 +281,7 @@ def __init__(self, quals_file_path, lazy_init = True): self.reset() - def next(self): + def __next__(self): self.id = self.file_pointer.readline()[1:].strip() self.quals = None self.quals_int = None diff --git a/Oligotyping/lib/oligotyping.py b/Oligotyping/lib/oligotyping.py index 9ddfd78..f53907d 100644 --- a/Oligotyping/lib/oligotyping.py +++ b/Oligotyping/lib/oligotyping.py @@ -15,7 +15,7 @@ import sys import copy import shutil -import cPickle +import pickle import logging import itertools import math @@ -32,6 +32,7 @@ from Oligotyping.visualization.oligotype_sets_distribution import vis_oligotype_sets_distribution from Oligotyping.visualization.oligotype_distribution_stack_bar import oligotype_distribution_stack_bar from Oligotyping.visualization.oligotype_distribution_across_samples import oligotype_distribution_across_samples +from functools import reduce class Oligotyping: @@ -52,7 +53,7 @@ def __init__(self, args = None): self.project = None self.output_directory = None self.sample_name_separator = '_' - self.limit_representative_sequences = sys.maxint + self.limit_representative_sequences = sys.maxsize self.quick = False self.no_figures = False self.no_display = False @@ -89,7 +90,7 @@ def __init__(self, args = None): self.project = args.project or os.path.basename(args.alignment).split('.')[0] self.output_directory = args.output_directory self.sample_name_separator = args.sample_name_separator - self.limit_representative_sequences = args.limit_representative_sequences or sys.maxint + self.limit_representative_sequences = args.limit_representative_sequences or sys.maxsize self.quick = args.quick self.no_figures = args.no_figures self.no_display = args.no_display @@ -140,19 +141,19 @@ def check_apps(self): try: blast.LocalBLAST(None, None, None) except blast.ModuleVersionError: - raise utils.ConfigError, blast.version_error_text + raise utils.ConfigError(blast.version_error_text) except blast.ModuleBinaryError: - raise utils.ConfigError, blast.missing_binary_error_text + raise utils.ConfigError(blast.missing_binary_error_text) # FIXME: check R modules here.
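Porting note: Python 3's pickle serializes to and from bytes, so every `pickle.dump()`/`pickle.load()` in the decomposer hunks above must go through a binary-mode handle ('wb'/'rb'); a text-mode handle fails with TypeError on the first write or read. A minimal sketch of the required pairing (path and payload are illustrative):

    import pickle

    topology = {'root': ['ACG', 'ATG']}      # illustrative payload
    with open('TOPOLOGY-LIGHT.cPickle', 'wb') as f:
        pickle.dump(topology, f)             # dump needs a bytes sink
    with open('TOPOLOGY-LIGHT.cPickle', 'rb') as f:
        topology = pickle.load(f)            # load needs a bytes source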
def check_dirs(self): if self.number_of_auto_components != None and self.selected_components != None: - raise utils.ConfigError, "You either have to declare 'auto components' (-c) or 'selected components' (-C)." + raise utils.ConfigError("You either have to declare 'auto components' (-c) or 'selected components' (-C).") if self.number_of_auto_components == None and self.selected_components == None: - raise utils.ConfigError, "Both 'auto components' (-c), and 'selected components' (-C) were declared." + raise utils.ConfigError("Neither 'auto components' (-c), nor 'selected components' (-C) was declared.") # check output associated stuff if not self.output_directory: @@ -162,10 +163,10 @@ check_dirs(self): try: os.makedirs(self.output_directory) except: - raise utils.ConfigError, "Output directory does not exist (attempt to create one failed as well): '%s'" % \ - (self.output_directory) + raise utils.ConfigError("Output directory does not exist (attempt to create one failed as well): '%s'" % \ + (self.output_directory)) if not os.access(self.output_directory, os.W_OK): - raise utils.ConfigError, "You do not have write permission for the output directory: '%s'" % self.output_directory + raise utils.ConfigError("You do not have write permission for the output directory: '%s'" % self.output_directory) self.tmp_directory = self.generate_output_destination('TMP', directory = True) self.figures_directory = self.generate_output_destination('FIGURES', directory = True) @@ -173,25 +174,25 @@ def check_input(self): if (not os.path.exists(self.alignment)) or (not os.access(self.alignment, os.R_OK)): - raise utils.ConfigError, "Alignment file is not accessible: '%s'" % self.alignment + raise utils.ConfigError("Alignment file is not accessible: '%s'" % self.alignment) if (not os.path.exists(self.entropy)) or (not os.access(self.entropy, os.R_OK)): - raise utils.ConfigError, "Entropy file is not accessible: '%s'" % self.entropy + raise utils.ConfigError("Entropy file is not accessible: '%s'" % self.entropy) if self.sample_mapping: if (not os.path.exists(self.sample_mapping)) or (not os.access(self.sample_mapping, os.R_OK)): - raise utils.ConfigError, "Sample mapping file is not accessible: '%s'" % self.sample_mapping + raise utils.ConfigError("Sample mapping file is not accessible: '%s'" % self.sample_mapping) if self.colors_list_file: if not os.path.exists(self.colors_list_file): - raise utils.ConfigError, "Colors list file does not exist: '%s'" % self.colors_list_file + raise utils.ConfigError("Colors list file does not exist: '%s'" % self.colors_list_file) first_characters = list(set([c.strip()[0] for c in open(self.colors_list_file)])) if len(first_characters) != 1 or first_characters[0] != '#': - raise utils.ConfigError, "Colors list file does not seem to be correctly formatted" + raise utils.ConfigError("Colors list file does not seem to be correctly formatted") # set the alignment length (it will be necessary to check certain params) alignment = u.SequenceSource(self.alignment) - alignment.next() + next(alignment) self.alignment_length = len(alignment.seq) alignment.close() @@ -203,7 +204,7 @@ def check_input(self): self.progress.new('Checking the input FASTA') samples = utils.check_input_alignment(self.alignment, self.sample_name_separator, self.progress) if not samples: - raise utils.ConfigError, 'Exiting.'
+ raise utils.ConfigError('Exiting.') self.progress.end() if self.sample_mapping: @@ -218,44 +219,44 @@ def check_params(self): try: self.selected_components = [int(c) for c in self.selected_components.split(',')] except: - raise utils.ConfigError, "Selected components should be comma separated integer values (such as '4,8,15,25,47')." + raise utils.ConfigError("Selected components should be comma separated integer values (such as '4,8,15,25,47').") if max(self.selected_components) >= self.alignment_length: - raise utils.ConfigError, "There is at least one component ('%d') that is bigger than the alignment length."\ - % max(self.selected_components) + raise utils.ConfigError("There is at least one component ('%d') that is bigger than the alignment length."\ + % max(self.selected_components)) if min(self.selected_components) < 0: - raise utils.ConfigError, "Selected components can't be smaller than 0" + raise utils.ConfigError("Selected components can't be smaller than 0") components_declared_more_than_once = [c[0] for c in itertools.groupby(sorted(self.selected_components))\ if len(list(c[1])) > 1] N = len(components_declared_more_than_once) if N: - raise utils.ConfigError, "You declared %s component%s (%s) more than once."\ + raise utils.ConfigError("You declared %s component%s (%s) more than once."\ % ('a' if N == 1 else '%s' % str(N), 's' if N > 1 else '', - ', '.join([str(c) for c in components_declared_more_than_once])) + ', '.join([str(c) for c in components_declared_more_than_once]))) if self.min_base_quality: try: self.min_base_quality = int(self.min_base_quality) assert(self.min_base_quality >= 0 and self.min_base_quality <= 40) except: - raise utils.ConfigError, "Minimum base quality must be an integer between 0 and 40." + raise utils.ConfigError("Minimum base quality must be an integer between 0 and 40.") if self.limit_oligotypes_to: self.limit_oligotypes_to = [o.strip().upper() for o in self.limit_oligotypes_to.split(',')] if len(self.limit_oligotypes_to) == 1: - raise utils.ConfigError, "There must be more than one oligotype for --limit-oligotypes parameter." + raise utils.ConfigError("There must be more than one oligotype for --limit-oligotypes parameter.") if len([n for n in ''.join(self.limit_oligotypes_to) if n not in ['A', 'T', 'C', 'G', '-']]): - raise utils.ConfigError, "Oligotypes defined by --limit-oligotypes parameter seems to have ambiguous characters." + raise utils.ConfigError("Oligotypes defined by --limit-oligotypes parameter seems to have ambiguous characters.") if self.exclude_oligotypes: self.exclude_oligotypes = [o.strip().upper() for o in self.exclude_oligotypes.split(',')] if len([n for n in ''.join(self.exclude_oligotypes) if n not in ['A', 'T', 'C', 'G', '-']]): - raise utils.ConfigError, "Oligotypes defined by --exclude-oligotypes parameter seems to have ambiguous characters." 
+ raise utils.ConfigError("Oligotypes defined by --exclude-oligotypes parameter seems to have ambiguous characters.") return True @@ -450,14 +451,14 @@ def _construct_samples_dict(self): num_reads_eliminated_due_to_min_base_quality = 0 self.fasta.reset() - while self.fasta.next(): + while next(self.fasta): if self.fasta.pos % 1000 == 0: self.progress.update('Analyzing: %s' \ % (utils.pretty_print(self.fasta.pos))) sample = utils.get_sample_name_from_defline(self.fasta.id, self.sample_name_separator) - if not self.samples_dict.has_key(sample): + if sample not in self.samples_dict: self.samples_dict[sample] = {} self.samples.append(sample) @@ -483,7 +484,7 @@ def _construct_samples_dict(self): # checking the base qualities oligo = ''.join(self.fasta.seq[o] for o in self.bases_of_interest_locs) - if self.samples_dict[sample].has_key(oligo): + if oligo in self.samples_dict[sample]: self.samples_dict[sample][oligo] += 1 else: self.samples_dict[sample][oligo] = 1 @@ -495,16 +496,16 @@ def _construct_samples_dict(self): if self.quals_dict: self.run.info('num_reads_eliminated_due_to_min_base_quality', num_reads_eliminated_due_to_min_base_quality) if self.fasta.total_seq == num_reads_eliminated_due_to_min_base_quality: - raise utils.ConfigError, "All reads were eliminated due to --min-base-quality (%d) rule" % self.min_base_quality + raise utils.ConfigError("All reads were eliminated due to --min-base-quality (%d) rule" % self.min_base_quality) def _register_removal(self, oligo, reason = 'unknown'): - if not self.excluded_read_ids_tracker.has_key(reason): + if reason not in self.excluded_read_ids_tracker: self.excluded_read_ids_tracker[reason] = {} for sample in self.samples: - if self.samples_dict[sample].has_key(oligo): - if not self.excluded_read_ids_tracker[reason].has_key(sample): + if oligo in self.samples_dict[sample]: + if sample not in self.excluded_read_ids_tracker[reason]: self.excluded_read_ids_tracker[reason][sample] = self.samples_dict[sample][oligo] else: self.excluded_read_ids_tracker[reason][sample] += self.samples_dict[sample][oligo] @@ -569,7 +570,7 @@ def _contrive_abundant_oligos(self): # for each sample. computing it here once is more optimized. 
sample_sums = {} SUM = lambda sample: sum([self.samples_dict[sample][o] for o in non_singleton_oligos \ - if self.samples_dict[sample].has_key(o)]) + if o in self.samples_dict[sample]]) for sample in self.samples: sample_sums[sample] = SUM(sample) @@ -583,7 +584,7 @@ def _contrive_abundant_oligos(self): percent_abundances = [] for sample in self.samples: - if self.samples_dict[sample].has_key(oligo): + if oligo in self.samples_dict[sample]: percent_abundances.append((self.samples_dict[sample][oligo] * 100.0 / sample_sums[sample], self.samples_dict[sample][oligo], sample_sums[sample], @@ -625,7 +626,7 @@ def _contrive_abundant_oligos(self): self.progress.update(utils.P(i, len(non_singleton_oligos))) oligo_actual_abundance = sum([self.samples_dict[sample][oligo] for sample in self.samples_dict\ - if self.samples_dict[sample].has_key(oligo)]) + if oligo in self.samples_dict[sample]]) if self.min_actual_abundance > oligo_actual_abundance: oligos_for_removal.append(oligo) @@ -695,8 +696,8 @@ def _contrive_abundant_oligos(self): self.run.info('num_oligos_after_l_elim', len(self.abundant_oligos)) if len(self.abundant_oligos) == 0: - raise utils.ConfigError, "\n\n\t--limit-oligotypes parameter eliminated all oligotypes.\ - \n\tPlease make sure --limit-oligotypes matches with actual oligotypes.\n\n\tQuiting.\n" + raise utils.ConfigError("\n\n\t--limit-oligotypes parameter eliminated all oligotypes.\ + \n\tPlease make sure --limit-oligotypes matches with actual oligotypes.\n\n\tQuiting.\n") # if 'exclude_oligotypes' is defined, remove them from analysis if they are present if self.exclude_oligotypes: @@ -711,12 +712,12 @@ def _contrive_abundant_oligos(self): # storing final counts for oligo in self.abundant_oligos: self.final_oligo_counts_dict[oligo] = sum([self.samples_dict[sample][oligo] for sample in self.samples_dict\ - if self.samples_dict[sample].has_key(oligo)]) + if oligo in self.samples_dict[sample]]) # in case no oligos left if not len(self.abundant_oligos): - raise utils.ConfigError, "\n\n\tAll oligotypes were discarded during the noise removal step.\ - \n\tPlease check your parameters.\n\n\tQuiting.\n" + raise utils.ConfigError("\n\n\tAll oligotypes were discarded during the noise removal step.\ + \n\tPlease check your parameters.\n\n\tQuiting.\n") # if there is only one oligotype left, skip basic analyses if len(self.abundant_oligos) == 1: @@ -892,7 +893,7 @@ def get_dict_entry_tmpl(): self.progress.update('Processing reads that were represented in results') for sample in self.samples_dict: - if not read_distribution_dict.has_key(sample): + if sample not in read_distribution_dict: read_distribution_dict[sample] = get_dict_entry_tmpl() read_distribution_dict[sample]['represented_reads'] = sum(self.samples_dict[sample].values()) @@ -900,7 +901,7 @@ def get_dict_entry_tmpl(): for reason in self.excluded_read_ids_tracker: self.progress.update('Processing excluded oligos (%s)' % (reason)) for sample in self.excluded_read_ids_tracker[reason]: - if not read_distribution_dict.has_key(sample): + if sample not in read_distribution_dict: read_distribution_dict[sample] = get_dict_entry_tmpl() read_distribution_dict[sample][reason] = self.excluded_read_ids_tracker[reason][sample] @@ -921,9 +922,9 @@ def _generate_random_colors(self): # it means user provided a list of colors to be used for oligotypes colors = [c.strip() for c in open(self.colors_list_file).readlines()] if len(colors) < len(self.abundant_oligos): - raise utils.ConfigError, "Number of colors defined in colors file (%d),\ + raise 
utils.ConfigError("Number of colors defined in colors file (%d),\ is smaller than the number of abundant oligotypes (%d)" % \ - (len(colors), len(self.abundant_oligos)) + (len(colors), len(self.abundant_oligos))) colors_dict = {} for i in range(0, len(self.abundant_oligos)): colors_dict[self.abundant_oligos[i]] = colors[i] @@ -959,7 +960,7 @@ def _agglomerate_oligos_based_on_cosine_similarity(self): self.progress.new('Generating data objects for newly generated oligotype sets') self.progress.update('New Colors') - self.oligotype_set_ids = range(0, len(self.oligotype_sets)) + self.oligotype_set_ids = list(range(0, len(self.oligotype_sets))) self.colors_dict_for_oligotype_sets = {} for set_id in self.oligotype_set_ids: @@ -996,7 +997,7 @@ def _generate_MATRIX_files_for_oligotype_sets(self): counts = [] percents = [] for sample in self.samples: - if d[sample].has_key(oligotype_set_id): + if oligotype_set_id in d[sample]: counts.append(d[sample][oligotype_set_id]) percents.append(d[sample][oligotype_set_id] * 100.0 / sum(d[sample].values())) else: @@ -1031,10 +1032,10 @@ def _get_unique_sequence_distributions_within_abundant_oligos(self): # listed in this dictionary MAY NOT be the final oligos once the noise # filtering step has ended. - temp_unique_distributions = dict(zip(self.abundant_oligos, [{} for x in range(0, len(self.abundant_oligos))])) + temp_unique_distributions = dict(list(zip(self.abundant_oligos, [{} for x in range(0, len(self.abundant_oligos))]))) self.fasta.reset() - while self.fasta.next(): + while next(self.fasta): if self.progress and self.fasta.pos % 1000 == 0: self.progress.update('Computing sequence distributions: %.2f%%' \ % (self.fasta.pos * 100.0 / self.fasta.total_seq)) @@ -1046,7 +1047,7 @@ def _get_unique_sequence_distributions_within_abundant_oligos(self): temp_unique_distributions[oligo][self.fasta.seq] = 1 for oligo in self.abundant_oligos: - temp_unique_distributions[oligo] = sorted(temp_unique_distributions[oligo].values(), reverse = True) + temp_unique_distributions[oligo] = sorted(list(temp_unique_distributions[oligo].values()), reverse = True) return temp_unique_distributions @@ -1073,7 +1074,7 @@ def _generate_representative_sequences(self): unique_files_dict[oligo] = {'file': open(fasta_file_path + '_unique', 'w'), 'path': fasta_file_path + '_unique'} except IOError: - print '\n\t'.join(['', + print('\n\t'.join(['', 'WARNING: Oligotyping process has reached the maximum number of open files', 'limit defined by the operating system. There are "%d" oligotypes to be'\ % len(self.abundant_oligos), @@ -1085,15 +1086,15 @@ def _generate_representative_sequences(self): '', 'Until this issue is solved, representative sequences are not going to be', 'computed.', - '']) + ''])) # clean after yourself. close every file, delete directory, exit. 
- [map(lambda x: x.close(), [g[o]['file'] for o in g]) for g in [fasta_files_dict, unique_files_dict]] + [[x.close() for x in [g[o]['file'] for o in g]] for g in [fasta_files_dict, unique_files_dict]] shutil.rmtree(output_directory_for_reps) sys.exit() self.fasta.reset() - while self.fasta.next(): + while next(self.fasta): if self.fasta.pos % 1000 == 0: self.progress.update('Generating Individual FASTA Files: %.2f%%' \ % (self.fasta.pos * 100.0 / self.fasta.total_seq)) @@ -1120,7 +1121,7 @@ def _generate_representative_sequences(self): # is distributed among samples: distribution_among_samples = {} - fasta.next() + next(fasta) # this is the first read in the unique reads list, which is the most abundant unique sequence # for the oligotype. so we are going to store it in a dict to generate # representative sequences FASTA file: @@ -1131,7 +1132,7 @@ def _generate_representative_sequences(self): # FIXME: I am going to come back to this and fix it at some point. Storing 'distribution_among_samples' # information in separate cPickle files per oligo is not the smartest thing to do. self.final_oligo_unique_distribution_dict[oligo] = [] - while fasta.next() and fasta.pos <= self.limit_representative_sequences: + while next(fasta) and fasta.pos <= self.limit_representative_sequences: unique_files_dict[oligo]['file'].write('>%s_%d|freq:%d\n'\ % (oligo, fasta.pos, @@ -1144,10 +1145,10 @@ def _generate_representative_sequences(self): for sample_id in fasta.ids: sample_name = utils.get_sample_name_from_defline(sample_id, self.sample_name_separator) - if not distribution_among_samples.has_key(sample_name): + if sample_name not in distribution_among_samples: distribution_among_samples[sample_name] = {} d = distribution_among_samples[sample_name] - if not d.has_key(fasta.pos): + if fasta.pos not in d: d[fasta.pos] = 1 else: d[fasta.pos] += 1 @@ -1157,7 +1158,7 @@ def _generate_representative_sequences(self): unique_fasta_path = unique_files_dict[oligo]['path'] distribution_among_samples_dict_path = unique_fasta_path + '_distribution.cPickle' - cPickle.dump(distribution_among_samples, open(distribution_among_samples_dict_path, 'w')) + pickle.dump(distribution_among_samples, open(distribution_among_samples_dict_path, 'wb')) self.progress.end() @@ -1249,7 +1250,7 @@ def _perform_local_BLAST_search_for_oligo_representative(self, unique_files_dict self.progress.update('Storing representative sequences for "%s" ...' 
% oligo) unique_fasta_path = unique_files_dict[oligo]['path'] unique_fasta = u.SequenceSource(unique_fasta_path) - unique_fasta.next() + next(unique_fasta) representative_fasta_entries.append((oligo, unique_fasta.seq),) unique_fasta.close() utils.append_reads_to_FASTA(representative_fasta_entries, query) @@ -1284,10 +1285,10 @@ def _perform_local_BLAST_search_for_oligo_representative(self, unique_files_dict for oligo in self.abundant_oligos: unique_fasta_path = unique_files_dict[oligo]['path'] fancy_blast_result_output_path = unique_fasta_path + '_BLAST.cPickle' - if fancy_results_dict.has_key(oligo): - cPickle.dump(fancy_results_dict[oligo], open(fancy_blast_result_output_path, 'w')) + if oligo in fancy_results_dict: + pickle.dump(fancy_results_dict[oligo], open(fancy_blast_result_output_path, 'wb')) else: - cPickle.dump([], open(fancy_blast_result_output_path, 'w')) + pickle.dump([], open(fancy_blast_result_output_path, 'wb')) def _perform_remote_BLAST_search_for_oligo_representative(self, oligo, unique_files_dict): @@ -1296,7 +1297,7 @@ def _perform_remote_BLAST_search_for_oligo_representative(self, oligo, unique_fi unique_fasta_path = unique_files_dict[oligo]['path'] unique_fasta = u.SequenceSource(unique_fasta_path) - unique_fasta.next() + next(unique_fasta) blast_output_xml = unique_fasta_path + '_BLAST.xml' blast_output_dict = unique_fasta_path + '_BLAST.cPickle' @@ -1307,7 +1308,7 @@ def blast_search_wrapper(seq, xml_path, pickle_path): try: results = r.search(seq, xml_path) results_list = r.get_fancy_results_list(results) - cPickle.dump(results_list, open(pickle_path, 'w')) + pickle.dump(results_list, open(pickle_path, 'wb')) return True except: return False @@ -1348,7 +1349,7 @@ def _generate_entropy_figure_for_abundant_oligotype(self, oligo, unique_fasta_pa for i in range(0, self.alignment_length): color_per_column[i] = color_shade_dict[entropy_values_per_column[i]] - cPickle.dump(color_per_column, open(color_per_column_path, 'w')) + pickle.dump(color_per_column, open(color_per_column_path, 'wb')) def _generate_oligos_across_samples_figure(self): @@ -1393,7 +1394,7 @@ def _generate_default_figures(self): figures_dict = generate_default_figures(self) figures_dict_file_path = self.generate_output_destination("FIGURES.cPickle") - cPickle.dump(figures_dict, open(figures_dict_file_path, 'w')) + pickle.dump(figures_dict, open(figures_dict_file_path, 'wb')) self.progress.end() self.run.info('figures_dict_file_path', figures_dict_file_path) @@ -1407,7 +1408,7 @@ def _generate_exclusive_figures(self): exclusive_figures_dict = generate_exclusive_figures(self) exclusive_figures_dict_file_path = self.generate_output_destination("EXCLUSIVE-FIGURES.cPickle") - cPickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'w')) + pickle.dump(exclusive_figures_dict, open(exclusive_figures_dict_file_path, 'wb')) self.progress.end() self.run.info('exclusive_figures_dict_file_path', exclusive_figures_dict_file_path) @@ -1440,7 +1441,7 @@ def _generate_html_output(self): from Oligotyping.utils.html.error import HTMLError try: from Oligotyping.utils.html.for_oligotyping import generate_html_output - except HTMLError, e: + except HTMLError as e: sys.stdout.write('\n\n\t%s\n\n' % e) sys.exit() diff --git a/Oligotyping/lib/shared.py b/Oligotyping/lib/shared.py index 7dbb7be..9f521a9 100644 --- a/Oligotyping/lib/shared.py +++ b/Oligotyping/lib/shared.py @@ -116,11 +116,11 @@ def generate_exclusive_figures(_object): for category in sample_mapping_dict: exclusive_figures_dict[category] = {} -
samples = sample_mapping_dict[category].keys() + samples = list(sample_mapping_dict[category].keys()) # double filter: first makes sure sample was not removed from the analysis due to losing all its reads during the # refinement, second makes sure that sample was actually mapped to something in the sample mapping file. - samples = filter(lambda s: sample_mapping_dict[category][s], filter(lambda s: s in _object.samples, samples)) + samples = [s for s in samples if s in _object.samples and sample_mapping_dict[category][s]] samples.sort() mapping_file_path = get_temporary_file_name('%s-' % category, '-mapping.txt', _object.tmp_directory) diff --git a/Oligotyping/lib/topology.py b/Oligotyping/lib/topology.py index 3766ad2..d4ef932 100644 --- a/Oligotyping/lib/topology.py +++ b/Oligotyping/lib/topology.py @@ -54,7 +54,7 @@ def get_new_node_id(self): def add_new_node(self, node_id, unique_read_objects_list, root = False, parent_id = None): if not self.nodes_output_directory: - raise ConfigError, "Nodes output directory has to be declared before adding new nodes" + raise ConfigError("Nodes output directory has to be declared before adding new nodes") node = Node(node_id, self.nodes_output_directory) @@ -103,15 +103,15 @@ def get_node(self, node_id): def print_node(self, node_id): node = self.nodes[node_id] - print - print 'Node "%s"' % node - print '---------------------------------' - print 'Alive : %s' % (not node.killed) - print 'Dirty : %s' % node.dirty - print 'Size : %d' % node.size - print 'Parent : %s' % node.parent - print 'Children :', node.children - print + print() + print('Node "%s"' % node) + print('---------------------------------') + print('Alive : %s' % (not node.killed)) + print('Dirty : %s' % node.dirty) + print('Size : %d' % node.size) + print('Parent : %s' % node.parent) + print('Children :', node.children) + print() def get_final_count(self): diff --git a/Oligotyping/utils/blast.py b/Oligotyping/utils/blast.py index 20dcd34..3cc47cb 100644 --- a/Oligotyping/utils/blast.py +++ b/Oligotyping/utils/blast.py @@ -12,7 +12,7 @@ import os import time import copy -import cStringIO +import io import Oligotyping.lib.fastalib as u import Oligotyping.lib.b6lib as b6lib @@ -97,7 +97,7 @@ def __str__(self): from Bio.Blast import NCBIWWW from Bio.Blast import NCBIXML except: - raise MissingModuleError, biopython_error_text + raise MissingModuleError(biopython_error_text) class LocalBLAST: @@ -140,11 +140,11 @@ def get_cmd_line_params_dict(self): def binary_check(self): if (not is_program_exist(self.binary)) or (not is_program_exist(self.makeblastdb)): - raise ModuleBinaryError, missing_binary_error_text + raise ModuleBinaryError(missing_binary_error_text) def version_check(self): - version_text = check_command_output('%(binary)s -version' % self.get_cmd_line_params_dict()) + version_text = check_command_output('%(binary)s -version' % self.get_cmd_line_params_dict()).decode("utf-8") # we expect to see an output like this: # # blastn: 2.2.26+ @@ -153,7 +153,7 @@ def version_check(self): major_blastn_version = version_text.strip().split()[1].split('.')[0] if major_blastn_version != '2': - raise ModuleVersionError, version_error_text + raise ModuleVersionError(version_error_text) def search_parallel(self, num_processes, num_reads_per_process = 2000, keep_parts = False): @@ -222,7 +222,7 @@ def get_results_dict(self, mismatches = None, gaps = None, min_identity = None, b6 = b6lib.B6Source(self.output) ids_with_hits = set() - while b6.next(): + while next(b6): if b6.entry.query_id
== b6.entry.subject_id: continue @@ -292,8 +292,8 @@ def get_fancy_results_dict(self, max_per_query = 10, defline_white_space_mask = query_counts = {} fancy_results_dict = {} - while b6.next(): - if not query_counts.has_key(b6.entry.query_id): + while next(b6): + if b6.entry.query_id not in query_counts: query_counts[b6.entry.query_id] = 1 if query_counts[b6.entry.query_id] - 1 == max_per_query: @@ -301,7 +301,7 @@ def get_fancy_results_dict(self, max_per_query = 10, defline_white_space_mask = else: query_counts[b6.entry.query_id] += 1 - if not fancy_results_dict.has_key(b6.entry.query_id): + if b6.entry.query_id not in fancy_results_dict: fancy_results_dict[b6.entry.query_id] = [] query_seq = input_fasta.get_seq_by_read_id(b6.entry.query_id).replace('-', '') @@ -344,7 +344,7 @@ def search(self, sequence, output_file = None): if output_file: open(output_file, "w").write(result) - return cStringIO.StringIO(result) + return io.StringIO(result) def get_fancy_results_list(self, blast_results, num_results = 20): @@ -386,5 +386,5 @@ def get_fancy_results_list(self, blast_results, num_results = 20): try: u = LocalBLAST(None, None) except ModuleVersionError: - raise ModuleVersionError, version_error_text + raise ModuleVersionError(version_error_text) diff --git a/Oligotyping/utils/cosine_similarity.py b/Oligotyping/utils/cosine_similarity.py index e84c0cd..0965cef 100755 --- a/Oligotyping/utils/cosine_similarity.py +++ b/Oligotyping/utils/cosine_similarity.py @@ -56,7 +56,7 @@ def get_oligotype_sets_greedy(oligos, vectors, cosine_similarity_threshold, outp vector = vectors[oligo] shortest_distance_set_ID = None - shortest_distance = sys.maxint + shortest_distance = sys.maxsize for set_representative in set_representatives: distance = cosine_distance(set_representatives[set_representative], vector) @@ -93,16 +93,16 @@ def get_oligotype_sets(oligos, vectors, cosine_similarity_threshold, output_file distances = {} for i in range(0, len(oligos)): - if not distances.has_key(oligos[i]): + if oligos[i] not in distances: distances[oligos[i]] = {} for j in range(i, len(oligos)): - if not distances.has_key(oligos[j]): + if oligos[j] not in distances: distances[oligos[j]] = {} distances[oligos[i]][oligos[j]] = cosine_distance(vectors[oligos[i]], vectors[oligos[j]]) distances[oligos[j]][oligos[i]] = cosine_distance(vectors[oligos[i]], vectors[oligos[j]]) - ids = range(0, len(oligos)) + ids = list(range(0, len(oligos))) while 1: if not len(ids): break @@ -161,11 +161,11 @@ def get_samples(): samples = get_samples() - print '\n\t%d oligotypes split into %d partitions based on cosine similarity of %f. Here how they were distributed:\n'\ - % (len(oligos), len(partitions), args.cosine_similarity_threshold) + print('\n\t%d oligotypes split into %d partitions based on cosine similarity of %f. 
Here is how they were distributed:\n'\ + % (len(oligos), len(partitions), args.cosine_similarity_threshold)) for partition in partitions: - print ' - %s\n' % (', '.join(partition)) + print(' - %s\n' % (', '.join(partition))) vis_oligotype_sets_distribution(partitions, vectors, samples, legend = True,\ project_title = 'Cosine Similarity Threshold %.4f' % args.cosine_similarity_threshold) diff --git a/Oligotyping/utils/html/for_decomposition.py b/Oligotyping/utils/html/for_decomposition.py index 37e4c18..4e3b4b3 100644 --- a/Oligotyping/utils/html/for_decomposition.py +++ b/Oligotyping/utils/html/for_decomposition.py @@ -13,13 +13,13 @@ import os import copy import shutil -import cPickle +import pickle from Oligotyping.utils.constants import pretty_names from Oligotyping.utils.utils import pretty_print from Oligotyping.utils.utils import get_samples_dict_from_environment_file from Oligotyping.utils.random_colors import get_list_of_colors -from error import HTMLError +from .error import HTMLError try: @@ -59,7 +59,7 @@ from django.template.loader import render_to_string from django.template.defaultfilters import register except ImportError: - raise HTMLError, 'You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.' + raise HTMLError('You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.') @register.filter(name='diffs') def diffs(l, index): @@ -99,7 +99,7 @@ def get_blast_hits(d, max_num = 8): ret_line = 'BLAST search results at a glance (%d of %d total hits are shown):' %\ (num_show, len(d)) - for i in d.keys()[0:num_show]: + for i in list(d.keys())[0:num_show]: if d[i]['identity'] == 100.0: ret_line += '* %s (identity: %.2f%%, query coverage: %.2f%%)' \ % (d[i]['hit_def'].replace("'", '"'), @@ -138,7 +138,7 @@ def get_colors(number_of_colors): @register.filter(name='values') def values(d): - return d.values() + return list(d.values()) @register.filter(name='mod') def mod(value, arg): @@ -164,7 +164,7 @@ def sumvals(arg, clean = None): @register.filter(name='mklist') def mklist(arg): - return range(0, int(arg)) + return list(range(0, int(arg))) t = get_template('index_for_decomposition.tmpl') @@ -199,8 +199,8 @@ def copy_as(source, dest_name): def get_figures_dict(html_dict_prefix): html_dict_key = '%s_file_path' % html_dict_prefix - if html_dict.has_key(html_dict_key): - figures_dict = cPickle.load(open(html_dict[html_dict_key])) + if html_dict_key in html_dict: + figures_dict = pickle.load(open(html_dict[html_dict_key], 'rb')) for _map in figures_dict: for _func in figures_dict[_map]: for _op in figures_dict[_map][_func]: @@ -219,12 +219,12 @@ def get_figures_dict(html_dict_prefix): html_dict['exclusive_figures_dict'] = get_figures_dict('exclusive_figures_dict') - if html_dict.has_key('node_representatives_file_path'): + if 'node_representatives_file_path' in html_dict: html_dict['node_representatives_file_path'] = copy_as(run_info_dict['node_representatives_file_path'], 'node-representatives.fa.txt') else: html_dict['node_representatives_file_path'] = None - if run_info_dict.has_key('blast_ref_db') and os.path.exists(run_info_dict['blast_ref_db']): + if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']): html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa') if run_info_dict['sample_mapping']: @@ -267,8 +267,8 @@ def get_figures_dict(html_dict_prefix): args = parser.parse_args() - run_info_dict = cPickle.load(open(args.run_info_dict_path)) + run_info_dict = pickle.load(open(args.run_info_dict_path, 'rb')) index_page = generate_html_output(run_info_dict, args.output_directory) - print '\n\tHTML output is ready: "%s"\n' % index_page + print('\n\tHTML output is ready: "%s"\n' % index_page) diff --git a/Oligotyping/utils/html/for_oligotyping.py b/Oligotyping/utils/html/for_oligotyping.py index b6a72e6..c422720 100644 --- a/Oligotyping/utils/html/for_oligotyping.py +++ b/Oligotyping/utils/html/for_oligotyping.py @@ -14,14 +14,14 @@ import sys import copy import shutil -import cPickle +import pickle from Oligotyping.lib import fastalib as u from Oligotyping.utils.constants import pretty_names from Oligotyping.utils.utils import pretty_print from Oligotyping.utils.utils import get_samples_dict_from_environment_file from Oligotyping.utils.random_colors import get_list_of_colors -from error import HTMLError +from .error import HTMLError try: @@ -61,7 +61,7 @@ from django.template.loader import render_to_string from django.template.defaultfilters import register except ImportError: - raise HTMLError, 'You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.'
+ raise HTMLError('You need to have Django module (http://djangoproject.com) installed on your system to generate HTML output.') @register.filter(name='diffs') def diffs(l, index): @@ -150,7 +150,7 @@ def get_colors(number_of_colors): @register.filter(name='values') def values(d): - return d.values() + return list(d.values()) @register.filter(name='mod') def mod(value, arg): @@ -180,7 +180,7 @@ def sumvals(arg, clean = None): @register.filter(name='mklist') def mklist(arg): - return range(0, int(arg)) + return list(range(0, int(arg))) t = get_template('index_for_oligo.tmpl') @@ -254,8 +254,8 @@ def copy_as(source, dest_name, essential = True): def get_figures_dict(html_dict_prefix): html_dict_key = '%s_file_path' % html_dict_prefix - if html_dict.has_key(html_dict_key): - figures_dict = cPickle.load(open(html_dict[html_dict_key])) + if html_dict_key in html_dict: + figures_dict = pickle.load(open(html_dict[html_dict_key], 'rb')) for _map in figures_dict: for _func in figures_dict[_map]: for _op in figures_dict[_map][_func]: @@ -285,11 +285,11 @@ def get_figures_dict(html_dict_prefix): html_dict['oligotype_sets_file'] = copy_as(run_info_dict['oligotype_sets_file_path'], 'oligotype_sets.txt') html_dict['oligotype_sets'] = [l.strip().split('\t')[1].split(',') for l in open(run_info_dict['oligotype_sets_file_path'])] - if html_dict.has_key('representative_seqs_fasta_file_path'): + if 'representative_seqs_fasta_file_path' in html_dict: html_dict['representative_seqs_fasta_file_path'] = copy_as(run_info_dict['representative_seqs_fasta_file_path'], 'oligo-representatives.fa.txt') else: html_dict['representative_seqs_fasta_file_path'] = None - if run_info_dict.has_key('blast_ref_db') and os.path.exists(run_info_dict['blast_ref_db']): + if 'blast_ref_db' in run_info_dict and os.path.exists(run_info_dict['blast_ref_db']): html_dict['blast_ref_db_path'] = copy_as(run_info_dict['blast_ref_db'], 'reference_db.fa') html_dict['entropy_components'] = [int(x) for x in html_dict['bases_of_interest_locs'].split(',')] html_dict['samples_dict'] = get_samples_dict_from_environment_file(run_info_dict['environment_file_path']) @@ -314,13 +314,13 @@ def get_figures_dict(html_dict_prefix): # get oligo frequencies html_dict['frequency'] = {} for oligo in html_dict['oligos']: - html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in html_dict['samples_dict'].values() if d.has_key(oligo)])) + html_dict['frequency'][oligo] = pretty_print(sum([d[oligo] for d in list(html_dict['samples_dict'].values()) if oligo in d])) # get purity score html_dict['purity_score'] = run_info_dict['final_purity_score_dict'] # get total purity score html_dict['total_purity_score'] = run_info_dict['total_purity_score_dict'] # get unique sequence dict (which will contain the most frequent unique sequence for given oligotype) - if html_dict.has_key('output_directory_for_reps'): + if 'output_directory_for_reps' in html_dict: html_dict['rep_oligo_seqs_clean_dict'], html_dict['rep_oligo_seqs_fancy_dict'] = get_unique_sequences_dict(html_dict) html_dict['oligo_reps_dict'] = get_oligo_reps_dict(html_dict, html_output_directory) html_dict['component_reference'] = ''.join(['|' % i for i in range(0, html_dict['alignment_length'])]) @@ -331,7 +331,7 @@ def get_figures_dict(html_dict_prefix): # FIXME: code below is very inefficient and causes a huge # memory issue. fix it by not using deepcopy. 
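Porting note: several hunks in this migration sit on Python 3's str/bytes boundary: subprocess output is decoded before parsing (`check_command_output(...).decode("utf-8")`), hashing encodes first (`self.seq.upper().encode('utf-8')`), rendered templates are encoded before being written to 'wb' handles, and in-memory text buffers become `io.StringIO` (bytes would need `io.BytesIO`). A minimal sketch of the decode-on-input / encode-on-output rule (values are illustrative):

    import hashlib
    import io

    raw = b'blastn: 2.2.26+\n'               # bytes, as a pipe would deliver them
    text = raw.decode('utf-8')               # decode before str operations
    major = text.strip().split()[1].split('.')[0]
    handle = io.StringIO(text)               # str buffer; io.BytesIO for bytes
    digest = hashlib.sha1('ACGT'.encode('utf-8')).hexdigest()   # hash functions want bytes
    open('index.html', 'wb').write('<html></html>'.encode('utf-8'))  # encode before a bytes sink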
# generate individual oligotype pages - if html_dict.has_key('output_directory_for_reps'): + if 'output_directory_for_reps' in html_dict: for i in range(0, len(html_dict['oligos'])): oligo = html_dict['oligos'][i] tmp_dict = copy.deepcopy(html_dict) @@ -350,14 +350,14 @@ def get_figures_dict(html_dict_prefix): rendered = render_to_string('single_oligo.tmpl', tmp_dict) - open(oligo_page, 'w').write(rendered.encode("utf-8")) + open(oligo_page, 'wb').write(rendered.encode("utf-8")) # generate index index_page = os.path.join(html_output_directory, 'index.html') rendered = render_to_string('index_for_oligo.tmpl', html_dict) - open(index_page, 'w').write(rendered.encode("utf-8")) + open(index_page, 'wb').write(rendered.encode("utf-8")) return index_page @@ -370,21 +370,21 @@ def get_colors_dict(colors_file_path): def get_oligos_list(oligos_file_path): oligos_list = [] fasta = u.SequenceSource(oligos_file_path) - while fasta.next(): + while next(fasta): oligos_list.append(fasta.seq) return oligos_list def get_oligo_distribution_dict(oligo, html_dict): rep_dir = html_dict['output_directory_for_reps'] - oligo_distribution_dict = cPickle.load(open(os.path.join(rep_dir, '%.5d_'\ - % html_dict['oligos'].index(oligo) + oligo + '_unique_distribution.cPickle'))) + oligo_distribution_dict = pickle.load(open(os.path.join(rep_dir, '%.5d_'\ + % html_dict['oligos'].index(oligo) + oligo + '_unique_distribution.cPickle'), 'rb')) ret_dict = {} for sample in oligo_distribution_dict: ret_dict[sample] = [0] * 20 for i in range(0, 20): - if oligo_distribution_dict[sample].has_key(i + 1): + if i + 1 in oligo_distribution_dict[sample]: ret_dict[sample][i] = oligo_distribution_dict[sample][i + 1] return ret_dict @@ -416,7 +416,7 @@ def get_oligo_reps_dict(html_dict, html_output_directory): oligo_reps_dict['fancy_seqs'][oligo] = [] oligo_reps_dict['clear_seqs'][oligo] = [] oligo_reps_dict['frequency'][oligo] = [] - while uniques.next() and uniques.pos <= 20: + while next(uniques) and uniques.pos <= 20: oligo_reps_dict['clear_seqs'][oligo].append(uniques.seq) oligo_reps_dict['fancy_seqs'][oligo].append(get_decorated_sequence(uniques.seq, html_dict['entropy_components'])) oligo_reps_dict['frequency'][oligo].append(pretty_print(uniques.id.split('|')[1].split(':')[1])) @@ -426,13 +426,13 @@ def get_oligo_reps_dict(html_dict, html_output_directory): for column, entropy in [x.strip().split('\t') for x in open(entropy_file_path)]: entropy_values_per_column[int(column)] = float(entropy) - color_per_column = cPickle.load(open(alignment_base_path + '_unique_color_per_column.cPickle')) + color_per_column = pickle.load(open(alignment_base_path + '_unique_color_per_column.cPickle', 'rb')) oligo_reps_dict['component_references'][oligo] = ''.join(['|' % (color_per_column[i], i, entropy_values_per_column[i]) for i in range(0, html_dict['alignment_length'])]) blast_results_dict = alignment_base_path + '_unique_BLAST.cPickle' if os.path.exists(blast_results_dict): html_dict['blast_results_found'] = True - oligo_reps_dict['blast_results'][oligo] = cPickle.load(open(blast_results_dict)) + oligo_reps_dict['blast_results'][oligo] = pickle.load(open(blast_results_dict, 'rb')) else: oligo_reps_dict['blast_results'][oligo] = None @@ -440,7 +440,7 @@ def get_alignment_length(alignment_path): alignment = u.SequenceSource(alignment_path) - alignment.next() + next(alignment) return len(alignment.seq) def get_unique_sequences_dict(html_dict): @@ -452,7 +452,7 @@ def
get_unique_sequences_dict(html_dict): for i in range(0, len(oligos)): unique_file_path = os.path.join(rep_dir, '%.5d_' % i + oligos[i] + '_unique') f = u.SequenceSource(unique_file_path) - f.next() + next(f) rep_oligo_seqs_clean_dict[oligos[i]] = f.seq rep_oligo_seqs_fancy_dict[oligos[i]] = get_decorated_sequence(f.seq, html_dict['entropy_components']) f.close() @@ -460,7 +460,7 @@ def get_decorated_sequence(seq, components): """returns sequence with html decorations""" - return ''.join(map(lambda j: '%s' % seq[j] if j in components else seq[j], [j for j in range(len(seq))])) + return ''.join(['%s' % seq[j] if j in components else seq[j] for j in range(len(seq))]) if __name__ == '__main__': import argparse @@ -475,8 +475,8 @@ def get_decorated_sequence(seq, components): args = parser.parse_args() - run_info_dict = cPickle.load(open(args.run_info_dict_path)) + run_info_dict = pickle.load(open(args.run_info_dict_path, 'rb')) index_page = generate_html_output(run_info_dict, args.output_directory, args.entropy_figure) - print '\n\tHTML output is ready: "%s"\n' % index_page + print('\n\tHTML output is ready: "%s"\n' % index_page) diff --git a/Oligotyping/utils/random_colors.py b/Oligotyping/utils/random_colors.py index d7ee65f..287f8c4 100755 --- a/Oligotyping/utils/random_colors.py +++ b/Oligotyping/utils/random_colors.py @@ -82,4 +82,4 @@ def get_color_shade_dict_for_list_of_values(values, colormap = 'OrRd'): if not args.output_file: for oligo in colors_dict: - print '%s: %s' % (oligo, colors_dict[oligo]) + print('%s: %s' % (oligo, colors_dict[oligo])) diff --git a/Oligotyping/utils/utils.py b/Oligotyping/utils/utils.py index b62cefe..f2cadfe 100644 --- a/Oligotyping/utils/utils.py +++ b/Oligotyping/utils/utils.py @@ -20,7 +20,7 @@ import random import string import termios -import cPickle +import pickle import textwrap import tempfile import subprocess @@ -91,7 +91,7 @@ def get_unit_counts_and_percents(units, samples_dict): counts = [] percents = [] for unit in units: - if samples_dict[sample].has_key(unit): + if unit in samples_dict[sample]: counts.append(samples_dict[sample][unit]) percents.append(samples_dict[sample][unit] * 100.0 / sample_totals[sample]) else: @@ -105,7 +105,7 @@ def import_error(e): - print ''' + print(''' Sorry. It seems you are missing a module that is required by the oligotyping pipeline.
Here is the original import error: @@ -117,7 +117,7 @@ def import_error(e): https://meren.github.io/2012/05/11/oligotyping-pipeline-explained/ - \n''' % e + \n''' % e) sys.exit() @@ -174,7 +174,7 @@ def generate_MATRIX_files_for_units_across_samples(units, samples, MN_fp, SN_fp, def get_num_nt_diff_between_two_aligned_sequences(seq1, seq2): if len(seq1) != len(seq2): - raise LibError, "Two sequences are not equal in length:\n\t%s\n\t%s" % (seq1, seq2) + raise LibError("Two sequences are not equal in length:\n\t%s\n\t%s" % (seq1, seq2)) return len(["diff" for i in range(0, len(seq1)) if seq1[i] != seq2[i]]) @@ -209,7 +209,7 @@ def homopolymer_indel_exists(seq1, seq2): return False isHP = lambda x: len(set(x)) == 1 - isHPindel = lambda (s, e): seq1[s:e] == seq2[s:e] and isHP(seq1[s:e]) == 1 and seq2[gap_index] == seq2[s] + isHPindel = lambda s_e: seq1[s_e[0]:s_e[1]] == seq2[s_e[0]:s_e[1]] and isHP(seq1[s_e[0]:s_e[1]]) == 1 and seq2[gap_index] == seq2[s_e[0]] def DownStream(sequence): i = 3 @@ -279,7 +279,7 @@ def mask_defline_whitespaces_in_FASTA(fasta_file_path, defline_white_space_mask fasta = u.SequenceSource(fasta_file_path) output = u.FastaOutput(fasta_file_path + '.tmp') - while fasta.next(): + while next(fasta): output.write_id(fasta.id.replace(' ', defline_white_space_mask)) output.write_seq(fasta.seq, split = False) @@ -289,13 +289,13 @@ def unique_and_store_alignment(alignment_path, output_path): output = u.FastaOutput(output_path) alignment = u.SequenceSource(alignment_path, unique = True) - alignment.next() + next(alignment) most_abundant_unique_read = alignment.seq alignment.reset() read_ids = [] unique_read_counts = [] - while alignment.next(): + while next(alignment): read_ids += alignment.ids unique_read_counts.append(len(alignment.ids)) output.store(alignment, split = False) @@ -313,7 +313,7 @@ def generate_TAB_delim_file_from_dict(data_dict, output_file_path, order, first_ for item in data_dict: line = [item] for column in order: - if not data_dict[item].has_key(column): + if column not in data_dict[item]: line.append('') else: line.append(str(data_dict[item][column])) @@ -336,19 +336,19 @@ def get_unique_sequences_from_FASTA(alignment, limit = 10): fasta = u.SequenceSource(alignment, unique = True, lazy_init = False) - while fasta.next() and fasta.pos < limit: + while next(fasta) and fasta.pos < limit: unique_sequences.append((fasta.seq, len(fasta.ids), len(fasta.ids) / float(fasta.total_seq))) return unique_sequences def get_oligos_sorted_by_abundance(samples_dict, oligos = None, min_abundance = 0): - samples = samples_dict.keys() + samples = list(samples_dict.keys()) samples.sort() if oligos == None: oligos = [] - map(lambda o: oligos.extend(o), [v.keys() for v in samples_dict.values()]) + list(map(lambda o: oligos.extend(o), [list(v.keys()) for v in list(samples_dict.values())])) oligos = list(set(oligos)) abundant_oligos = [] @@ -358,7 +358,7 @@ def get_oligos_sorted_by_abundance(samples_dict, oligos = None, min_abundance = for sample in samples: sum_sample = sum(samples_dict[sample].values()) - if samples_dict[sample].has_key(oligo): + if oligo in samples_dict[sample]: percent_abundances.append((samples_dict[sample][oligo] * 100.0 / sum_sample,\ samples_dict[sample][oligo], sum_sample, sample)) @@ -394,8 +394,8 @@ def generate_gexf_network_file(units, samples_dict, unit_percents, output_file, output = open(output_file, 'w') samples = sorted(samples_dict.keys()) - sample_mapping_categories = sorted([k for k in sample_mapping_dict.keys() if k != 'colors']) if 
sample_mapping_dict else None - unit_mapping_categories = sorted([k for k in unit_mapping_dict.keys() if k not in ['colors', 'labels']]) if unit_mapping_dict else None + sample_mapping_categories = sorted([k for k in list(sample_mapping_dict.keys()) if k != 'colors']) if sample_mapping_dict else None + unit_mapping_categories = sorted([k for k in list(unit_mapping_dict.keys()) if k not in ['colors', 'labels']]) if unit_mapping_dict else None output.write('''\n''') output.write('''\n''') @@ -428,7 +428,7 @@ def generate_gexf_network_file(units, samples_dict, unit_percents, output_file, else: output.write(''' \n''' % (sample, sample)) output.write(''' \n''' % sample_size) - if sample_mapping_dict and sample_mapping_dict.has_key('colors'): + if sample_mapping_dict and 'colors' in sample_mapping_dict: output.write(''' \n''' %\ HTMLColorToRGB(sample_mapping_dict['colors'][sample], scaled = False)) @@ -445,7 +445,7 @@ def generate_gexf_network_file(units, samples_dict, unit_percents, output_file, if skip_unit_labels: output.write(''' \n''' % (unit)) else: - if unit_mapping_dict and unit_mapping_dict.has_key('labels'): + if unit_mapping_dict and 'labels' in unit_mapping_dict: output.write(''' \n''' % (unit, unit_mapping_dict['labels'][unit])) else: output.write(''' \n''' % (unit)) @@ -492,7 +492,7 @@ def generate_gexf_network_file_for_nodes_topology(nodes_dict, output_file, attri output = open(output_file, 'w') nodes = sorted(nodes_dict.keys()) - nodes_mapping_categories = sorted([k for k in nodes_dict[nodes[0]].keys() if k not in ['children', 'parent']]) if nodes_dict else None + nodes_mapping_categories = sorted([k for k in list(nodes_dict[nodes[0]].keys()) if k not in ['children', 'parent']]) if nodes_dict else None output.write('''\n''') output.write('''\n''') @@ -507,7 +507,7 @@ def generate_gexf_network_file_for_nodes_topology(nodes_dict, output_file, attri output.write('''\n''') for i in range(0, len(nodes_mapping_categories)): category = nodes_mapping_categories[i] - attr_type = attribute_types_dict[category] if attribute_types_dict.has_key(category) else "string" + attr_type = attribute_types_dict[category] if category in attribute_types_dict else "string" output.write(''' \n''' % (i, category, attr_type)) output.write('''\n\n''') @@ -564,12 +564,12 @@ def get_qual_stats_dict(quals_dict, output_file_path = None, verbose = True): progress.new('Summary of quality scores per column is being computed') qual_stats_dict = {} - alignment_length = len(quals_dict[quals_dict.keys()[0]]) + alignment_length = len(quals_dict[list(quals_dict.keys())[0]]) for pos in range(0, alignment_length): progress.update('Position: %d of %d' % (pos + 1, alignment_length)) qual_stats_dict[pos] = {} - quals_for_pos = [q[pos] for q in quals_dict.values() if q[pos]] + quals_for_pos = [q[pos] for q in list(quals_dict.values()) if q[pos]] if not quals_for_pos: qual_stats_dict[pos] = None continue @@ -580,7 +580,7 @@ def get_qual_stats_dict(quals_dict, output_file_path = None, verbose = True): qual_stats_dict[pos]['count'] = len(quals_for_pos) if output_file_path: - cPickle.dump(quals_dict, open(output_file_path, 'w')) + pickle.dump(quals_dict, open(output_file_path, 'wb')) progress.end() return qual_stats_dict
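# The get_qual_stats_dict() hunk above reduces the quality scores of each
# alignment column to mean/std/min/max/count. The same reduction as a compact,
# self-contained sketch (synthetic input; None marks positions a read does not
# cover, as in the aligned quals dicts):
import numpy

def column_stats(quals_dict):
    length = len(next(iter(quals_dict.values())))
    stats = {}
    for pos in range(length):
        column = [q[pos] for q in quals_dict.values() if q[pos] is not None]
        if not column:
            stats[pos] = None  # no read covers this column
            continue
        stats[pos] = {'mean': numpy.mean(column), 'std': numpy.std(column),
                      'min': numpy.min(column), 'max': numpy.max(column),
                      'count': len(column)}
    return stats

# e.g., column_stats({'read_1': [38, 40, None], 'read_2': [35, 39, 33]})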
@@ -602,12 +602,12 @@ def get_quals_dict(quals_file, alignment_file, output_file_path = None, verbose alignment = u.SequenceSource(alignment_file) qual = u.QualSource(quals_file) - while qual.next(): + while next(qual): if qual.pos % 1000 == 0: progress.update('Step 1 of 2 :: Quality scores read: %s' % (pretty_print(qual.pos))) quals_dict[qual.id] = qual.quals_int - while alignment.next(): + while next(alignment): if alignment.pos % 1000 == 0: progress.update('Step 2 of 2 :: Alignments matched: %s' % (pretty_print(alignment.pos))) sys.stderr.flush() @@ -625,7 +625,7 @@ def get_quals_dict(quals_file, alignment_file, output_file_path = None, verbose progress.end() if output_file_path: - cPickle.dump(quals_aligned_dict, open(output_file_path, 'w')) + pickle.dump(quals_aligned_dict, open(output_file_path, 'wb')) return quals_aligned_dict @@ -658,7 +658,7 @@ def process_command_line_args_for_quality_files(args, _return = 'qual_stats_dict return qual_stats_dict elif args.qual_scores_dict: - quals_dict = cPickle.load(open(args.qual_scores_dict)) + quals_dict = pickle.load(open(args.qual_scores_dict, 'rb')) if _return == 'quals_dict': return quals_dict @@ -670,7 +670,7 @@ def process_command_line_args_for_quality_files(args, _return = 'qual_stats_dict return qual_stats_dict elif args.qual_stats_dict: - qual_stats_dict = cPickle.load(open(args.qual_stats_dict)) + qual_stats_dict = pickle.load(open(args.qual_stats_dict, 'rb')) if _return == 'qual_stats_dict': return qual_stats_dict @@ -685,7 +685,7 @@ def get_filtered_samples_dict(units, samples, samples_dict): for sample in samples: filtered_samples_dict[sample] = {} for unit in units: - if samples_dict[sample].has_key(unit): + if unit in samples_dict[sample]: filtered_samples_dict[sample][unit] = samples_dict[sample][unit] return filtered_samples_dict @@ -694,8 +694,8 @@ def get_samples_dict_from_environment_file(environment_file_path): samples_dict = {} for oligo, sample, count in [l.strip().split('\t') for l in open(environment_file_path).readlines()]: - if samples_dict.has_key(sample): - if samples_dict[sample].has_key(oligo): + if sample in samples_dict: + if oligo in samples_dict[sample]: samples_dict[sample][oligo] += int(count) else: samples_dict[sample][oligo] = int(count) @@ -726,7 +726,7 @@ def pretty_print(n): def same_but_gaps(sequence1, sequence2): if len(sequence1) != len(sequence2): - raise ValueError, "Alignments have different lengths" + raise ValueError("Alignments have different lengths") for i in range(0, len(sequence1)): if sequence1[i] == '-' or sequence2[i] == '-': @@ -738,7 +738,7 @@ def trim_uninformative_gaps_from_sequences(sequence1, sequence2): if len(sequence1) != len(sequence2): - raise ValueError, "Alignments have different lengths" + raise ValueError("Alignments have different lengths") columns_to_discard = [] @@ -789,12 +789,12 @@ def is_program_exist(program): def trim_uninformative_columns_from_alignment(input_file_path): input_fasta = u.SequenceSource(input_file_path, lazy_init = False) - input_fasta.next() + next(input_fasta) fasta_read_len = len(input_fasta.seq) - invalid_columns = range(0, fasta_read_len) + invalid_columns = list(range(0, fasta_read_len)) input_fasta.reset() - while input_fasta.next(): + while next(input_fasta): cols_not_invalid = [] for i in invalid_columns: if input_fasta.seq[i] != '-': @@ -811,7 +811,7 @@ def trim_uninformative_columns_from_alignment(input_file_path): temp_file = u.FastaOutput(temp_file_path) - while input_fasta.next(): + while next(input_fasta): new_seq = '' for i in columns_to_keep: new_seq += input_fasta.seq[i] @@ -871,7 +871,7 @@ def HTMLColorToRGB(colorstring, scaled = True): colorstring = colorstring.strip() if colorstring[0] == '#': colorstring = colorstring[1:] if
len(colorstring) != 6: - raise ValueError, "input #%s is not in #RRGGBB format" % colorstring + raise ValueError("input #%s is not in #RRGGBB format" % colorstring) r, g, b = colorstring[:2], colorstring[2:4], colorstring[4:] r, g, b = [int(n, 16) for n in (r, g, b)] @@ -884,9 +884,9 @@ def HTMLColorToRGB(colorstring, scaled = True): def run_command(cmdline): try: if subprocess.call(cmdline, shell = True) < 0: - raise ConfigError, "command was terminated: '%s'" % (cmdline) - except OSError, e: - raise ConfigError, "command was failed for the following reason: '%s' ('%s')" % (e, cmdline) + raise ConfigError("command was terminated: '%s'" % (cmdline)) + except OSError as e: + raise ConfigError("command was failed for the following reason: '%s' ('%s')" % (e, cmdline)) def check_command_output(cmdline): @@ -916,7 +916,7 @@ def check_input_alignment(alignment_path, sample_name_separator, progress_func = samples = set([]) previous_alignment_length = None - while alignment.next(): + while next(alignment): if progress_func and alignment.pos % 5000 == 0: progress_func.update('Reading input; %s, %s samples found'\ % (pretty_print(alignment.pos), @@ -929,7 +929,7 @@ def check_input_alignment(alignment_path, sample_name_separator, progress_func = # check the alignment lengths along the way: if previous_alignment_length: if previous_alignment_length != len(alignment.seq): - raise ConfigError, "Not all reads have the same length." + raise ConfigError("Not all reads have the same length.") previous_alignment_length = len(alignment.seq) @@ -984,16 +984,16 @@ def mapping_file_simple_check(mapping_file_path, samples_expected = None): header_line = mapping_file.readline() if header_line.find('\t') < 0: - raise ConfigError, "Mapping file doesn't seem to be a TAB delimited file" + raise ConfigError("Mapping file doesn't seem to be a TAB delimited file") header_fields = header_line.strip('\n').split('\t') if len(header_fields) < 2: - raise ConfigError, "No categories were found in the mapping file" + raise ConfigError("No categories were found in the mapping file") if header_fields[0] != 'samples': - raise ConfigError, "First column of the first row of mapping file must be 'samples'" + raise ConfigError("First column of the first row of mapping file must be 'samples'") if len(header_fields) != len(set(header_fields)): - raise ConfigError, "In the mapping file, every category must be unique" + raise ConfigError("In the mapping file, every category must be unique") samples_found = [] num_entries = 0 @@ -1005,13 +1005,13 @@ def mapping_file_simple_check(mapping_file_path, samples_expected = None): samples_found.append(fields[0]) if len(fields) != len(header_fields): - raise ConfigError, "Not every line in the mapping file has the same number of fields " +\ - "(line %d has %d columns)" % (num_entries + 1, len(fields)) + raise ConfigError("Not every line in the mapping file has the same number of fields " +\ + "(line %d has %d columns)" % (num_entries + 1, len(fields))) for field in fields[1:]: if field == "": continue if field[0] in '0123456789': - raise ConfigError, "Categories in the mapping file cannot start with digits: '%s'" % field + raise ConfigError("Categories in the mapping file cannot start with digits: '%s'" % field) if samples_expected: samples_missing = [] @@ -1020,11 +1020,11 @@ def mapping_file_simple_check(mapping_file_path, samples_expected = None): samples_missing.append(sample) if samples_missing: - raise ConfigError, "Mapping file seems to be missing %d sample(s) that appear in the FASTA 
file:\n\n- %s\n\n"\ - % (len(samples_missing), ', '.join(samples_missing)) + raise ConfigError("Mapping file seems to be missing %d sample(s) that appear in the FASTA file:\n\n- %s\n\n"\ + % (len(samples_missing), ', '.join(samples_missing))) if num_entries < 3: - raise ConfigError, "Mapping file seems to have less than three samples" + raise ConfigError("Mapping file seems to have less than three samples") mapping_file.close() return True @@ -1095,7 +1095,7 @@ def get_terminal_width(self): def new(self, pid): if self.pid: - raise LibError, "Progress.new() can't be called before ending the previous one (Existing: '%s', Competing: '%s')." % (self.pid, pid) + raise LibError("Progress.new() can't be called before ending the previous one (Existing: '%s', Competing: '%s')." % (self.pid, pid)) if not self.verbose: return @@ -1153,7 +1153,7 @@ def end(self): def get_pretty_name(key): - if pretty_names.has_key(key): + if key in pretty_names: return pretty_names[key] else: return key @@ -1250,7 +1250,7 @@ def warning(self, message, header='WARNING', lc = 'red', raw = False): def store_info_dict(self, destination): - cPickle.dump(self.info_dict, open(destination, 'w')) + pickle.dump(self.info_dict, open(destination, 'wb')) def quit(self): @@ -1261,7 +1261,7 @@ def get_read_objects_from_file(input_file_path): input_fasta = u.SequenceSource(input_file_path, unique = True) read_objects = [] - while input_fasta.next(): + while next(input_fasta): read_objects.append(UniqueFASTAEntry(input_fasta.seq, input_fasta.ids)) input_fasta.close() @@ -1275,12 +1275,12 @@ def split_fasta_file(input_file_path, dest_dir, prefix = 'part', num_reads_per_f next_part = 1 part_obj = None - while input_fasta.next(): + while next(input_fasta): if (input_fasta.pos - 1) % num_reads_per_file == 0: if part_obj: part_obj.close() - rand_bit = ''.join([random.choice(string.ascii_letters + string.digits) for n in xrange(8)]) + rand_bit = ''.join([random.choice(string.ascii_letters + string.digits) for n in range(8)]) file_path = os.path.join(dest_dir, '%s-%d-%s.fa' % (prefix, next_part, rand_bit)) parts.append(file_path) next_part += 1 diff --git a/Oligotyping/visualization/decomposition_topology.py b/Oligotyping/visualization/decomposition_topology.py index 4525cec..66fb41e 100644 --- a/Oligotyping/visualization/decomposition_topology.py +++ b/Oligotyping/visualization/decomposition_topology.py @@ -10,7 +10,7 @@ # # Please read the COPYING file. 
-import cPickle +import pickle import networkx as nx import matplotlib.pyplot as plt import matplotlib.image as mpimg @@ -23,7 +23,7 @@ def topology_graph(topology_dict_path, match_levels = False): G = nx.MultiDiGraph() - topology = cPickle.load(open(topology_dict_path)) + topology = pickle.load(open(topology_dict_path, 'rb')) nodes = {} levels = [] @@ -40,7 +40,7 @@ def topology_graph(topology_dict_path, match_levels = False): parent_nodes.append(node_id) nodes[node_id] = {'size': node.size, 'parent': node.parent, 'level': node.level, - 'children': [child_node_id for child_node_id in node.children if topology.has_key(child_node_id) and not topology[child_node_id].killed], 'type': 'node'} + 'children': [child_node_id for child_node_id in node.children if child_node_id in topology and not topology[child_node_id].killed], 'type': 'node'} if node.freq_curve_img_path: nodes[node_id]['freq_curve_img_path'] = node.freq_curve_img_path levels.append(int(node.level)) @@ -52,7 +52,7 @@ def topology_graph(topology_dict_path, match_levels = False): for node_id in nodes: node = nodes[node_id] if node['level'] < max_level and not node['children']: - levels_to_cover = range(node['level'] + 1, max_level + 1) + levels_to_cover = list(range(node['level'] + 1, max_level + 1)) for level in levels_to_cover: if levels_to_cover.index(level) == 0: new_nodes[node_id + ':l%d' % level] = {'size': node['size'], 'parent': node_id, @@ -76,17 +76,17 @@ def topology_graph(topology_dict_path, match_levels = False): else: break G.add_edge(node_id, node['parent'], size = int(node['size']), label = label,\ - image = node['freq_curve_img_path'] if node.has_key('freq_curve_img_path') else None,\ + image = node['freq_curve_img_path'] if 'freq_curve_img_path' in node else None,\ final_node = True if not node['children'] else False) else: G.add_edge(node_id, node['parent'], size = int(node['size']), label = '',\ - image = node['freq_curve_img_path'] if node.has_key('freq_curve_img_path') else None,\ + image = node['freq_curve_img_path'] if 'freq_curve_img_path' in node else None,\ final_node = True if not node['children'] else False) for node_id in nodes['root']['children']: node = nodes['root'] G.add_edge('root', node_id, size = int(nodes['root']['size']), label = 'root',\ - image = node['freq_curve_img_path'] if node.has_key('freq_curve_img_path') else None,\ + image = node['freq_curve_img_path'] if 'freq_curve_img_path' in node else None,\ final_node = False) return (G, nodes) @@ -98,7 +98,7 @@ def topology(topology_dict_path, output_file = None, title = None): number_of_edges = G.number_of_edges() number_of_nodes = G.number_of_nodes() - print("Loaded %d edges and %d nodes." % (number_of_edges, number_of_nodes)) + print(("Loaded %d edges and %d nodes."
% (number_of_edges, number_of_nodes))) plt.figure(figsize=(24, 16)) @@ -133,8 +133,8 @@ def topology(topology_dict_path, output_file = None, title = None): nx.draw_networkx_labels(G, pos, font_size=8, font_weight = 'bold', labels = dict([(u, '%s\n(%s)' % (d['label'], pretty_print(d['size']))) for u, v, d in G.edges(data=True)])) # adjust the plot limits - xmax = 1.02 * max(x for x, y in pos.values()) - ymax = 1.02 * max(y for x, y in pos.values()) + xmax = 1.02 * max(x for x, y in list(pos.values())) + ymax = 1.02 * max(y for x, y in list(pos.values())) plt.xlim(0, xmax) plt.ylim(0, ymax) plt.xticks([]) @@ -149,15 +149,15 @@ def topology(topology_dict_path, output_file = None, title = None): plt.setp(ax, frame_on=False) #plt.axis('off') - if nodes_dict['root'].has_key('freq_curve_img_path'): + if 'freq_curve_img_path' in nodes_dict['root']: AX=plt.gca() f=plt.gcf() - for node in nodes_dict.keys(): + for node in list(nodes_dict.keys()): (x, y) = pos[node] xt,yt = AX.transData.transform((x, y)) # figure coordinates xf, yf = f.transFigure.inverted().transform((xt, yt)) # axes coordinates - print xf, yf + print(xf, yf) if node == 'root': imsize = 0.04 else: diff --git a/Oligotyping/visualization/entropy_distribution_bar.py b/Oligotyping/visualization/entropy_distribution_bar.py index 3210309..562db57 100644 --- a/Oligotyping/visualization/entropy_distribution_bar.py +++ b/Oligotyping/visualization/entropy_distribution_bar.py @@ -49,7 +49,7 @@ def entropy_distribution_bar(alignment, entropy_values, output_file, quick = Fal colors_dict = NUCL_COLORS - missing_chars = [char for char in chars if char not in NUCL_COLORS.keys()] + missing_chars = [char for char in chars if char not in list(NUCL_COLORS.keys())] if missing_chars: colors_for_missing_chars = get_list_of_colors(len(missing_chars), colormap="RdYlGn") @@ -81,7 +81,7 @@ def entropy_distribution_bar(alignment, entropy_values, output_file, quick = Fal fontsize = 5, color = colors_dict[unique_sequence[i]]) percent = int(round(frequency * len(unique_sequence))) or 1 - plt.fill_between(range(0, percent), (y + 1.15) / 100.0, (y - 0.85) / 100.0, color="green", alpha = 0.2) + plt.fill_between(list(range(0, percent)), (y + 1.15) / 100.0, (y - 0.85) / 100.0, color="green", alpha = 0.2) plt.text(percent + 0.8, (y - 1.2) / 100.0, count, fontsize = 5, color = 'gray') current += 1 diff --git a/Oligotyping/visualization/frequency_curve_and_entropy.py b/Oligotyping/visualization/frequency_curve_and_entropy.py index 8a2902f..b667ecb 100755 --- a/Oligotyping/visualization/frequency_curve_and_entropy.py +++ b/Oligotyping/visualization/frequency_curve_and_entropy.py @@ -24,11 +24,11 @@ def vis_freq_curve(fasta_file_path, output_file = None, x_limit = 20, display = fasta = u.SequenceSource(fasta_file_path) frequency_list = [] - while fasta.next(): + while next(fasta): try: frequency_list.append(freq_from_defline(fasta.id)) except: - print 'frequency info can not be read from defline.' 
+ print('frequency info can not be read from defline.') sys.exit() frequency_list_to_plot = frequency_list[0:x_limit] + [0] * (x_limit - len(frequency_list) \ @@ -72,7 +72,7 @@ def vis_freq_curve(fasta_file_path, output_file = None, x_limit = 20, display = plt.grid(True) plt.rcParams.update({'axes.linewidth' : 0.9}) plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1) - plt.xticks( range(0, len(entropy_values), 5), rotation=90, size = 'x-small') + plt.xticks( list(range(0, len(entropy_values), 5)), rotation=90, size = 'x-small') plt.plot(frequency_list_to_plot, lw = 3, c = 'black') @@ -84,7 +84,7 @@ def vis_freq_curve(fasta_file_path, output_file = None, x_limit = 20, display = plt.title('Frequency Distribution of Unique Sequences in %s' % os.path.basename(fasta_file_path)) plt.ylim(ymin = -max(frequency_list_to_plot) * 0.05, ymax = max(frequency_list_to_plot) * 1.05) plt.xlim(xmin = -0.05, xmax = x_limit - 1) - plt.xticks(range(0, x_limit), [str(i) for i in range(1, x_limit + 1)], rotation=90, size='small') + plt.xticks(list(range(0, x_limit)), [str(i) for i in range(1, x_limit + 1)], rotation=90, size='small') plt.subplot(2, 1, 2) @@ -103,7 +103,7 @@ def vis_freq_curve(fasta_file_path, output_file = None, x_limit = 20, display = plt.bar(ind, entropy_values, color = 'black', lw = 0.5) plt.xlim([0, len(entropy_values)]) plt.ylim([0, y_maximum]) - plt.xticks( range(0, len(entropy_values), 5), rotation=90, size = 'x-small') + plt.xticks( list(range(0, len(entropy_values), 5)), rotation=90, size = 'x-small') plt.xlabel('Position in the Alignment', size = 'x-large') plt.ylabel('Shannon Entropy', size = 'x-large') diff --git a/Oligotyping/visualization/oligotype_distribution_across_samples.py b/Oligotyping/visualization/oligotype_distribution_across_samples.py index 9e383b0..074bd95 100755 --- a/Oligotyping/visualization/oligotype_distribution_across_samples.py +++ b/Oligotyping/visualization/oligotype_distribution_across_samples.py @@ -19,7 +19,7 @@ def oligotype_distribution_across_samples(samples_dict, colors_dict, output_file = None, legend = False, project_title = None, display = True, oligos = None): - samples = samples_dict.keys() + samples = list(samples_dict.keys()) samples.sort() if oligos == None: @@ -38,7 +38,7 @@ def oligotype_distribution_across_samples(samples_dict, colors_dict, output_file for oligo in oligos: percents = [] for sample in samples: - if samples_dict[sample].has_key(oligo): + if oligo in samples_dict[sample]: percents.append(samples_dict[sample][oligo] * 100.0 / sum(samples_dict[sample].values())) else: percents.append(0.0) @@ -163,8 +163,8 @@ def oligotype_distribution_across_samples(samples_dict, colors_dict, output_file samples_dict = {} for oligotype, sample, count in [line.strip().split('\t') for line in open(args.environment_file).readlines()]: - if samples_dict.has_key(sample): - if samples_dict[sample].has_key(oligotype): + if sample in samples_dict: + if oligotype in samples_dict[sample]: samples_dict[sample][oligotype] += int(count) else: samples_dict[sample][oligotype] = int(count) diff --git a/Oligotyping/visualization/oligotype_distribution_stack_bar.py b/Oligotyping/visualization/oligotype_distribution_stack_bar.py index 7ecb014..d1abb99 100755 --- a/Oligotyping/visualization/oligotype_distribution_stack_bar.py +++ b/Oligotyping/visualization/oligotype_distribution_stack_bar.py @@ -22,7 +22,7 @@ def oligotype_distribution_stack_bar(samples_dict, colors_dict, output_file = None, legend = False,\ colors_export = None, project_title = None, display 
= True, oligos = None): - samples = samples_dict.keys() + samples = list(samples_dict.keys()) samples.sort() if oligos == None: @@ -37,7 +37,7 @@ def oligotype_distribution_stack_bar(samples_dict, colors_dict, output_file = No for sample in samples: vector = [] for oligo in oligos: - if samples_dict[sample].has_key(oligo): + if oligo in samples_dict[sample]: vector.append(samples_dict[sample][oligo]) else: vector.append(0) diff --git a/Oligotyping/visualization/oligotype_sets_distribution.py b/Oligotyping/visualization/oligotype_sets_distribution.py index eaf66a5..0bab40b 100755 --- a/Oligotyping/visualization/oligotype_sets_distribution.py +++ b/Oligotyping/visualization/oligotype_sets_distribution.py @@ -11,7 +11,7 @@ import numpy as np import matplotlib.pyplot as plt -import cPickle +import pickle from Oligotyping.utils.random_colors import get_list_of_colors from Oligotyping.utils.utils import HTMLColorToRGB @@ -43,7 +43,7 @@ def vis_oligotype_sets_distribution(partitions, vectors, samples, colors_dict = ind = np.arange(N) width = 0.75 - number_of_dimensions = len(vectors.values()[0]) + number_of_dimensions = len(list(vectors.values())[0]) for i in range(0, len(partitions)): group = partitions[i] @@ -61,7 +61,7 @@ def vis_oligotype_sets_distribution(partitions, vectors, samples, colors_dict = except: color = 'black' - plt.fill_between(range(0, len(vector)), maxs, mins, color=color, alpha = 0.1) + plt.fill_between(list(range(0, len(vector))), maxs, mins, color=color, alpha = 0.1) plt.plot(vector, color=color, linewidth = 1, alpha = 0.95, label = 'Set #%d' % i) if len(vector) < 50: plt.plot(vector, color=color, linewidth = 3, alpha = 0.7, label = '_nolegend_') @@ -135,7 +135,7 @@ def vis_oligotype_sets_distribution(partitions, vectors, samples, colors_dict = else: colors_dict = None - partitions = cPickle.load(open(args.partitions_file)) + partitions = pickle.load(open(args.partitions_file, 'rb')) oligos, vectors = get_vectors_from_oligotypes_across_samples_matrix(args.oligotypes_across_samples) diff --git a/bin/decompose b/bin/decompose index 614373d..c5424db 100755 --- a/bin/decompose +++ b/bin/decompose @@ -17,12 +17,12 @@ try: import Oligotyping except ImportError: import inspect - print ''' + print(''' Oligotyping package seems to be missing from your PYTHONPATH. Running this may help: export PYTHONPATH="$PYTHONPATH:%s" - ''' % (os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0]))) + ''' % (os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0])))) sys.exit() from Oligotyping.lib.decomposer import Decomposer @@ -32,7 +32,7 @@ from Oligotyping.utils import parsers if __name__ == '__main__': if '--version' in sys.argv: - print parsers.version + print(parsers.version) sys.exit() parser = parsers.decomposer() @@ -40,6 +40,6 @@ if __name__ == '__main__': try: decomposer.decompose() - except ConfigError, e: - print e + except ConfigError as e: + print(e) sys.exit(-1) diff --git a/bin/entropy-analysis b/bin/entropy-analysis index c732bb9..09a6b56 100755 --- a/bin/entropy-analysis +++ b/bin/entropy-analysis @@ -16,26 +16,26 @@ try: import Oligotyping except ImportError: import inspect - print ''' + print(''' Oligotyping package seems to be missing from your PYTHONPATH.
Running this may help: export PYTHONPATH="$PYTHONPATH:%s" - ''' % (os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0]))) + ''' % (os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0])))) sys.exit() from Oligotyping.utils.utils import import_error try: import matplotlib -except ImportError, e: +except ImportError as e: import_error(e) sys.exit() try: import matplotlib.pyplot as plt except RuntimeError: - print ''' + print(''' matplotlib is failing to connect to any X server for its GTK display. Please add the following directive into the 'matplotlibrc' file (which should be under '~/.matplotlib/' directory, if there is no such file, you should @@ -43,7 +43,7 @@ except RuntimeError: backend: Agg - ''' + ''') sys.exit() try: @@ -52,13 +52,13 @@ try: from Oligotyping.lib.entropy import EntropyError from Oligotyping.utils.utils import process_command_line_args_for_quality_files from Oligotyping.visualization.entropy_distribution_bar import entropy_distribution_bar -except ImportError, e: +except ImportError as e: import_error(e) sys.exit() if __name__ == '__main__': if '--version' in sys.argv: - print parsers.version + print(parsers.version) sys.exit() parser = parsers.entropy() @@ -78,8 +78,8 @@ if __name__ == '__main__': weighted = args.weighted, qual_stats_dict = qual_stats_dict, amino_acid_sequences = args.amino_acid_sequences) - except EntropyError, e: - print "Something went wrong. Here is what we know:\n\n\t%s\n\n" % e + except EntropyError as e: + print("Something went wrong. Here is what we know:\n\n\t%s\n\n" % e) sys.exit(-1) entropy_distribution_bar(args.alignment, diff --git a/bin/o-create-GG-alignment-template-from-taxon b/bin/o-create-GG-alignment-template-from-taxon index 67808b9..b8e1516 100755 --- a/bin/o-create-GG-alignment-template-from-taxon +++ b/bin/o-create-GG-alignment-template-from-taxon @@ -31,7 +31,7 @@ def get_ids (taxon, otu_id_to_greengenes): def gen_tmpl(taxon, otu_id_to_greengenes, greengenes_alignment, output_file_path = None): ids = list(set(get_ids(taxon, otu_id_to_greengenes))) - print '%d ids found for %s.' % (len(ids), taxon) + print('%d ids found for %s.' 
% (len(ids), taxon)) o_path = '%s.tmpl' % taxon if output_file_path: @@ -39,7 +39,7 @@ def gen_tmpl(taxon, otu_id_to_greengenes, greengenes_alignment, output_file_path template = u.FastaOutput(o_path) fasta = u.SequenceSource(greengenes_alignment) - while fasta.next(): + while next(fasta): if fasta.id in ids: template.store(fasta, split = False) ids.remove(fasta.id) diff --git a/bin/o-fasta-length-distribution b/bin/o-fasta-length-distribution index 358b08e..76456c6 100755 --- a/bin/o-fasta-length-distribution +++ b/bin/o-fasta-length-distribution @@ -27,7 +27,7 @@ def length_distribution(fasta, output = None, title = None): fasta.reset() - while fasta.next(): + while next(fasta): if fasta.pos % 1000 == 0 or fasta.pos == 1: sys.stderr.write('\r[fastalib] Reading: %s' % (fasta.pos)) sys.stderr.flush() @@ -72,14 +72,14 @@ def length_distribution(fasta, output = None, title = None): plt.subplots_adjust(left=0.05, bottom = 0.01, top = 0.95, right = 0.98) plt.plot(seq_len_distribution, color = 'black', alpha = 0.3) - plt.fill_between(range(0, max_seq_len + 1), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.30) + plt.fill_between(list(range(0, max_seq_len + 1)), seq_len_distribution, y2 = 0, color = 'black', alpha = 0.30) plt.ylabel('number of sequences') - xtickstep = (max_seq_len / 50) or 1 - ytickstep = max(seq_len_distribution) / 20 or 1 - plt.xticks(range(xtickstep, max_seq_len + 1, xtickstep), rotation=90, size='xx-small') - plt.yticks(range(0, max(seq_len_distribution) + 1, ytickstep), + xtickstep = (max_seq_len // 50) or 1 + ytickstep = max(seq_len_distribution) // 20 or 1 + plt.xticks(list(range(xtickstep, max_seq_len + 1, xtickstep)), rotation=90, size='xx-small') + plt.yticks(list(range(0, max(seq_len_distribution) + 1, ytickstep)), [y for y in range(0, max(seq_len_distribution) + 1, ytickstep)], size='xx-small') plt.xlim(xmin = 0, xmax = max_seq_len) @@ -96,7 +96,7 @@ def length_distribution(fasta, output = None, title = None): length_abundance = {} for l in sequence_lengths: - if length_abundance.has_key(l): + if l in length_abundance: length_abundance[l] += 1 else: length_abundance[l] = 1 @@ -104,15 +104,15 @@ def length_distribution(fasta, output = None, title = None): percentages = [] total_percentage = 0 for i in range(0, max_seq_len): - if length_abundance.has_key(i): + if i in length_abundance: total_percentage += length_abundance[i] * 100.0 / total_seqs percentages.append(total_percentage) else: percentages.append(total_percentage) - xtickstep = (max_seq_len / 50) or 1 - plt.xticks(range(xtickstep, max_seq_len + 1, xtickstep), rotation=90, size='xx-small') - plt.yticks(range(0, 101, 5), + xtickstep = (max_seq_len // 50) or 1 + plt.xticks(list(range(xtickstep, max_seq_len + 1, xtickstep)), rotation=90, size='xx-small') + plt.yticks(list(range(0, 101, 5)), ['%d%%' % y for y in range(0, 101, 5)], size='xx-small') plt.ylabel('percent of reads') @@ -120,7 +120,7 @@ def length_distribution(fasta, output = None, title = None): plt.xlim(xmin = 0, xmax = max_seq_len) plt.ylim(ymin = 0, ymax = 100) plt.plot(percentages) - plt.fill_between(range(0, max_seq_len + 1), percentages + [100], y2 = 0, color = 'blue', alpha = 0.30) + plt.fill_between(list(range(0, max_seq_len + 1)), percentages + [100], y2 = 0, color = 'blue', alpha = 0.30) #############################################################################################################
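# The cumulative-percentage loop in o-fasta-length-distribution above can be
# expressed in a few lines of numpy; a small sketch with synthetic lengths
# (not the script's actual I/O):
import numpy as np

lengths = np.array([76, 99, 101, 101, 250, 250, 250])
counts = np.bincount(lengths, minlength = lengths.max() + 1)
cumulative_percent = counts.cumsum() * 100.0 / len(lengths)
# cumulative_percent[i] == percent of reads with length <= i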
diff --git a/bin/o-gen-dicts-for-qual-stats b/bin/o-gen-dicts-for-qual-stats index 4745fef..1e9d431 100755 --- a/bin/o-gen-dicts-for-qual-stats +++ b/bin/o-gen-dicts-for-qual-stats @@ -16,7 +16,7 @@ # import sys -import cPickle +import pickle from Oligotyping.utils.utils import get_quals_dict @@ -26,7 +26,7 @@ quals_file = sys.argv[2] quals_dict = get_quals_dict(quals_file, alignment_file) -cPickle.dump(quals_dict, open(alignment_file + '-QUALS-DICT', 'w')) +pickle.dump(quals_dict, open(alignment_file + '-QUALS-DICT', 'wb')) -print 'output file:' -print ' - "%s"' % (alignment_file + '-QUALS-DICT') +print('output file:') +print(' - "%s"' % (alignment_file + '-QUALS-DICT')) diff --git a/bin/o-gen-stackbar-with-sets-from-ENVIRONMENT b/bin/o-gen-stackbar-with-sets-from-ENVIRONMENT index 1985986..26fce11 100755 --- a/bin/o-gen-stackbar-with-sets-from-ENVIRONMENT +++ b/bin/o-gen-stackbar-with-sets-from-ENVIRONMENT @@ -35,15 +35,15 @@ environ_output_file_name = input_file_path + '-cos-%s-SETS-ENVIRON' % cosine_sim samples_dict = get_samples_dict_from_environment_file(input_file_path) oligos = get_oligos_sorted_by_abundance(samples_dict) unit_counts, unit_percents = get_unit_counts_and_percents(oligos, samples_dict) -samples = samples_dict.keys() +samples = list(samples_dict.keys()) -across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(oligos, samples_dict.keys(), unit_percents) +across_samples_sum_normalized, across_samples_max_normalized = get_units_across_samples_dicts(oligos, list(samples_dict.keys()), unit_percents) oligotype_sets = get_oligotype_sets_greedy(oligos, across_samples_sum_normalized, cosine_similarity_value, sets_output_file_name) -print '%d sets from %d units' % (len(oligotype_sets), len(oligos)) +print('%d sets from %d units' % (len(oligotype_sets), len(oligos))) samples_dict_with_agglomerated_oligos = {} diff --git a/bin/o-generate-consensus-from-alignment b/bin/o-generate-consensus-from-alignment index 3408a93..6624f5f 100755 --- a/bin/o-generate-consensus-from-alignment +++ b/bin/o-generate-consensus-from-alignment @@ -19,7 +19,7 @@ import Oligotyping.lib.fastalib as u consensus_sequence = '' fasta = u.SequenceSource(sys.argv[1]) -fasta.next() +next(fasta) alignment_length = len(fasta.seq) consensus_dict = {} for i in range(0, alignment_length): @@ -27,12 +27,12 @@ for i in range(0, alignment_length): fasta.reset() -while fasta.next(): +while next(fasta): for pos in range(0, alignment_length): consensus_dict[pos][fasta.seq[pos]] += 1 for pos in range(0, alignment_length): - consensus_sequence += sorted(consensus_dict[pos].iteritems(), key=operator.itemgetter(1), reverse=True)[0][0] + consensus_sequence += sorted(iter(consensus_dict[pos].items()), key=operator.itemgetter(1), reverse=True)[0][0] -print '>' + os.path.basename(sys.argv[1]) -print consensus_sequence +print('>' + os.path.basename(sys.argv[1])) +print(consensus_sequence) diff --git a/bin/o-generate-exclusive-figures b/bin/o-generate-exclusive-figures index ec51831..9b0c9ed 100755 --- a/bin/o-generate-exclusive-figures +++ b/bin/o-generate-exclusive-figures @@ -12,13 +12,13 @@ import os import sys -import cPickle +import pickle from Oligotyping.lib.decomposer import Decomposer from Oligotyping.utils.utils import get_samples_dict_from_environment_file -runinfo = cPickle.load(open(sys.argv[1])) +runinfo = pickle.load(open(sys.argv[1], 'rb')) sample_mapping = sys.argv[2] decomposer = Decomposer()
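# The o-generate-consensus-from-alignment script above tallies characters per
# column and keeps the most frequent one. The same idea with
# collections.Counter on a toy alignment (not the script's argv-based I/O):
from collections import Counter

alignment = ['ACGT-', 'ACGA-', 'ACTTC']
consensus = ''.join(Counter(column).most_common(1)[0][0]
                    for column in zip(*alignment))  # zip(*...) iterates columns
# consensus == 'ACGT-'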
diff --git a/bin/o-generate-html-output b/bin/o-generate-html-output index fd5acf5..0150c67 100755 --- a/bin/o-generate-html-output +++ b/bin/o-generate-html-output @@ -12,7 +12,7 @@ import os import sys -import cPickle +import pickle import argparse parser = argparse.ArgumentParser(description='Generate Static HTML Output from MED or Oligotyping runs') @@ -27,14 +27,14 @@ parser.add_argument('--entropy-figure', default = None, metavar = 'ENTROPY_FIGUR args = parser.parse_args() if args.type not in ['oligotyping', 'med']: - print "Run type must be either 'oligotyping' or 'med'" + print("Run type must be either 'oligotyping' or 'med'") sys.exit() if not os.path.exists(args.run_info_dict_path): - print "Runinfo file is not where you said it would be: '%s'" % args.run_info_dict_path + print("Runinfo file is not where you said it would be: '%s'" % args.run_info_dict_path) sys.exit() -run_info_dict = cPickle.load(open(args.run_info_dict_path)) +run_info_dict = pickle.load(open(args.run_info_dict_path, 'rb')) if args.type == 'oligotyping': from Oligotyping.utils.html.for_oligotyping import generate_html_output @@ -44,4 +44,4 @@ else: index_page = generate_html_output(run_info_dict, args.output_directory) -print '\n\tHTML output is ready: "%s"\n' % index_page +print('\n\tHTML output is ready: "%s"\n' % index_page) diff --git a/bin/o-generate-matching-qual-file b/bin/o-generate-matching-qual-file index a8374f8..ea4b5f4 100755 --- a/bin/o-generate-matching-qual-file +++ b/bin/o-generate-matching-qual-file @@ -18,8 +18,8 @@ import Oligotyping.lib.fastalib as u alignment = u.SequenceSource(sys.argv[1]) quals = u.SequenceSource(sys.argv[2]) -alignment.next() -quals.next() +next(alignment) +next(quals) qual = [int(q) for q in quals.seq.split()] qual_aligned = [] @@ -28,5 +28,5 @@ for i in range(0, len(alignment.seq)): qual_aligned.append(qual.pop(0)) else: qual_aligned.append(None) -print alignment.seq -print qual_aligned +print(alignment.seq) +print(qual_aligned)
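# Run-info dicts pickled by earlier Python 2 runs of the pipeline may still be
# on disk; Python 3's pickle can read them if told how to decode py2 byte
# strings. A hedged sketch (path hypothetical; 'latin-1' is the usual safe
# choice for py2-era pickles):
import pickle

with open('RUNINFO.cPickle', 'rb') as f:
    run_info_dict = pickle.load(f, encoding = 'latin-1')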
diff --git a/bin/o-generate-oligo-base-networks b/bin/o-generate-oligo-base-networks index fac0946..4612b1c 100755 --- a/bin/o-generate-oligo-base-networks +++ b/bin/o-generate-oligo-base-networks @@ -29,20 +29,20 @@ def oligotype_network_structure(environment_file_path, output_dir = None): m = [] for base in oligo: if base not in base_pos: - print 'Error: The environment file does not seem to be generated by an oligotyping analysis.' + print('Error: The environment file does not seem to be generated by an oligotyping analysis.') sys.exit(-1) m.append(base_pos[base]) - if samples_dict.has_key(sample): + if sample in samples_dict: samples_dict[sample][oligo] = (m, int(count)) else: samples_dict[sample] = {oligo: (m, int(count))} for sample in samples_dict: - total_reads = sum([x[1] for x in samples_dict[sample].values()]) + total_reads = sum([x[1] for x in list(samples_dict[sample].values())]) - N = len(samples_dict[sample].keys()[0]) + N = len(list(samples_dict[sample].keys())[0]) ind = np.arange(N) + 1 fig = plt.figure(figsize = (N + 2, 6)) @@ -63,7 +63,7 @@ def oligotype_network_structure(environment_file_path, output_dir = None): bases = {} for oligo in samples_dict[sample]: base = oligo[pos] - if bases.has_key(base): + if base in bases: bases[base] += samples_dict[sample][oligo][1] else: bases[base] = samples_dict[sample][oligo][1] @@ -84,7 +84,7 @@ def oligotype_network_structure(environment_file_path, output_dir = None): plt.yticks(np.arange(6), ('', 'G', 'C', 'T', 'A', '--'), size = 'x-large') plt.title(sample + " (total reads: %s)" % total_reads) - locs = range(0, N + 2) + locs = list(range(0, N + 2)) plt.xticks(locs, [''] + ["VL " + str(x) for x in range(0, len(locs))[1:-1]] + ['']) if output_dir: @@ -111,7 +111,7 @@ if __name__ == '__main__': try: os.makedirs(args.output_dir) except: - print "Error: Attempt to create the output directory ('%s') have failed" % args.output_dir + print("Error: Attempt to create the output directory ('%s') has failed" % args.output_dir) sys.exit(-1) sys.exit(oligotype_network_structure(args.environment_file, args.output_dir)) diff --git a/bin/o-get-reads-from-fasta b/bin/o-get-reads-from-fasta index d5bbebd..2afbe13 100755 --- a/bin/o-get-reads-from-fasta +++ b/bin/o-get-reads-from-fasta @@ -31,7 +31,7 @@ def main(input_fasta, ids_file_path, output_fasta, compare_up_to_the_first_space num_ids_found = 0 progress.new('Processing input FASTA') - while fasta.next() and len(read_ids): + while next(fasta) and len(read_ids): if fasta.pos % 1000 == 0: progress.update('%d processed; %d ids matched' % (fasta.pos, num_ids_found)) diff --git a/bin/o-get-sample-info-from-fasta b/bin/o-get-sample-info-from-fasta index 921e643..038f736 100755 --- a/bin/o-get-sample-info-from-fasta +++ b/bin/o-get-sample-info-from-fasta @@ -20,24 +20,24 @@ from Oligotyping.utils.utils import pretty_print as pp fasta = u.SequenceSource(sys.argv[1]) samples = {} -while fasta.next(): +while next(fasta): if fasta.pos % 1000 == 0: sys.stderr.write('\rreads processed so far: %d' % (fasta.pos)) sys.stderr.flush() sample_name = '_'.join(fasta.id.split('_')[:-1]) - if samples.has_key(sample_name): + if sample_name in samples: samples[sample_name] += 1 else: samples[sample_name] = 1 sys.stderr.write('\rSamples and read counts found in the FASTA file:\n') -for sample, read_count in sorted(samples.iteritems(), key=operator.itemgetter(1), reverse = True): - print '%-30s %s' % (sample, pp(read_count)) - -print -print -print 'Total number of samples: ', pp(len(samples)) -print 'Total number of reads: ', pp(fasta.pos) -print +for sample, read_count in sorted(iter(samples.items()), key=operator.itemgetter(1), reverse = True): + print('%-30s %s' % (sample, pp(read_count))) + +print() +print() +print('Total number of samples: ', pp(len(samples))) +print('Total number of reads: ', pp(fasta.pos)) +print() fasta.close() diff --git a/bin/o-keep-or-remove-samples-from-fasta b/bin/o-keep-or-remove-samples-from-fasta index
749c5c6..7892f8d 100755 --- a/bin/o-keep-or-remove-samples-from-fasta +++ b/bin/o-keep-or-remove-samples-from-fasta @@ -37,7 +37,7 @@ def main(fasta_file_path, samples_file_path, retain_samples = False, output_file fasta_file_path, ', '.join(samples_list[0:3]))) - while fasta.next(): + while next(fasta): if fasta.pos % 1000 == 0: sys.stderr.write('\rreads processed so far: %s' % (pp(fasta.pos))) sys.stderr.flush() diff --git a/bin/o-pad-with-gaps b/bin/o-pad-with-gaps index e815aea..96ae959 100755 --- a/bin/o-pad-with-gaps +++ b/bin/o-pad-with-gaps @@ -23,13 +23,13 @@ def main(input_fasta_path, output_fasta_path=None, reverse=False): output = u.FastaOutput(output_fasta_path) longest_read = 0 - while fasta.next(): + while next(fasta): if len(fasta.seq) > longest_read: longest_read = len(fasta.seq) fasta.reset() - while fasta.next(): + while next(fasta): if fasta.pos % 10000 == 0: sys.stderr.write('\rreads processed so far: %d' % (fasta.pos)) sys.stderr.flush() diff --git a/bin/o-populate-datasets-from-VAMPS-download b/bin/o-populate-datasets-from-VAMPS-download index 673f8aa..4408cef 100755 --- a/bin/o-populate-datasets-from-VAMPS-download +++ b/bin/o-populate-datasets-from-VAMPS-download @@ -18,7 +18,7 @@ def main(input_file, output_file, taxon = None): fasta = u.SequenceSource(input_file) VAMPS_output_type = None - fasta.next() + next(fasta) if len(fasta.id.split('|')) == 5: # >GR7EWKD02FPDXN|MBJ_GOS_Bv6v4|C65_5|1.7% from GASTtaxonomy|Count VAMPS_output_type = 1 @@ -32,7 +32,7 @@ def main(input_file, output_file, taxon = None): output = u.FastaOutput(output_file) - while fasta.next(): + while next(fasta): if taxon and fasta.id.find(taxon) == -1: continue diff --git a/bin/o-remove-gaps b/bin/o-remove-gaps index 9e5cb3d..65b2484 100755 --- a/bin/o-remove-gaps +++ b/bin/o-remove-gaps @@ -22,7 +22,7 @@ def main(input_fasta_path, output_fasta_path=None, reverse=False): fasta = u.SequenceSource(input_fasta_path) output = u.FastaOutput(output_fasta_path) - while fasta.next(): + while next(fasta): if fasta.pos % 1000 == 0: sys.stderr.write('\rreads processed so far: %d' % (fasta.pos)) sys.stderr.flush() diff --git a/bin/o-sequence-distances b/bin/o-sequence-distances index 64d9f80..bf8221e 100755 --- a/bin/o-sequence-distances +++ b/bin/o-sequence-distances @@ -41,11 +41,11 @@ def main(input_file, output_file, align = False): similarities = {} fasta = u.SequenceSource(input_file) - while fasta.next(): + while next(fasta): sequences[fasta.id] = fasta.seq similarities[fasta.id] = {} - keys = sequences.keys() + keys = list(sequences.keys()) progress.new('Processing sequences') for i in range(0, len(keys)): diff --git a/bin/o-smart-trim b/bin/o-smart-trim index b15cd0b..86f580a 100755 --- a/bin/o-smart-trim +++ b/bin/o-smart-trim @@ -25,9 +25,9 @@ def smart_trim(fasta_file_path, min_percent = 95.0, output_file_path = None, fro run.info('Input File', fasta_file_path) progress.new('Sanity check') - fasta.next() + next(fasta) alignment_length = len(fasta.seq) - while fasta.next(): + while next(fasta): if fasta.pos % 100 == 0: progress.update(fasta.pos) if len(fasta.seq) != alignment_length: @@ -44,7 +44,7 @@ def smart_trim(fasta_file_path, min_percent = 95.0, output_file_path = None, fro positions = dict([(i, 0) for i in range(0, alignment_length)]) progress.new('First pass') - while fasta.next(): + while next(fasta): if not from_start: fasta.seq = fasta.seq[::-1] @@ -96,7 +96,7 @@ def smart_trim(fasta_file_path, min_percent = 95.0, output_file_path = None, fro fasta.reset() progress.new('Storing 
trimmed reads') - while fasta.next(): + while next(fasta): if fasta.pos % 100 == 0: progress.update('%d' % fasta.pos) @@ -135,10 +135,10 @@ if __name__ == "__main__": args = parser.parse_args() if args.from_start and args.from_end: - print "Error: You have to use either --from-start flag, or --from-end flag for each run. Sorry!" + print("Error: You have to use either --from-start flag, or --from-end flag for each run. Sorry!") sys.exit() if (not args.from_start) and (not args.from_end): - print "Error: You must choose the appropriate flag to declare from where the trimming should start (--from-start or --from-end)." + print("Error: You must choose the appropriate flag to declare from where the trimming should start (--from-start or --from-end).") sys.exit() smart_trim(args.fasta_file, args.min_percent, args.output, from_start = args.from_start) \ No newline at end of file diff --git a/bin/o-subsample-fasta-file b/bin/o-subsample-fasta-file index a0ae842..a3edb33 100755 --- a/bin/o-subsample-fasta-file +++ b/bin/o-subsample-fasta-file @@ -22,14 +22,14 @@ def main(input_fasta, subsample_to, output_fasta): fasta_content = {} - while fasta.next(): + while next(fasta): if fasta.pos % 1000 == 0: sys.stderr.write('\r[Reading FASTA into memory] reads processed so far: %d' % (fasta.pos)) sys.stderr.flush() sample_name = get_sample_name_from_defline(fasta.id) - if not fasta_content.has_key(sample_name): + if sample_name not in fasta_content: fasta_content[sample_name] = [] fasta_content[sample_name].append((fasta.id, fasta.seq),) diff --git a/bin/o-subsample-matrix-file b/bin/o-subsample-matrix-file index b98f6ff..afc2e87 100755 --- a/bin/o-subsample-matrix-file +++ b/bin/o-subsample-matrix-file @@ -15,7 +15,7 @@ import sys def remove(matrix_file, cols_to_remove = None, rows_to_remove = None, output_file = None): if cols_to_remove == None and rows_to_remove == None: - print 'Error: both cols and rows to remove are empty. Exiting.' + print('Error: both cols and rows to remove are empty. Exiting.') sys.exit() matrix = open(matrix_file) @@ -25,8 +25,8 @@ def remove(matrix_file, cols_to_remove = None, rows_to_remove = None, output_fil for line in matrix.readlines(): rows.append(line.strip().split('\t')) - cols_to_keep = range(0, len(header)) - rows_to_keep = range(0, len(rows)) + cols_to_keep = list(range(0, len(header))) + rows_to_keep = list(range(0, len(rows))) if cols_to_remove: for i in range(0, len(header)): @@ -49,7 +49,7 @@ def remove(matrix_file, cols_to_remove = None, rows_to_remove = None, output_fil def keep(matrix_file, cols_to_keep = None, rows_to_keep = None, output_file = None): if cols_to_keep == None and rows_to_keep == None: - print 'Error: both cols and rows to keep are empty. Exiting.' + print('Error: both cols and rows to keep are empty. Exiting.') sys.exit() matrix = open(matrix_file) @@ -59,8 +59,8 @@ def keep(matrix_file, cols_to_keep = None, rows_to_keep = None, output_file = No for line in matrix.readlines(): rows.append(line.strip().split('\t')) - col_ids_to_keep = range(0, len(header)) - row_ids_to_keep = range(0, len(rows)) + col_ids_to_keep = list(range(0, len(header))) + row_ids_to_keep = list(range(0, len(rows))) if cols_to_keep: for i in range(0, len(header)): @@ -120,7 +120,7 @@ if __name__ == '__main__': if args.cols_to_keep or args.rows_to_keep: mode = 'keep' if (args.cols_to_keep or args.rows_to_keep) and (args.cols_to_remove or args.rows_to_remove): - print "Sorry, you can't mix -c and -r with -C and -R parameters..." 
+ print("Sorry, you can't mix -c and -r with -C and -R parameters...") sys.exit() if mode == 'remove': diff --git a/bin/o-treat-homopolymer-regions b/bin/o-treat-homopolymer-regions index dc0e871..a721012 100755 --- a/bin/o-treat-homopolymer-regions +++ b/bin/o-treat-homopolymer-regions @@ -165,18 +165,18 @@ if __name__ == '__main__': if os.path.exists(args.output_fasta): sys.stderr.write('Output file ("%s") exists. Overwrite? [Y|n] ' % args.output_fasta) - response = raw_input() + response = input() if response == '' or response.lower() == 'y': output_fasta = open(args.output_fasta, 'w') else: - print 'Exiting.' + print('Exiting.') sys.exit(1) else: output_fasta = open(args.output_fasta, 'w') if args.log: if os.path.exists(args.log): - print 'Log file ("%s") exists. Exting.' % args.log + print('Log file ("%s") exists. Exting.' % args.log) sys.exit(1) else: log = open(args.log, 'w') @@ -184,11 +184,11 @@ if __name__ == '__main__': log = sys.stdout - while input_alignment.next(): + while next(input_alignment): target_id = input_alignment.id target_seq = input_alignment.seq - input_alignment.next() + next(input_alignment) query_id = input_alignment.id query_seq = input_alignment.seq diff --git a/bin/o-trim b/bin/o-trim index 1589b21..a306eba 100755 --- a/bin/o-trim +++ b/bin/o-trim @@ -16,14 +16,14 @@ import Oligotyping.lib.fastalib as u from Oligotyping.utils.utils import pretty_print as pp -def main(input_fasta, trim_from = 0, trim_to = sys.maxint, min_length = 0, output = None): +def main(input_fasta, trim_from = 0, trim_to = sys.maxsize, min_length = 0, output = None): if not output: output = input_fasta + '.TRIMMED.fa' fasta = u.SequenceSource(input_fasta) output = u.FastaOutput(output) - while fasta.next(): + while next(fasta): if fasta.pos % 1000 == 0: sys.stderr.write('\rreads processed so far: %s' % (pp(fasta.pos))) sys.stderr.flush() @@ -47,7 +47,7 @@ if __name__ == '__main__': help = 'FASTA file to subsample') parser.add_argument('--trim-from', metavar = 'INTEGER', type = int, default = 0, help = 'Start position') - parser.add_argument('--trim-to', metavar = 'INTEGER', type = int, default = sys.maxint, + parser.add_argument('--trim-to', metavar = 'INTEGER', type = int, default = sys.maxsize, help = 'End position') parser.add_argument('--min-length', metavar = 'INTEGER', type = int, default = 0, help = 'Minimum lenght of a read to be kept') diff --git a/bin/o-trim-uninformative-columns-from-alignment b/bin/o-trim-uninformative-columns-from-alignment index 9a189bb..4c11b55 100755 --- a/bin/o-trim-uninformative-columns-from-alignment +++ b/bin/o-trim-uninformative-columns-from-alignment @@ -20,14 +20,14 @@ run = utils.Run() # get read length fasta = u.SequenceSource(sys.argv[1], lazy_init = False) -fasta.next() +next(fasta) len_fasta_entry = len(fasta.seq) # reset fasta. fasta.reset() # make sure all reads have equal length -while fasta.next(): +while next(fasta): if len(fasta.seq) != len_fasta_entry: sys.stderr.write('All reads must have equal number of characters, but it is not the case. 
diff --git a/bin/o-trim-uninformative-columns-from-alignment b/bin/o-trim-uninformative-columns-from-alignment index 9a189bb..4c11b55 100755 --- a/bin/o-trim-uninformative-columns-from-alignment +++ b/bin/o-trim-uninformative-columns-from-alignment @@ -20,14 +20,14 @@ run = utils.Run() # get read length fasta = u.SequenceSource(sys.argv[1], lazy_init = False) -fasta.next() +next(fasta) len_fasta_entry = len(fasta.seq) # reset fasta. fasta.reset() # make sure all reads have equal length -while fasta.next(): +while next(fasta): if len(fasta.seq) != len_fasta_entry: sys.stderr.write('All reads must have equal number of characters, but it is not the case. Sorry and bye.\n') sys.exit() @@ -39,7 +39,7 @@ nucleotide_positions = set(range(0, len_fasta_entry)) invalid_columns = set(range(0, len_fasta_entry)) progress.new('Step 1') -while fasta.next(): +while next(fasta): if fasta.pos % 100 == 1: progress.update('%.2d%% -- pos: %d' % (fasta.pos * 100 / fasta.total_seq, fasta.pos)) @@ -57,7 +57,7 @@ columns_to_keep = [x for x in range(0, len_fasta_entry) if x not in invalid_colu f = open(sys.argv[1] + '-TRIMMED', 'w') progress.new('Step 2') -while fasta.next(): +while next(fasta): if fasta.pos % 100 == 1: progress.update('%.2d%% -- pos: %d' % (fasta.pos * 100 / fasta.total_seq, fasta.pos)) diff --git a/bin/o-visualize-qual-scores-along-columns.py b/bin/o-visualize-qual-scores-along-columns.py index d354f82..aa4b6eb 100755 --- a/bin/o-visualize-qual-scores-along-columns.py +++ b/bin/o-visualize-qual-scores-along-columns.py @@ -17,7 +17,7 @@ # import sys -import cPickle +import pickle from scipy import log2 as log import matplotlib.pyplot as plt @@ -33,12 +33,12 @@ 'N': 'white'} alignment = u.SequenceSource(sys.argv[1]) -quals_dict = cPickle.load(open(sys.argv[2])) +quals_dict = pickle.load(open(sys.argv[2], 'rb')) quals_dict_filtered = {} ids_in_alignment_file = [] -while alignment.next(): +while next(alignment): ids_in_alignment_file.append(alignment.id) ids_in_alignment_file = set(ids_in_alignment_file) @@ -53,7 +53,7 @@ colors = [colors[0] for _ in range(0, 20)] + colors max_count = max([qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]]) -alignment_length = len(quals_dict.values()[0]) +alignment_length = len(list(quals_dict.values())[0]) fig = plt.figure(figsize = (25, 8)) plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1) @@ -62,7 +62,7 @@ plt.subplots_adjust(left=0.02, bottom = 0.09, top = 0.95, right = 0.98) for position in range(0, alignment_length): - print position + print(position) if not qual_stats_dict[position]: continue diff --git a/bin/oligotype b/bin/oligotype index 0678f51..e5f894a 100755 --- a/bin/oligotype +++ b/bin/oligotype @@ -17,12 +17,12 @@ try: import Oligotyping except ImportError: import inspect - print ''' + print(''' Oligotyping package seems to be missing from your PYTHONPATH. Running this may help: export PYTHONPATH="$PYTHONPATH:%s" - ''' % (os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0]))) + ''' % (os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0])))) sys.exit() @@ -32,14 +32,14 @@ try: from Oligotyping.lib.oligotyping import Oligotyping from Oligotyping.utils.utils import ConfigError from Oligotyping.utils import parsers -except ImportError, e: +except ImportError as e: import_error(e) sys.exit() if __name__ == '__main__': if '--version' in sys.argv: - print parsers.version + print(parsers.version) sys.exit() parser = parsers.oligotyping() @@ -49,6 +49,6 @@ if __name__ == '__main__': try: oligotyping.run_all() - except ConfigError, e: - print e + except ConfigError as e: + print(e) sys.exit(-1) diff --git a/setup.py b/setup.py index 98f1ae9..af1cda4 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ name = "oligotyping", version = open('VERSION').read().strip(), description = "The oligotyping and minimum entropy decomposition (MED) pipeline for the analysis of marker gene amplicons", - author = u"A. Murat Eren", + author = "A. Murat Eren", author_email = "meren@mbl.edu", license = "GPLv3+", url = "http://oligotyping.org",
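# The bulk of this patch is mechanical Python 2 -> 3 syntax work: print is a
# function, raise/except use parentheses and `as`, and dict membership tests
# use `in` instead of has_key(). A side-by-side sketch of the converted idioms
# (ConfigError here is a stand-in for the pipeline's own exception class):
class ConfigError(Exception):
    pass

samples = {'sample_01': 42}

try:
    if 'sample_01' in samples:                      # was: samples.has_key(...)
        print('count: %d' % samples['sample_01'])   # was: print 'count: %d' % ...
    raise ConfigError('Exiting.')                   # was: raise ConfigError, 'Exiting.'
except ConfigError as e:                            # was: except ConfigError, e:
    print(e)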